diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,175041 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.010031634190783, + "eval_steps": 500, + "global_step": 250000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.040126536763131e-05, + "grad_norm": 67829032.0, + "learning_rate": 1.6e-08, + "loss": 968110.7, + "step": 10 + }, + { + "epoch": 8.080253073526263e-05, + "grad_norm": 16803730.0, + "learning_rate": 3.2e-08, + "loss": 1196399.9, + "step": 20 + }, + { + "epoch": 0.00012120379610289395, + "grad_norm": 22838876.0, + "learning_rate": 4.8e-08, + "loss": 1056602.5, + "step": 30 + }, + { + "epoch": 0.00016160506147052525, + "grad_norm": 73843272.0, + "learning_rate": 6.4e-08, + "loss": 1141494.8, + "step": 40 + }, + { + "epoch": 0.00020200632683815657, + "grad_norm": 54545196.0, + "learning_rate": 8e-08, + "loss": 957907.8, + "step": 50 + }, + { + "epoch": 0.0002424075922057879, + "grad_norm": 92213856.0, + "learning_rate": 9.6e-08, + "loss": 848764.8, + "step": 60 + }, + { + "epoch": 0.0002828088575734192, + "grad_norm": 31028078.0, + "learning_rate": 1.1200000000000001e-07, + "loss": 1170701.2, + "step": 70 + }, + { + "epoch": 0.0003232101229410505, + "grad_norm": 163952224.0, + "learning_rate": 1.28e-07, + "loss": 1103386.8, + "step": 80 + }, + { + "epoch": 0.0003636113883086818, + "grad_norm": 18770234.0, + "learning_rate": 1.4400000000000002e-07, + "loss": 843690.0, + "step": 90 + }, + { + "epoch": 0.00040401265367631315, + "grad_norm": 23307910.0, + "learning_rate": 1.6e-07, + "loss": 510580.9, + "step": 100 + }, + { + "epoch": 0.00044441391904394446, + "grad_norm": 20383214.0, + "learning_rate": 1.7600000000000001e-07, + "loss": 836211.4, + "step": 110 + }, + { + "epoch": 0.0004848151844115758, + "grad_norm": 6664257.5, + "learning_rate": 1.92e-07, + "loss": 538982.2, + "step": 120 + }, + { + "epoch": 0.000525216449779207, + "grad_norm": 40624088.0, + "learning_rate": 2.08e-07, + "loss": 573649.65, + "step": 130 + }, + { + "epoch": 0.0005656177151468384, + "grad_norm": 3334806.25, + "learning_rate": 2.2400000000000002e-07, + "loss": 375054.3, + "step": 140 + }, + { + "epoch": 0.0006060189805144697, + "grad_norm": 13133202.0, + "learning_rate": 2.4000000000000003e-07, + "loss": 315558.45, + "step": 150 + }, + { + "epoch": 0.000646420245882101, + "grad_norm": 11623496.0, + "learning_rate": 2.56e-07, + "loss": 291291.15, + "step": 160 + }, + { + "epoch": 0.0006868215112497323, + "grad_norm": 9045907.0, + "learning_rate": 2.72e-07, + "loss": 206214.7, + "step": 170 + }, + { + "epoch": 0.0007272227766173637, + "grad_norm": 2797081.5, + "learning_rate": 2.8800000000000004e-07, + "loss": 102995.2688, + "step": 180 + }, + { + "epoch": 0.000767624041984995, + "grad_norm": 1488353.375, + "learning_rate": 3.04e-07, + "loss": 63808.2375, + "step": 190 + }, + { + "epoch": 0.0008080253073526263, + "grad_norm": 9341711.0, + "learning_rate": 3.2e-07, + "loss": 63790.7125, + "step": 200 + }, + { + "epoch": 0.0008484265727202576, + "grad_norm": 973900.0, + "learning_rate": 3.36e-07, + "loss": 31788.6031, + "step": 210 + }, + { + "epoch": 0.0008888278380878889, + "grad_norm": 1171955.0, + "learning_rate": 3.5200000000000003e-07, + "loss": 20258.8859, + "step": 220 + }, + { + "epoch": 0.0009292291034555202, + "grad_norm": 2075967.5, + "learning_rate": 3.68e-07, + "loss": 18445.9719, + "step": 230 + }, + { + 
"epoch": 0.0009696303688231516, + "grad_norm": 518227.84375, + "learning_rate": 3.84e-07, + "loss": 11526.9164, + "step": 240 + }, + { + "epoch": 0.0010100316341907828, + "grad_norm": 294450.53125, + "learning_rate": 4.0000000000000003e-07, + "loss": 13908.4438, + "step": 250 + }, + { + "epoch": 0.001050432899558414, + "grad_norm": 111182.6328125, + "learning_rate": 4.16e-07, + "loss": 5702.7922, + "step": 260 + }, + { + "epoch": 0.0010908341649260454, + "grad_norm": 768859.5625, + "learning_rate": 4.3200000000000006e-07, + "loss": 2948.0676, + "step": 270 + }, + { + "epoch": 0.0011312354302936767, + "grad_norm": 89492.234375, + "learning_rate": 4.4800000000000004e-07, + "loss": 3319.5156, + "step": 280 + }, + { + "epoch": 0.001171636695661308, + "grad_norm": 46964.84375, + "learning_rate": 4.64e-07, + "loss": 1031.323, + "step": 290 + }, + { + "epoch": 0.0012120379610289394, + "grad_norm": 33340.5703125, + "learning_rate": 4.800000000000001e-07, + "loss": 846.0583, + "step": 300 + }, + { + "epoch": 0.0012524392263965707, + "grad_norm": 28215.482421875, + "learning_rate": 4.96e-07, + "loss": 834.5062, + "step": 310 + }, + { + "epoch": 0.001292840491764202, + "grad_norm": 11181.080078125, + "learning_rate": 5.12e-07, + "loss": 864.9401, + "step": 320 + }, + { + "epoch": 0.0013332417571318333, + "grad_norm": 1238.0142822265625, + "learning_rate": 5.280000000000001e-07, + "loss": 585.2978, + "step": 330 + }, + { + "epoch": 0.0013736430224994647, + "grad_norm": 1966.2215576171875, + "learning_rate": 5.44e-07, + "loss": 666.2775, + "step": 340 + }, + { + "epoch": 0.001414044287867096, + "grad_norm": 1869.56103515625, + "learning_rate": 5.6e-07, + "loss": 461.7512, + "step": 350 + }, + { + "epoch": 0.0014544455532347273, + "grad_norm": 1312.7501220703125, + "learning_rate": 5.760000000000001e-07, + "loss": 352.5466, + "step": 360 + }, + { + "epoch": 0.0014948468186023586, + "grad_norm": 841.4824829101562, + "learning_rate": 5.920000000000001e-07, + "loss": 616.5967, + "step": 370 + }, + { + "epoch": 0.00153524808396999, + "grad_norm": 2345.778564453125, + "learning_rate": 6.08e-07, + "loss": 425.3517, + "step": 380 + }, + { + "epoch": 0.0015756493493376213, + "grad_norm": 5840.93505859375, + "learning_rate": 6.24e-07, + "loss": 552.9114, + "step": 390 + }, + { + "epoch": 0.0016160506147052526, + "grad_norm": 11822.4033203125, + "learning_rate": 6.4e-07, + "loss": 525.1168, + "step": 400 + }, + { + "epoch": 0.001656451880072884, + "grad_norm": 1601.13232421875, + "learning_rate": 6.560000000000002e-07, + "loss": 552.4253, + "step": 410 + }, + { + "epoch": 0.0016968531454405152, + "grad_norm": 1350.9642333984375, + "learning_rate": 6.72e-07, + "loss": 327.495, + "step": 420 + }, + { + "epoch": 0.0017372544108081465, + "grad_norm": 1152.662109375, + "learning_rate": 6.88e-07, + "loss": 527.8169, + "step": 430 + }, + { + "epoch": 0.0017776556761757779, + "grad_norm": 1413.875, + "learning_rate": 7.040000000000001e-07, + "loss": 472.8792, + "step": 440 + }, + { + "epoch": 0.0018180569415434092, + "grad_norm": 9319.2451171875, + "learning_rate": 7.2e-07, + "loss": 535.9127, + "step": 450 + }, + { + "epoch": 0.0018584582069110405, + "grad_norm": 1880.06494140625, + "learning_rate": 7.36e-07, + "loss": 470.9137, + "step": 460 + }, + { + "epoch": 0.0018988594722786718, + "grad_norm": 4890.30078125, + "learning_rate": 7.520000000000001e-07, + "loss": 538.6518, + "step": 470 + }, + { + "epoch": 0.0019392607376463031, + "grad_norm": 2007.5809326171875, + "learning_rate": 7.68e-07, + "loss": 565.6874, + 
"step": 480 + }, + { + "epoch": 0.0019796620030139342, + "grad_norm": 2471.47900390625, + "learning_rate": 7.84e-07, + "loss": 442.5784, + "step": 490 + }, + { + "epoch": 0.0020200632683815656, + "grad_norm": 1540.8028564453125, + "learning_rate": 8.000000000000001e-07, + "loss": 450.0033, + "step": 500 + }, + { + "epoch": 0.002060464533749197, + "grad_norm": 1443.65966796875, + "learning_rate": 8.160000000000001e-07, + "loss": 445.0297, + "step": 510 + }, + { + "epoch": 0.002100865799116828, + "grad_norm": 1733.938232421875, + "learning_rate": 8.32e-07, + "loss": 400.4208, + "step": 520 + }, + { + "epoch": 0.0021412670644844595, + "grad_norm": 1513.56689453125, + "learning_rate": 8.480000000000001e-07, + "loss": 510.4969, + "step": 530 + }, + { + "epoch": 0.002181668329852091, + "grad_norm": 2161.153564453125, + "learning_rate": 8.640000000000001e-07, + "loss": 277.8636, + "step": 540 + }, + { + "epoch": 0.002222069595219722, + "grad_norm": 20168.064453125, + "learning_rate": 8.8e-07, + "loss": 522.7577, + "step": 550 + }, + { + "epoch": 0.0022624708605873535, + "grad_norm": 7988.58447265625, + "learning_rate": 8.960000000000001e-07, + "loss": 371.9067, + "step": 560 + }, + { + "epoch": 0.002302872125954985, + "grad_norm": 8020.52880859375, + "learning_rate": 9.120000000000001e-07, + "loss": 322.6992, + "step": 570 + }, + { + "epoch": 0.002343273391322616, + "grad_norm": 1934.175048828125, + "learning_rate": 9.28e-07, + "loss": 521.0251, + "step": 580 + }, + { + "epoch": 0.0023836746566902474, + "grad_norm": 7777.93994140625, + "learning_rate": 9.440000000000001e-07, + "loss": 580.8935, + "step": 590 + }, + { + "epoch": 0.0024240759220578788, + "grad_norm": 1465.2525634765625, + "learning_rate": 9.600000000000001e-07, + "loss": 323.2696, + "step": 600 + }, + { + "epoch": 0.00246447718742551, + "grad_norm": 6267.78955078125, + "learning_rate": 9.76e-07, + "loss": 551.8063, + "step": 610 + }, + { + "epoch": 0.0025048784527931414, + "grad_norm": 1145.3438720703125, + "learning_rate": 9.92e-07, + "loss": 469.9723, + "step": 620 + }, + { + "epoch": 0.0025452797181607727, + "grad_norm": 1489.368408203125, + "learning_rate": 1.0080000000000001e-06, + "loss": 508.7093, + "step": 630 + }, + { + "epoch": 0.002585680983528404, + "grad_norm": 6181.95556640625, + "learning_rate": 1.024e-06, + "loss": 379.0744, + "step": 640 + }, + { + "epoch": 0.0026260822488960354, + "grad_norm": 1673.41943359375, + "learning_rate": 1.04e-06, + "loss": 443.2694, + "step": 650 + }, + { + "epoch": 0.0026664835142636667, + "grad_norm": 1307.235595703125, + "learning_rate": 1.0560000000000001e-06, + "loss": 330.2708, + "step": 660 + }, + { + "epoch": 0.002706884779631298, + "grad_norm": 1477.4444580078125, + "learning_rate": 1.072e-06, + "loss": 367.9195, + "step": 670 + }, + { + "epoch": 0.0027472860449989293, + "grad_norm": 2627.85595703125, + "learning_rate": 1.088e-06, + "loss": 471.7599, + "step": 680 + }, + { + "epoch": 0.0027876873103665606, + "grad_norm": 18068.255859375, + "learning_rate": 1.1040000000000001e-06, + "loss": 461.6373, + "step": 690 + }, + { + "epoch": 0.002828088575734192, + "grad_norm": 1454.4468994140625, + "learning_rate": 1.12e-06, + "loss": 384.7038, + "step": 700 + }, + { + "epoch": 0.0028684898411018233, + "grad_norm": 3437.469970703125, + "learning_rate": 1.1360000000000002e-06, + "loss": 408.2528, + "step": 710 + }, + { + "epoch": 0.0029088911064694546, + "grad_norm": 5858.83203125, + "learning_rate": 1.1520000000000002e-06, + "loss": 524.3583, + "step": 720 + }, + { + "epoch": 
0.002949292371837086, + "grad_norm": 1421.0924072265625, + "learning_rate": 1.168e-06, + "loss": 369.25, + "step": 730 + }, + { + "epoch": 0.0029896936372047172, + "grad_norm": 11731.91796875, + "learning_rate": 1.1840000000000002e-06, + "loss": 399.4635, + "step": 740 + }, + { + "epoch": 0.0030300949025723486, + "grad_norm": 2317.9013671875, + "learning_rate": 1.2000000000000002e-06, + "loss": 293.395, + "step": 750 + }, + { + "epoch": 0.00307049616793998, + "grad_norm": 2414.58154296875, + "learning_rate": 1.216e-06, + "loss": 274.6507, + "step": 760 + }, + { + "epoch": 0.003110897433307611, + "grad_norm": 1939.6138916015625, + "learning_rate": 1.2320000000000002e-06, + "loss": 528.3517, + "step": 770 + }, + { + "epoch": 0.0031512986986752425, + "grad_norm": 1406.26220703125, + "learning_rate": 1.248e-06, + "loss": 338.4011, + "step": 780 + }, + { + "epoch": 0.003191699964042874, + "grad_norm": 20347.54296875, + "learning_rate": 1.2640000000000003e-06, + "loss": 471.6086, + "step": 790 + }, + { + "epoch": 0.003232101229410505, + "grad_norm": 1740.917236328125, + "learning_rate": 1.28e-06, + "loss": 306.3603, + "step": 800 + }, + { + "epoch": 0.0032725024947781365, + "grad_norm": 2866.4833984375, + "learning_rate": 1.296e-06, + "loss": 391.702, + "step": 810 + }, + { + "epoch": 0.003312903760145768, + "grad_norm": 2046.43017578125, + "learning_rate": 1.3120000000000003e-06, + "loss": 365.4885, + "step": 820 + }, + { + "epoch": 0.003353305025513399, + "grad_norm": 1237.2532958984375, + "learning_rate": 1.328e-06, + "loss": 340.2963, + "step": 830 + }, + { + "epoch": 0.0033937062908810304, + "grad_norm": 1663.76904296875, + "learning_rate": 1.344e-06, + "loss": 322.8104, + "step": 840 + }, + { + "epoch": 0.0034341075562486618, + "grad_norm": 986.255615234375, + "learning_rate": 1.3600000000000001e-06, + "loss": 400.075, + "step": 850 + }, + { + "epoch": 0.003474508821616293, + "grad_norm": 1229.50048828125, + "learning_rate": 1.376e-06, + "loss": 437.9705, + "step": 860 + }, + { + "epoch": 0.0035149100869839244, + "grad_norm": 1681.372314453125, + "learning_rate": 1.392e-06, + "loss": 348.3813, + "step": 870 + }, + { + "epoch": 0.0035553113523515557, + "grad_norm": 1042.6865234375, + "learning_rate": 1.4080000000000001e-06, + "loss": 387.8172, + "step": 880 + }, + { + "epoch": 0.003595712617719187, + "grad_norm": 1339.73291015625, + "learning_rate": 1.424e-06, + "loss": 454.262, + "step": 890 + }, + { + "epoch": 0.0036361138830868184, + "grad_norm": 1832.9056396484375, + "learning_rate": 1.44e-06, + "loss": 338.0524, + "step": 900 + }, + { + "epoch": 0.0036765151484544497, + "grad_norm": 1756.4337158203125, + "learning_rate": 1.4560000000000001e-06, + "loss": 342.8832, + "step": 910 + }, + { + "epoch": 0.003716916413822081, + "grad_norm": 1183.12841796875, + "learning_rate": 1.472e-06, + "loss": 472.4732, + "step": 920 + }, + { + "epoch": 0.0037573176791897123, + "grad_norm": 1523.04541015625, + "learning_rate": 1.488e-06, + "loss": 302.5532, + "step": 930 + }, + { + "epoch": 0.0037977189445573436, + "grad_norm": 1680.9580078125, + "learning_rate": 1.5040000000000001e-06, + "loss": 469.7435, + "step": 940 + }, + { + "epoch": 0.003838120209924975, + "grad_norm": 1084.1322021484375, + "learning_rate": 1.52e-06, + "loss": 351.1293, + "step": 950 + }, + { + "epoch": 0.0038785214752926063, + "grad_norm": 1235.6416015625, + "learning_rate": 1.536e-06, + "loss": 321.9481, + "step": 960 + }, + { + "epoch": 0.003918922740660237, + "grad_norm": 3025.70703125, + "learning_rate": 
1.5520000000000001e-06, + "loss": 376.9666, + "step": 970 + }, + { + "epoch": 0.0039593240060278685, + "grad_norm": 4222.2294921875, + "learning_rate": 1.568e-06, + "loss": 332.1988, + "step": 980 + }, + { + "epoch": 0.0039997252713955, + "grad_norm": 1029.98486328125, + "learning_rate": 1.5840000000000002e-06, + "loss": 352.5739, + "step": 990 + }, + { + "epoch": 0.004040126536763131, + "grad_norm": 2628.78125, + "learning_rate": 1.6000000000000001e-06, + "loss": 329.429, + "step": 1000 + }, + { + "epoch": 0.0040805278021307624, + "grad_norm": 1164.842041015625, + "learning_rate": 1.616e-06, + "loss": 305.5859, + "step": 1010 + }, + { + "epoch": 0.004120929067498394, + "grad_norm": 1922.541259765625, + "learning_rate": 1.6320000000000002e-06, + "loss": 511.8121, + "step": 1020 + }, + { + "epoch": 0.004161330332866025, + "grad_norm": 1714.0921630859375, + "learning_rate": 1.6480000000000001e-06, + "loss": 407.0236, + "step": 1030 + }, + { + "epoch": 0.004201731598233656, + "grad_norm": 1210.8438720703125, + "learning_rate": 1.664e-06, + "loss": 566.8404, + "step": 1040 + }, + { + "epoch": 0.004242132863601288, + "grad_norm": 1249.5894775390625, + "learning_rate": 1.6800000000000002e-06, + "loss": 384.7839, + "step": 1050 + }, + { + "epoch": 0.004282534128968919, + "grad_norm": 3367.826416015625, + "learning_rate": 1.6960000000000002e-06, + "loss": 404.7083, + "step": 1060 + }, + { + "epoch": 0.00432293539433655, + "grad_norm": 2480.017578125, + "learning_rate": 1.712e-06, + "loss": 462.2562, + "step": 1070 + }, + { + "epoch": 0.004363336659704182, + "grad_norm": 1116.3057861328125, + "learning_rate": 1.7280000000000002e-06, + "loss": 320.063, + "step": 1080 + }, + { + "epoch": 0.004403737925071813, + "grad_norm": 7996.5927734375, + "learning_rate": 1.7440000000000002e-06, + "loss": 617.6091, + "step": 1090 + }, + { + "epoch": 0.004444139190439444, + "grad_norm": 1276.182373046875, + "learning_rate": 1.76e-06, + "loss": 248.9301, + "step": 1100 + }, + { + "epoch": 0.004484540455807076, + "grad_norm": 1531.5565185546875, + "learning_rate": 1.7760000000000002e-06, + "loss": 414.5104, + "step": 1110 + }, + { + "epoch": 0.004524941721174707, + "grad_norm": 1622.10498046875, + "learning_rate": 1.7920000000000002e-06, + "loss": 348.6984, + "step": 1120 + }, + { + "epoch": 0.004565342986542338, + "grad_norm": 1182.023193359375, + "learning_rate": 1.808e-06, + "loss": 391.4778, + "step": 1130 + }, + { + "epoch": 0.00460574425190997, + "grad_norm": 2435.02392578125, + "learning_rate": 1.8240000000000002e-06, + "loss": 401.6438, + "step": 1140 + }, + { + "epoch": 0.004646145517277601, + "grad_norm": 1774.41357421875, + "learning_rate": 1.8400000000000002e-06, + "loss": 450.1066, + "step": 1150 + }, + { + "epoch": 0.004686546782645232, + "grad_norm": 1903.6357421875, + "learning_rate": 1.856e-06, + "loss": 435.0743, + "step": 1160 + }, + { + "epoch": 0.0047269480480128636, + "grad_norm": 3751.902099609375, + "learning_rate": 1.8720000000000002e-06, + "loss": 410.6276, + "step": 1170 + }, + { + "epoch": 0.004767349313380495, + "grad_norm": 970.9257202148438, + "learning_rate": 1.8880000000000002e-06, + "loss": 341.2953, + "step": 1180 + }, + { + "epoch": 0.004807750578748126, + "grad_norm": 1704.4547119140625, + "learning_rate": 1.9040000000000003e-06, + "loss": 353.8378, + "step": 1190 + }, + { + "epoch": 0.0048481518441157575, + "grad_norm": 1300.0616455078125, + "learning_rate": 1.9200000000000003e-06, + "loss": 360.7929, + "step": 1200 + }, + { + "epoch": 0.004888553109483389, + "grad_norm": 
1489.1114501953125, + "learning_rate": 1.936e-06, + "loss": 295.9987, + "step": 1210 + }, + { + "epoch": 0.00492895437485102, + "grad_norm": 1338.783935546875, + "learning_rate": 1.952e-06, + "loss": 304.2403, + "step": 1220 + }, + { + "epoch": 0.0049693556402186515, + "grad_norm": 2954.978515625, + "learning_rate": 1.968e-06, + "loss": 363.7113, + "step": 1230 + }, + { + "epoch": 0.005009756905586283, + "grad_norm": 1665.712890625, + "learning_rate": 1.984e-06, + "loss": 380.0134, + "step": 1240 + }, + { + "epoch": 0.005050158170953914, + "grad_norm": 1413.253662109375, + "learning_rate": 2.0000000000000003e-06, + "loss": 434.5684, + "step": 1250 + }, + { + "epoch": 0.0050905594363215454, + "grad_norm": 1601.294677734375, + "learning_rate": 2.0160000000000003e-06, + "loss": 391.018, + "step": 1260 + }, + { + "epoch": 0.005130960701689177, + "grad_norm": 1728.199462890625, + "learning_rate": 2.032e-06, + "loss": 567.1912, + "step": 1270 + }, + { + "epoch": 0.005171361967056808, + "grad_norm": 1534.099853515625, + "learning_rate": 2.048e-06, + "loss": 286.9052, + "step": 1280 + }, + { + "epoch": 0.005211763232424439, + "grad_norm": 8782.30078125, + "learning_rate": 2.064e-06, + "loss": 438.2109, + "step": 1290 + }, + { + "epoch": 0.005252164497792071, + "grad_norm": 1421.4984130859375, + "learning_rate": 2.08e-06, + "loss": 350.2737, + "step": 1300 + }, + { + "epoch": 0.005292565763159702, + "grad_norm": 1040.47802734375, + "learning_rate": 2.0960000000000003e-06, + "loss": 366.0718, + "step": 1310 + }, + { + "epoch": 0.005332967028527333, + "grad_norm": 6799.5439453125, + "learning_rate": 2.1120000000000003e-06, + "loss": 536.4446, + "step": 1320 + }, + { + "epoch": 0.005373368293894965, + "grad_norm": 1544.8162841796875, + "learning_rate": 2.128e-06, + "loss": 366.8769, + "step": 1330 + }, + { + "epoch": 0.005413769559262596, + "grad_norm": 1506.4298095703125, + "learning_rate": 2.144e-06, + "loss": 368.3125, + "step": 1340 + }, + { + "epoch": 0.005454170824630227, + "grad_norm": 1189.5625, + "learning_rate": 2.16e-06, + "loss": 312.2686, + "step": 1350 + }, + { + "epoch": 0.005494572089997859, + "grad_norm": 1164.61376953125, + "learning_rate": 2.176e-06, + "loss": 280.1839, + "step": 1360 + }, + { + "epoch": 0.00553497335536549, + "grad_norm": 2103.354248046875, + "learning_rate": 2.1920000000000004e-06, + "loss": 399.5125, + "step": 1370 + }, + { + "epoch": 0.005575374620733121, + "grad_norm": 2190.36865234375, + "learning_rate": 2.2080000000000003e-06, + "loss": 426.4789, + "step": 1380 + }, + { + "epoch": 0.005615775886100753, + "grad_norm": 1544.298095703125, + "learning_rate": 2.2240000000000002e-06, + "loss": 442.533, + "step": 1390 + }, + { + "epoch": 0.005656177151468384, + "grad_norm": 1093.632080078125, + "learning_rate": 2.24e-06, + "loss": 392.0169, + "step": 1400 + }, + { + "epoch": 0.005696578416836015, + "grad_norm": 945.7317504882812, + "learning_rate": 2.256e-06, + "loss": 381.6516, + "step": 1410 + }, + { + "epoch": 0.0057369796822036466, + "grad_norm": 2683.9423828125, + "learning_rate": 2.2720000000000004e-06, + "loss": 384.2589, + "step": 1420 + }, + { + "epoch": 0.005777380947571278, + "grad_norm": 3979.19580078125, + "learning_rate": 2.2880000000000004e-06, + "loss": 392.3776, + "step": 1430 + }, + { + "epoch": 0.005817782212938909, + "grad_norm": 2824.247802734375, + "learning_rate": 2.3040000000000003e-06, + "loss": 402.7255, + "step": 1440 + }, + { + "epoch": 0.0058581834783065405, + "grad_norm": 960.5885620117188, + "learning_rate": 2.3200000000000002e-06, + 
"loss": 319.8677, + "step": 1450 + }, + { + "epoch": 0.005898584743674172, + "grad_norm": 2618.564697265625, + "learning_rate": 2.336e-06, + "loss": 439.8901, + "step": 1460 + }, + { + "epoch": 0.005938986009041803, + "grad_norm": 792.161376953125, + "learning_rate": 2.352e-06, + "loss": 257.2391, + "step": 1470 + }, + { + "epoch": 0.0059793872744094345, + "grad_norm": 1367.7330322265625, + "learning_rate": 2.3680000000000005e-06, + "loss": 325.2489, + "step": 1480 + }, + { + "epoch": 0.006019788539777066, + "grad_norm": 1044.9365234375, + "learning_rate": 2.3840000000000004e-06, + "loss": 401.5786, + "step": 1490 + }, + { + "epoch": 0.006060189805144697, + "grad_norm": 1188.423828125, + "learning_rate": 2.4000000000000003e-06, + "loss": 246.7422, + "step": 1500 + }, + { + "epoch": 0.0061005910705123284, + "grad_norm": 11918.841796875, + "learning_rate": 2.4160000000000002e-06, + "loss": 328.3231, + "step": 1510 + }, + { + "epoch": 0.00614099233587996, + "grad_norm": 1257.3265380859375, + "learning_rate": 2.432e-06, + "loss": 311.4827, + "step": 1520 + }, + { + "epoch": 0.006181393601247591, + "grad_norm": 2074.5869140625, + "learning_rate": 2.448e-06, + "loss": 328.7458, + "step": 1530 + }, + { + "epoch": 0.006221794866615222, + "grad_norm": 18415.935546875, + "learning_rate": 2.4640000000000005e-06, + "loss": 352.2806, + "step": 1540 + }, + { + "epoch": 0.006262196131982854, + "grad_norm": 1001.1808471679688, + "learning_rate": 2.4800000000000004e-06, + "loss": 286.1027, + "step": 1550 + }, + { + "epoch": 0.006302597397350485, + "grad_norm": 2527.68359375, + "learning_rate": 2.496e-06, + "loss": 299.6014, + "step": 1560 + }, + { + "epoch": 0.006342998662718116, + "grad_norm": 1650.5621337890625, + "learning_rate": 2.512e-06, + "loss": 343.5038, + "step": 1570 + }, + { + "epoch": 0.006383399928085748, + "grad_norm": 2738.1669921875, + "learning_rate": 2.5280000000000006e-06, + "loss": 406.5985, + "step": 1580 + }, + { + "epoch": 0.006423801193453379, + "grad_norm": 1327.51416015625, + "learning_rate": 2.5440000000000005e-06, + "loss": 345.5673, + "step": 1590 + }, + { + "epoch": 0.00646420245882101, + "grad_norm": 1616.677001953125, + "learning_rate": 2.56e-06, + "loss": 355.4767, + "step": 1600 + }, + { + "epoch": 0.006504603724188642, + "grad_norm": 1353.2659912109375, + "learning_rate": 2.576e-06, + "loss": 378.6266, + "step": 1610 + }, + { + "epoch": 0.006545004989556273, + "grad_norm": 2731.284912109375, + "learning_rate": 2.592e-06, + "loss": 405.0956, + "step": 1620 + }, + { + "epoch": 0.006585406254923904, + "grad_norm": 922.3407592773438, + "learning_rate": 2.608e-06, + "loss": 384.0765, + "step": 1630 + }, + { + "epoch": 0.006625807520291536, + "grad_norm": 1486.85986328125, + "learning_rate": 2.6240000000000006e-06, + "loss": 403.0715, + "step": 1640 + }, + { + "epoch": 0.006666208785659167, + "grad_norm": 2140.081298828125, + "learning_rate": 2.64e-06, + "loss": 432.7109, + "step": 1650 + }, + { + "epoch": 0.006706610051026798, + "grad_norm": 1448.5260009765625, + "learning_rate": 2.656e-06, + "loss": 269.9132, + "step": 1660 + }, + { + "epoch": 0.0067470113163944296, + "grad_norm": 915.2938842773438, + "learning_rate": 2.672e-06, + "loss": 357.5584, + "step": 1670 + }, + { + "epoch": 0.006787412581762061, + "grad_norm": 2771.476806640625, + "learning_rate": 2.688e-06, + "loss": 337.7158, + "step": 1680 + }, + { + "epoch": 0.006827813847129692, + "grad_norm": 1374.5445556640625, + "learning_rate": 2.704e-06, + "loss": 293.565, + "step": 1690 + }, + { + "epoch": 
0.0068682151124973235, + "grad_norm": 4395.9130859375, + "learning_rate": 2.7200000000000002e-06, + "loss": 374.4429, + "step": 1700 + }, + { + "epoch": 0.006908616377864955, + "grad_norm": 1054.5625, + "learning_rate": 2.736e-06, + "loss": 323.8032, + "step": 1710 + }, + { + "epoch": 0.006949017643232586, + "grad_norm": 5906.85205078125, + "learning_rate": 2.752e-06, + "loss": 459.6569, + "step": 1720 + }, + { + "epoch": 0.0069894189086002175, + "grad_norm": 997.4058227539062, + "learning_rate": 2.768e-06, + "loss": 319.6349, + "step": 1730 + }, + { + "epoch": 0.007029820173967849, + "grad_norm": 1296.9693603515625, + "learning_rate": 2.784e-06, + "loss": 374.5422, + "step": 1740 + }, + { + "epoch": 0.00707022143933548, + "grad_norm": 1609.7955322265625, + "learning_rate": 2.8000000000000003e-06, + "loss": 371.6045, + "step": 1750 + }, + { + "epoch": 0.0071106227047031114, + "grad_norm": 1264.4715576171875, + "learning_rate": 2.8160000000000002e-06, + "loss": 407.3331, + "step": 1760 + }, + { + "epoch": 0.007151023970070743, + "grad_norm": 1074.2283935546875, + "learning_rate": 2.832e-06, + "loss": 310.4047, + "step": 1770 + }, + { + "epoch": 0.007191425235438374, + "grad_norm": 1257.7445068359375, + "learning_rate": 2.848e-06, + "loss": 429.3438, + "step": 1780 + }, + { + "epoch": 0.007231826500806005, + "grad_norm": 992.61376953125, + "learning_rate": 2.864e-06, + "loss": 246.8076, + "step": 1790 + }, + { + "epoch": 0.007272227766173637, + "grad_norm": 671.7614135742188, + "learning_rate": 2.88e-06, + "loss": 379.3301, + "step": 1800 + }, + { + "epoch": 0.007312629031541268, + "grad_norm": 910.122802734375, + "learning_rate": 2.8960000000000003e-06, + "loss": 357.4776, + "step": 1810 + }, + { + "epoch": 0.007353030296908899, + "grad_norm": 1070.190673828125, + "learning_rate": 2.9120000000000002e-06, + "loss": 376.1024, + "step": 1820 + }, + { + "epoch": 0.007393431562276531, + "grad_norm": 1258.80859375, + "learning_rate": 2.928e-06, + "loss": 408.0143, + "step": 1830 + }, + { + "epoch": 0.007433832827644162, + "grad_norm": 1531.339111328125, + "learning_rate": 2.944e-06, + "loss": 258.2354, + "step": 1840 + }, + { + "epoch": 0.007474234093011793, + "grad_norm": 1209.522216796875, + "learning_rate": 2.96e-06, + "loss": 427.6216, + "step": 1850 + }, + { + "epoch": 0.007514635358379425, + "grad_norm": 1199.561767578125, + "learning_rate": 2.976e-06, + "loss": 401.7808, + "step": 1860 + }, + { + "epoch": 0.007555036623747056, + "grad_norm": 7475.45166015625, + "learning_rate": 2.9920000000000003e-06, + "loss": 422.44, + "step": 1870 + }, + { + "epoch": 0.007595437889114687, + "grad_norm": 9618.85546875, + "learning_rate": 3.0080000000000003e-06, + "loss": 348.2636, + "step": 1880 + }, + { + "epoch": 0.007635839154482319, + "grad_norm": 5412.513671875, + "learning_rate": 3.024e-06, + "loss": 376.1331, + "step": 1890 + }, + { + "epoch": 0.00767624041984995, + "grad_norm": 1432.8302001953125, + "learning_rate": 3.04e-06, + "loss": 385.2273, + "step": 1900 + }, + { + "epoch": 0.007716641685217581, + "grad_norm": 2442.544677734375, + "learning_rate": 3.056e-06, + "loss": 371.3791, + "step": 1910 + }, + { + "epoch": 0.0077570429505852126, + "grad_norm": 860.7770385742188, + "learning_rate": 3.072e-06, + "loss": 379.6252, + "step": 1920 + }, + { + "epoch": 0.007797444215952844, + "grad_norm": 1763.4764404296875, + "learning_rate": 3.0880000000000003e-06, + "loss": 343.6517, + "step": 1930 + }, + { + "epoch": 0.007837845481320474, + "grad_norm": 2200.64501953125, + "learning_rate": 
3.1040000000000003e-06, + "loss": 450.1875, + "step": 1940 + }, + { + "epoch": 0.007878246746688106, + "grad_norm": 1477.6318359375, + "learning_rate": 3.12e-06, + "loss": 298.0398, + "step": 1950 + }, + { + "epoch": 0.007918648012055737, + "grad_norm": 1309.56591796875, + "learning_rate": 3.136e-06, + "loss": 343.6609, + "step": 1960 + }, + { + "epoch": 0.007959049277423368, + "grad_norm": 5944.794921875, + "learning_rate": 3.152e-06, + "loss": 444.82, + "step": 1970 + }, + { + "epoch": 0.007999450542791, + "grad_norm": 627.3289184570312, + "learning_rate": 3.1680000000000004e-06, + "loss": 325.393, + "step": 1980 + }, + { + "epoch": 0.008039851808158631, + "grad_norm": 1257.6890869140625, + "learning_rate": 3.1840000000000003e-06, + "loss": 318.8681, + "step": 1990 + }, + { + "epoch": 0.008080253073526262, + "grad_norm": 1115.019287109375, + "learning_rate": 3.2000000000000003e-06, + "loss": 401.846, + "step": 2000 + }, + { + "epoch": 0.008120654338893894, + "grad_norm": 1869.59814453125, + "learning_rate": 3.216e-06, + "loss": 328.1058, + "step": 2010 + }, + { + "epoch": 0.008161055604261525, + "grad_norm": 6479.30029296875, + "learning_rate": 3.232e-06, + "loss": 324.0423, + "step": 2020 + }, + { + "epoch": 0.008201456869629156, + "grad_norm": 1267.346923828125, + "learning_rate": 3.248e-06, + "loss": 392.6459, + "step": 2030 + }, + { + "epoch": 0.008241858134996788, + "grad_norm": 951.6561889648438, + "learning_rate": 3.2640000000000004e-06, + "loss": 347.5508, + "step": 2040 + }, + { + "epoch": 0.008282259400364419, + "grad_norm": 1847.4453125, + "learning_rate": 3.2800000000000004e-06, + "loss": 219.8378, + "step": 2050 + }, + { + "epoch": 0.00832266066573205, + "grad_norm": 5982.748046875, + "learning_rate": 3.2960000000000003e-06, + "loss": 439.8982, + "step": 2060 + }, + { + "epoch": 0.008363061931099681, + "grad_norm": 1733.5140380859375, + "learning_rate": 3.3120000000000002e-06, + "loss": 380.0745, + "step": 2070 + }, + { + "epoch": 0.008403463196467313, + "grad_norm": 874.2711791992188, + "learning_rate": 3.328e-06, + "loss": 331.7575, + "step": 2080 + }, + { + "epoch": 0.008443864461834944, + "grad_norm": 1542.882568359375, + "learning_rate": 3.344e-06, + "loss": 363.2435, + "step": 2090 + }, + { + "epoch": 0.008484265727202575, + "grad_norm": 1524.271484375, + "learning_rate": 3.3600000000000004e-06, + "loss": 297.0619, + "step": 2100 + }, + { + "epoch": 0.008524666992570207, + "grad_norm": 930.7454223632812, + "learning_rate": 3.3760000000000004e-06, + "loss": 259.8849, + "step": 2110 + }, + { + "epoch": 0.008565068257937838, + "grad_norm": 1436.7398681640625, + "learning_rate": 3.3920000000000003e-06, + "loss": 355.7602, + "step": 2120 + }, + { + "epoch": 0.00860546952330547, + "grad_norm": 1005.0159912109375, + "learning_rate": 3.4080000000000002e-06, + "loss": 318.3575, + "step": 2130 + }, + { + "epoch": 0.0086458707886731, + "grad_norm": 1379.6585693359375, + "learning_rate": 3.424e-06, + "loss": 382.3723, + "step": 2140 + }, + { + "epoch": 0.008686272054040732, + "grad_norm": 1252.010986328125, + "learning_rate": 3.44e-06, + "loss": 285.3958, + "step": 2150 + }, + { + "epoch": 0.008726673319408363, + "grad_norm": 1142.7216796875, + "learning_rate": 3.4560000000000005e-06, + "loss": 405.48, + "step": 2160 + }, + { + "epoch": 0.008767074584775995, + "grad_norm": 1282.8763427734375, + "learning_rate": 3.4720000000000004e-06, + "loss": 340.1113, + "step": 2170 + }, + { + "epoch": 0.008807475850143626, + "grad_norm": 1098.17626953125, + "learning_rate": 
3.4880000000000003e-06, + "loss": 324.9738, + "step": 2180 + }, + { + "epoch": 0.008847877115511257, + "grad_norm": 2809.671630859375, + "learning_rate": 3.5040000000000002e-06, + "loss": 256.4325, + "step": 2190 + }, + { + "epoch": 0.008888278380878889, + "grad_norm": 1207.278076171875, + "learning_rate": 3.52e-06, + "loss": 368.4686, + "step": 2200 + }, + { + "epoch": 0.00892867964624652, + "grad_norm": 840.9583129882812, + "learning_rate": 3.5360000000000005e-06, + "loss": 251.3147, + "step": 2210 + }, + { + "epoch": 0.008969080911614151, + "grad_norm": 1274.2135009765625, + "learning_rate": 3.5520000000000005e-06, + "loss": 329.7183, + "step": 2220 + }, + { + "epoch": 0.009009482176981783, + "grad_norm": 1051.3580322265625, + "learning_rate": 3.5680000000000004e-06, + "loss": 430.3931, + "step": 2230 + }, + { + "epoch": 0.009049883442349414, + "grad_norm": 1303.4356689453125, + "learning_rate": 3.5840000000000003e-06, + "loss": 378.467, + "step": 2240 + }, + { + "epoch": 0.009090284707717045, + "grad_norm": 1173.7679443359375, + "learning_rate": 3.6000000000000003e-06, + "loss": 316.8853, + "step": 2250 + }, + { + "epoch": 0.009130685973084677, + "grad_norm": 2078.344482421875, + "learning_rate": 3.616e-06, + "loss": 297.2612, + "step": 2260 + }, + { + "epoch": 0.009171087238452308, + "grad_norm": 2685.02783203125, + "learning_rate": 3.6320000000000005e-06, + "loss": 320.6362, + "step": 2270 + }, + { + "epoch": 0.00921148850381994, + "grad_norm": 1005.8909912109375, + "learning_rate": 3.6480000000000005e-06, + "loss": 288.9353, + "step": 2280 + }, + { + "epoch": 0.00925188976918757, + "grad_norm": 855.4976196289062, + "learning_rate": 3.6640000000000004e-06, + "loss": 308.0711, + "step": 2290 + }, + { + "epoch": 0.009292291034555202, + "grad_norm": 1389.6583251953125, + "learning_rate": 3.6800000000000003e-06, + "loss": 240.4098, + "step": 2300 + }, + { + "epoch": 0.009332692299922833, + "grad_norm": 1135.1143798828125, + "learning_rate": 3.6960000000000003e-06, + "loss": 363.1907, + "step": 2310 + }, + { + "epoch": 0.009373093565290464, + "grad_norm": 1591.9593505859375, + "learning_rate": 3.712e-06, + "loss": 281.7889, + "step": 2320 + }, + { + "epoch": 0.009413494830658096, + "grad_norm": 14705.8876953125, + "learning_rate": 3.7280000000000006e-06, + "loss": 311.9866, + "step": 2330 + }, + { + "epoch": 0.009453896096025727, + "grad_norm": 1404.6417236328125, + "learning_rate": 3.7440000000000005e-06, + "loss": 306.5218, + "step": 2340 + }, + { + "epoch": 0.009494297361393358, + "grad_norm": 729.1030883789062, + "learning_rate": 3.7600000000000004e-06, + "loss": 333.2889, + "step": 2350 + }, + { + "epoch": 0.00953469862676099, + "grad_norm": 1180.316162109375, + "learning_rate": 3.7760000000000004e-06, + "loss": 517.563, + "step": 2360 + }, + { + "epoch": 0.009575099892128621, + "grad_norm": 1715.1484375, + "learning_rate": 3.7920000000000003e-06, + "loss": 335.8421, + "step": 2370 + }, + { + "epoch": 0.009615501157496252, + "grad_norm": 1772.4295654296875, + "learning_rate": 3.8080000000000006e-06, + "loss": 282.402, + "step": 2380 + }, + { + "epoch": 0.009655902422863884, + "grad_norm": 1099.1082763671875, + "learning_rate": 3.824e-06, + "loss": 349.0338, + "step": 2390 + }, + { + "epoch": 0.009696303688231515, + "grad_norm": 796.462158203125, + "learning_rate": 3.8400000000000005e-06, + "loss": 392.1229, + "step": 2400 + }, + { + "epoch": 0.009736704953599146, + "grad_norm": 828.8687133789062, + "learning_rate": 3.856e-06, + "loss": 226.0539, + "step": 2410 + }, + { + "epoch": 
0.009777106218966778, + "grad_norm": 1197.3853759765625, + "learning_rate": 3.872e-06, + "loss": 309.0644, + "step": 2420 + }, + { + "epoch": 0.009817507484334409, + "grad_norm": 5062.5390625, + "learning_rate": 3.888e-06, + "loss": 374.2961, + "step": 2430 + }, + { + "epoch": 0.00985790874970204, + "grad_norm": 1278.6627197265625, + "learning_rate": 3.904e-06, + "loss": 243.6835, + "step": 2440 + }, + { + "epoch": 0.009898310015069672, + "grad_norm": 1015.155517578125, + "learning_rate": 3.920000000000001e-06, + "loss": 256.9745, + "step": 2450 + }, + { + "epoch": 0.009938711280437303, + "grad_norm": 971.3543701171875, + "learning_rate": 3.936e-06, + "loss": 200.8887, + "step": 2460 + }, + { + "epoch": 0.009979112545804934, + "grad_norm": 993.352294921875, + "learning_rate": 3.9520000000000004e-06, + "loss": 371.0807, + "step": 2470 + }, + { + "epoch": 0.010019513811172566, + "grad_norm": 1381.3070068359375, + "learning_rate": 3.968e-06, + "loss": 322.5396, + "step": 2480 + }, + { + "epoch": 0.010059915076540197, + "grad_norm": 1079.4725341796875, + "learning_rate": 3.984e-06, + "loss": 409.327, + "step": 2490 + }, + { + "epoch": 0.010100316341907828, + "grad_norm": 740.3635864257812, + "learning_rate": 4.000000000000001e-06, + "loss": 197.6069, + "step": 2500 + }, + { + "epoch": 0.01014071760727546, + "grad_norm": 935.5006713867188, + "learning_rate": 4.016e-06, + "loss": 279.3181, + "step": 2510 + }, + { + "epoch": 0.010181118872643091, + "grad_norm": 682.3497314453125, + "learning_rate": 4.0320000000000005e-06, + "loss": 265.5528, + "step": 2520 + }, + { + "epoch": 0.010221520138010722, + "grad_norm": 1179.9482421875, + "learning_rate": 4.048e-06, + "loss": 227.9891, + "step": 2530 + }, + { + "epoch": 0.010261921403378354, + "grad_norm": 1141.09228515625, + "learning_rate": 4.064e-06, + "loss": 292.1676, + "step": 2540 + }, + { + "epoch": 0.010302322668745985, + "grad_norm": 1268.06103515625, + "learning_rate": 4.08e-06, + "loss": 446.4533, + "step": 2550 + }, + { + "epoch": 0.010342723934113616, + "grad_norm": 798.9641723632812, + "learning_rate": 4.096e-06, + "loss": 264.5007, + "step": 2560 + }, + { + "epoch": 0.010383125199481247, + "grad_norm": 2075.81494140625, + "learning_rate": 4.112000000000001e-06, + "loss": 328.091, + "step": 2570 + }, + { + "epoch": 0.010423526464848879, + "grad_norm": 1393.79443359375, + "learning_rate": 4.128e-06, + "loss": 306.3986, + "step": 2580 + }, + { + "epoch": 0.01046392773021651, + "grad_norm": 1503.728515625, + "learning_rate": 4.1440000000000005e-06, + "loss": 270.4054, + "step": 2590 + }, + { + "epoch": 0.010504328995584141, + "grad_norm": 1611.9930419921875, + "learning_rate": 4.16e-06, + "loss": 321.695, + "step": 2600 + }, + { + "epoch": 0.010544730260951773, + "grad_norm": 1907.9898681640625, + "learning_rate": 4.176e-06, + "loss": 289.0048, + "step": 2610 + }, + { + "epoch": 0.010585131526319404, + "grad_norm": 1175.665283203125, + "learning_rate": 4.192000000000001e-06, + "loss": 427.0176, + "step": 2620 + }, + { + "epoch": 0.010625532791687035, + "grad_norm": 976.3261108398438, + "learning_rate": 4.208e-06, + "loss": 387.4734, + "step": 2630 + }, + { + "epoch": 0.010665934057054667, + "grad_norm": 1057.5343017578125, + "learning_rate": 4.2240000000000006e-06, + "loss": 346.9802, + "step": 2640 + }, + { + "epoch": 0.010706335322422298, + "grad_norm": 2099.923583984375, + "learning_rate": 4.24e-06, + "loss": 282.0068, + "step": 2650 + }, + { + "epoch": 0.01074673658778993, + "grad_norm": 720.9117431640625, + "learning_rate": 4.256e-06, + 
"loss": 218.0661, + "step": 2660 + }, + { + "epoch": 0.01078713785315756, + "grad_norm": 1310.834228515625, + "learning_rate": 4.272000000000001e-06, + "loss": 342.1894, + "step": 2670 + }, + { + "epoch": 0.010827539118525192, + "grad_norm": 862.5106811523438, + "learning_rate": 4.288e-06, + "loss": 334.8636, + "step": 2680 + }, + { + "epoch": 0.010867940383892823, + "grad_norm": 4778.763671875, + "learning_rate": 4.304000000000001e-06, + "loss": 350.2313, + "step": 2690 + }, + { + "epoch": 0.010908341649260455, + "grad_norm": 1542.6527099609375, + "learning_rate": 4.32e-06, + "loss": 431.975, + "step": 2700 + }, + { + "epoch": 0.010948742914628086, + "grad_norm": 1007.1238403320312, + "learning_rate": 4.3360000000000005e-06, + "loss": 378.154, + "step": 2710 + }, + { + "epoch": 0.010989144179995717, + "grad_norm": 1046.8201904296875, + "learning_rate": 4.352e-06, + "loss": 402.8721, + "step": 2720 + }, + { + "epoch": 0.011029545445363349, + "grad_norm": 870.9686279296875, + "learning_rate": 4.368e-06, + "loss": 291.1212, + "step": 2730 + }, + { + "epoch": 0.01106994671073098, + "grad_norm": 2072.454833984375, + "learning_rate": 4.384000000000001e-06, + "loss": 281.3425, + "step": 2740 + }, + { + "epoch": 0.011110347976098611, + "grad_norm": 841.0203857421875, + "learning_rate": 4.4e-06, + "loss": 212.3396, + "step": 2750 + }, + { + "epoch": 0.011150749241466243, + "grad_norm": 4775.9990234375, + "learning_rate": 4.416000000000001e-06, + "loss": 382.9681, + "step": 2760 + }, + { + "epoch": 0.011191150506833874, + "grad_norm": 1433.2176513671875, + "learning_rate": 4.432e-06, + "loss": 251.5269, + "step": 2770 + }, + { + "epoch": 0.011231551772201505, + "grad_norm": 1079.450439453125, + "learning_rate": 4.4480000000000004e-06, + "loss": 268.7675, + "step": 2780 + }, + { + "epoch": 0.011271953037569137, + "grad_norm": 1471.5330810546875, + "learning_rate": 4.464000000000001e-06, + "loss": 299.1654, + "step": 2790 + }, + { + "epoch": 0.011312354302936768, + "grad_norm": 890.1978149414062, + "learning_rate": 4.48e-06, + "loss": 313.091, + "step": 2800 + }, + { + "epoch": 0.0113527555683044, + "grad_norm": 1240.2139892578125, + "learning_rate": 4.496000000000001e-06, + "loss": 323.7776, + "step": 2810 + }, + { + "epoch": 0.01139315683367203, + "grad_norm": 942.0856323242188, + "learning_rate": 4.512e-06, + "loss": 200.9493, + "step": 2820 + }, + { + "epoch": 0.011433558099039662, + "grad_norm": 1702.8369140625, + "learning_rate": 4.5280000000000005e-06, + "loss": 341.6231, + "step": 2830 + }, + { + "epoch": 0.011473959364407293, + "grad_norm": 821.422119140625, + "learning_rate": 4.544000000000001e-06, + "loss": 247.005, + "step": 2840 + }, + { + "epoch": 0.011514360629774924, + "grad_norm": 1013.9811401367188, + "learning_rate": 4.56e-06, + "loss": 304.1821, + "step": 2850 + }, + { + "epoch": 0.011554761895142556, + "grad_norm": 1131.206298828125, + "learning_rate": 4.576000000000001e-06, + "loss": 295.4424, + "step": 2860 + }, + { + "epoch": 0.011595163160510187, + "grad_norm": 1932.0428466796875, + "learning_rate": 4.592e-06, + "loss": 309.9476, + "step": 2870 + }, + { + "epoch": 0.011635564425877818, + "grad_norm": 861.8889770507812, + "learning_rate": 4.608000000000001e-06, + "loss": 215.0618, + "step": 2880 + }, + { + "epoch": 0.01167596569124545, + "grad_norm": 1487.9378662109375, + "learning_rate": 4.624e-06, + "loss": 366.4824, + "step": 2890 + }, + { + "epoch": 0.011716366956613081, + "grad_norm": 1422.5128173828125, + "learning_rate": 4.6400000000000005e-06, + "loss": 315.9665, + 
"step": 2900 + }, + { + "epoch": 0.011756768221980712, + "grad_norm": 848.3942260742188, + "learning_rate": 4.656000000000001e-06, + "loss": 356.6105, + "step": 2910 + }, + { + "epoch": 0.011797169487348344, + "grad_norm": 794.7532348632812, + "learning_rate": 4.672e-06, + "loss": 233.077, + "step": 2920 + }, + { + "epoch": 0.011837570752715975, + "grad_norm": 858.735107421875, + "learning_rate": 4.688000000000001e-06, + "loss": 237.6312, + "step": 2930 + }, + { + "epoch": 0.011877972018083606, + "grad_norm": 746.8057250976562, + "learning_rate": 4.704e-06, + "loss": 308.5849, + "step": 2940 + }, + { + "epoch": 0.011918373283451238, + "grad_norm": 8038.01904296875, + "learning_rate": 4.7200000000000005e-06, + "loss": 378.3781, + "step": 2950 + }, + { + "epoch": 0.011958774548818869, + "grad_norm": 3946.298583984375, + "learning_rate": 4.736000000000001e-06, + "loss": 539.7088, + "step": 2960 + }, + { + "epoch": 0.0119991758141865, + "grad_norm": 8127.841796875, + "learning_rate": 4.752e-06, + "loss": 346.0011, + "step": 2970 + }, + { + "epoch": 0.012039577079554132, + "grad_norm": 1171.7227783203125, + "learning_rate": 4.768000000000001e-06, + "loss": 308.8039, + "step": 2980 + }, + { + "epoch": 0.012079978344921763, + "grad_norm": 1740.8641357421875, + "learning_rate": 4.784e-06, + "loss": 355.4309, + "step": 2990 + }, + { + "epoch": 0.012120379610289394, + "grad_norm": 1133.52294921875, + "learning_rate": 4.800000000000001e-06, + "loss": 226.1112, + "step": 3000 + }, + { + "epoch": 0.012160780875657026, + "grad_norm": 2531.5517578125, + "learning_rate": 4.816e-06, + "loss": 282.87, + "step": 3010 + }, + { + "epoch": 0.012201182141024657, + "grad_norm": 817.2432861328125, + "learning_rate": 4.8320000000000005e-06, + "loss": 212.4186, + "step": 3020 + }, + { + "epoch": 0.012241583406392288, + "grad_norm": 1266.0361328125, + "learning_rate": 4.848000000000001e-06, + "loss": 311.8225, + "step": 3030 + }, + { + "epoch": 0.01228198467175992, + "grad_norm": 1253.0501708984375, + "learning_rate": 4.864e-06, + "loss": 423.4795, + "step": 3040 + }, + { + "epoch": 0.01232238593712755, + "grad_norm": 1463.8992919921875, + "learning_rate": 4.880000000000001e-06, + "loss": 360.068, + "step": 3050 + }, + { + "epoch": 0.012362787202495182, + "grad_norm": 1051.57275390625, + "learning_rate": 4.896e-06, + "loss": 320.0544, + "step": 3060 + }, + { + "epoch": 0.012403188467862813, + "grad_norm": 1166.1343994140625, + "learning_rate": 4.9120000000000006e-06, + "loss": 306.7178, + "step": 3070 + }, + { + "epoch": 0.012443589733230445, + "grad_norm": 1493.610107421875, + "learning_rate": 4.928000000000001e-06, + "loss": 294.6235, + "step": 3080 + }, + { + "epoch": 0.012483990998598076, + "grad_norm": 2005.4842529296875, + "learning_rate": 4.9440000000000004e-06, + "loss": 351.6958, + "step": 3090 + }, + { + "epoch": 0.012524392263965707, + "grad_norm": 3664.206298828125, + "learning_rate": 4.960000000000001e-06, + "loss": 392.9979, + "step": 3100 + }, + { + "epoch": 0.012564793529333339, + "grad_norm": 903.0733642578125, + "learning_rate": 4.976e-06, + "loss": 360.9281, + "step": 3110 + }, + { + "epoch": 0.01260519479470097, + "grad_norm": 1867.019775390625, + "learning_rate": 4.992e-06, + "loss": 363.9161, + "step": 3120 + }, + { + "epoch": 0.012645596060068601, + "grad_norm": 1038.0279541015625, + "learning_rate": 5.008000000000001e-06, + "loss": 231.3084, + "step": 3130 + }, + { + "epoch": 0.012685997325436233, + "grad_norm": 799.7454833984375, + "learning_rate": 5.024e-06, + "loss": 268.8613, + "step": 
3140 + }, + { + "epoch": 0.012726398590803864, + "grad_norm": 4228.4765625, + "learning_rate": 5.04e-06, + "loss": 350.8994, + "step": 3150 + }, + { + "epoch": 0.012766799856171495, + "grad_norm": 1943.729248046875, + "learning_rate": 5.056000000000001e-06, + "loss": 279.6189, + "step": 3160 + }, + { + "epoch": 0.012807201121539127, + "grad_norm": 857.3060913085938, + "learning_rate": 5.072e-06, + "loss": 195.6288, + "step": 3170 + }, + { + "epoch": 0.012847602386906758, + "grad_norm": 1390.271728515625, + "learning_rate": 5.088000000000001e-06, + "loss": 389.653, + "step": 3180 + }, + { + "epoch": 0.01288800365227439, + "grad_norm": 837.6422729492188, + "learning_rate": 5.104e-06, + "loss": 238.0366, + "step": 3190 + }, + { + "epoch": 0.01292840491764202, + "grad_norm": 14303.1572265625, + "learning_rate": 5.12e-06, + "loss": 412.3738, + "step": 3200 + }, + { + "epoch": 0.012968806183009652, + "grad_norm": 1202.2548828125, + "learning_rate": 5.136e-06, + "loss": 327.0532, + "step": 3210 + }, + { + "epoch": 0.013009207448377283, + "grad_norm": 1023.9451904296875, + "learning_rate": 5.152e-06, + "loss": 245.0008, + "step": 3220 + }, + { + "epoch": 0.013049608713744915, + "grad_norm": 1453.974853515625, + "learning_rate": 5.168000000000001e-06, + "loss": 349.8419, + "step": 3230 + }, + { + "epoch": 0.013090009979112546, + "grad_norm": 2677.341064453125, + "learning_rate": 5.184e-06, + "loss": 286.7262, + "step": 3240 + }, + { + "epoch": 0.013130411244480177, + "grad_norm": 6423.32421875, + "learning_rate": 5.2e-06, + "loss": 386.8147, + "step": 3250 + }, + { + "epoch": 0.013170812509847809, + "grad_norm": 1210.386474609375, + "learning_rate": 5.216e-06, + "loss": 385.7031, + "step": 3260 + }, + { + "epoch": 0.01321121377521544, + "grad_norm": 1163.534423828125, + "learning_rate": 5.232e-06, + "loss": 374.9601, + "step": 3270 + }, + { + "epoch": 0.013251615040583071, + "grad_norm": 1429.89990234375, + "learning_rate": 5.248000000000001e-06, + "loss": 344.7292, + "step": 3280 + }, + { + "epoch": 0.013292016305950703, + "grad_norm": 2631.7470703125, + "learning_rate": 5.264e-06, + "loss": 431.3518, + "step": 3290 + }, + { + "epoch": 0.013332417571318334, + "grad_norm": 1065.1099853515625, + "learning_rate": 5.28e-06, + "loss": 353.078, + "step": 3300 + }, + { + "epoch": 0.013372818836685965, + "grad_norm": 1256.156494140625, + "learning_rate": 5.296e-06, + "loss": 294.3968, + "step": 3310 + }, + { + "epoch": 0.013413220102053596, + "grad_norm": 1270.5047607421875, + "learning_rate": 5.312e-06, + "loss": 243.3785, + "step": 3320 + }, + { + "epoch": 0.013453621367421228, + "grad_norm": 1195.8656005859375, + "learning_rate": 5.328000000000001e-06, + "loss": 279.1851, + "step": 3330 + }, + { + "epoch": 0.013494022632788859, + "grad_norm": 1271.0672607421875, + "learning_rate": 5.344e-06, + "loss": 356.859, + "step": 3340 + }, + { + "epoch": 0.01353442389815649, + "grad_norm": 2506.900390625, + "learning_rate": 5.36e-06, + "loss": 248.9634, + "step": 3350 + }, + { + "epoch": 0.013574825163524122, + "grad_norm": 995.7321166992188, + "learning_rate": 5.376e-06, + "loss": 194.334, + "step": 3360 + }, + { + "epoch": 0.013615226428891753, + "grad_norm": 1019.4531860351562, + "learning_rate": 5.392e-06, + "loss": 368.7417, + "step": 3370 + }, + { + "epoch": 0.013655627694259384, + "grad_norm": 1038.95556640625, + "learning_rate": 5.408e-06, + "loss": 217.6359, + "step": 3380 + }, + { + "epoch": 0.013696028959627016, + "grad_norm": 1638.4698486328125, + "learning_rate": 5.424e-06, + "loss": 308.5207, + 
"step": 3390 + }, + { + "epoch": 0.013736430224994647, + "grad_norm": 1261.9429931640625, + "learning_rate": 5.4400000000000004e-06, + "loss": 249.1452, + "step": 3400 + }, + { + "epoch": 0.013776831490362278, + "grad_norm": 1537.7099609375, + "learning_rate": 5.456e-06, + "loss": 458.407, + "step": 3410 + }, + { + "epoch": 0.01381723275572991, + "grad_norm": 1431.4654541015625, + "learning_rate": 5.472e-06, + "loss": 351.9274, + "step": 3420 + }, + { + "epoch": 0.013857634021097541, + "grad_norm": 4019.198974609375, + "learning_rate": 5.488e-06, + "loss": 288.8691, + "step": 3430 + }, + { + "epoch": 0.013898035286465172, + "grad_norm": 936.09375, + "learning_rate": 5.504e-06, + "loss": 190.8963, + "step": 3440 + }, + { + "epoch": 0.013938436551832804, + "grad_norm": 913.061767578125, + "learning_rate": 5.5200000000000005e-06, + "loss": 287.2033, + "step": 3450 + }, + { + "epoch": 0.013978837817200435, + "grad_norm": 1747.047119140625, + "learning_rate": 5.536e-06, + "loss": 305.4842, + "step": 3460 + }, + { + "epoch": 0.014019239082568066, + "grad_norm": 1835.623046875, + "learning_rate": 5.552e-06, + "loss": 259.9541, + "step": 3470 + }, + { + "epoch": 0.014059640347935698, + "grad_norm": 2608.909912109375, + "learning_rate": 5.568e-06, + "loss": 296.8973, + "step": 3480 + }, + { + "epoch": 0.014100041613303329, + "grad_norm": 1144.64306640625, + "learning_rate": 5.584e-06, + "loss": 259.4124, + "step": 3490 + }, + { + "epoch": 0.01414044287867096, + "grad_norm": 1919.8992919921875, + "learning_rate": 5.600000000000001e-06, + "loss": 180.7019, + "step": 3500 + }, + { + "epoch": 0.014180844144038592, + "grad_norm": 801.7567138671875, + "learning_rate": 5.616e-06, + "loss": 295.5063, + "step": 3510 + }, + { + "epoch": 0.014221245409406223, + "grad_norm": 1002.9140625, + "learning_rate": 5.6320000000000005e-06, + "loss": 242.374, + "step": 3520 + }, + { + "epoch": 0.014261646674773854, + "grad_norm": 1139.8887939453125, + "learning_rate": 5.648e-06, + "loss": 224.8221, + "step": 3530 + }, + { + "epoch": 0.014302047940141486, + "grad_norm": 4362.23974609375, + "learning_rate": 5.664e-06, + "loss": 213.6449, + "step": 3540 + }, + { + "epoch": 0.014342449205509117, + "grad_norm": 827.4354248046875, + "learning_rate": 5.68e-06, + "loss": 214.7403, + "step": 3550 + }, + { + "epoch": 0.014382850470876748, + "grad_norm": 661.9734497070312, + "learning_rate": 5.696e-06, + "loss": 291.5407, + "step": 3560 + }, + { + "epoch": 0.01442325173624438, + "grad_norm": 1605.50244140625, + "learning_rate": 5.7120000000000005e-06, + "loss": 238.7724, + "step": 3570 + }, + { + "epoch": 0.01446365300161201, + "grad_norm": 604.7938842773438, + "learning_rate": 5.728e-06, + "loss": 165.1484, + "step": 3580 + }, + { + "epoch": 0.014504054266979642, + "grad_norm": 1426.676513671875, + "learning_rate": 5.744e-06, + "loss": 235.9114, + "step": 3590 + }, + { + "epoch": 0.014544455532347273, + "grad_norm": 1244.5186767578125, + "learning_rate": 5.76e-06, + "loss": 257.5335, + "step": 3600 + }, + { + "epoch": 0.014584856797714905, + "grad_norm": 1244.91650390625, + "learning_rate": 5.776e-06, + "loss": 265.668, + "step": 3610 + }, + { + "epoch": 0.014625258063082536, + "grad_norm": 1108.5050048828125, + "learning_rate": 5.792000000000001e-06, + "loss": 242.1982, + "step": 3620 + }, + { + "epoch": 0.014665659328450167, + "grad_norm": 635.9376220703125, + "learning_rate": 5.808e-06, + "loss": 222.475, + "step": 3630 + }, + { + "epoch": 0.014706060593817799, + "grad_norm": 2577.629638671875, + "learning_rate": 
5.8240000000000005e-06, + "loss": 268.2879, + "step": 3640 + }, + { + "epoch": 0.01474646185918543, + "grad_norm": 1114.5150146484375, + "learning_rate": 5.84e-06, + "loss": 287.5092, + "step": 3650 + }, + { + "epoch": 0.014786863124553061, + "grad_norm": 2371.987548828125, + "learning_rate": 5.856e-06, + "loss": 279.0615, + "step": 3660 + }, + { + "epoch": 0.014827264389920693, + "grad_norm": 1137.3331298828125, + "learning_rate": 5.872000000000001e-06, + "loss": 331.486, + "step": 3670 + }, + { + "epoch": 0.014867665655288324, + "grad_norm": 3668.4619140625, + "learning_rate": 5.888e-06, + "loss": 295.4681, + "step": 3680 + }, + { + "epoch": 0.014908066920655955, + "grad_norm": 1512.5174560546875, + "learning_rate": 5.9040000000000006e-06, + "loss": 436.8039, + "step": 3690 + }, + { + "epoch": 0.014948468186023587, + "grad_norm": 1788.1517333984375, + "learning_rate": 5.92e-06, + "loss": 360.8825, + "step": 3700 + }, + { + "epoch": 0.014988869451391218, + "grad_norm": 888.7390747070312, + "learning_rate": 5.9360000000000004e-06, + "loss": 252.663, + "step": 3710 + }, + { + "epoch": 0.01502927071675885, + "grad_norm": 1885.17822265625, + "learning_rate": 5.952e-06, + "loss": 255.9808, + "step": 3720 + }, + { + "epoch": 0.01506967198212648, + "grad_norm": 1226.7447509765625, + "learning_rate": 5.968e-06, + "loss": 315.3454, + "step": 3730 + }, + { + "epoch": 0.015110073247494112, + "grad_norm": 1327.62890625, + "learning_rate": 5.984000000000001e-06, + "loss": 271.0356, + "step": 3740 + }, + { + "epoch": 0.015150474512861743, + "grad_norm": 3071.997314453125, + "learning_rate": 6e-06, + "loss": 419.9461, + "step": 3750 + }, + { + "epoch": 0.015190875778229375, + "grad_norm": 1097.32421875, + "learning_rate": 6.0160000000000005e-06, + "loss": 266.7487, + "step": 3760 + }, + { + "epoch": 0.015231277043597006, + "grad_norm": 2065.014404296875, + "learning_rate": 6.032e-06, + "loss": 318.438, + "step": 3770 + }, + { + "epoch": 0.015271678308964637, + "grad_norm": 2205.13623046875, + "learning_rate": 6.048e-06, + "loss": 250.8604, + "step": 3780 + }, + { + "epoch": 0.015312079574332269, + "grad_norm": 1338.4024658203125, + "learning_rate": 6.064000000000001e-06, + "loss": 386.026, + "step": 3790 + }, + { + "epoch": 0.0153524808396999, + "grad_norm": 1298.8560791015625, + "learning_rate": 6.08e-06, + "loss": 186.9294, + "step": 3800 + }, + { + "epoch": 0.015392882105067531, + "grad_norm": 1236.49853515625, + "learning_rate": 6.096000000000001e-06, + "loss": 330.0608, + "step": 3810 + }, + { + "epoch": 0.015433283370435162, + "grad_norm": 744.3428955078125, + "learning_rate": 6.112e-06, + "loss": 232.1786, + "step": 3820 + }, + { + "epoch": 0.015473684635802794, + "grad_norm": 1531.652099609375, + "learning_rate": 6.1280000000000005e-06, + "loss": 345.5647, + "step": 3830 + }, + { + "epoch": 0.015514085901170425, + "grad_norm": 3720.118896484375, + "learning_rate": 6.144e-06, + "loss": 257.9943, + "step": 3840 + }, + { + "epoch": 0.015554487166538056, + "grad_norm": 956.6477661132812, + "learning_rate": 6.16e-06, + "loss": 219.898, + "step": 3850 + }, + { + "epoch": 0.015594888431905688, + "grad_norm": 1711.3345947265625, + "learning_rate": 6.176000000000001e-06, + "loss": 278.4205, + "step": 3860 + }, + { + "epoch": 0.01563528969727332, + "grad_norm": 1853.4608154296875, + "learning_rate": 6.192e-06, + "loss": 314.6755, + "step": 3870 + }, + { + "epoch": 0.01567569096264095, + "grad_norm": 568.2243041992188, + "learning_rate": 6.2080000000000005e-06, + "loss": 236.7114, + "step": 3880 + }, + { + 
"epoch": 0.01571609222800858, + "grad_norm": 929.5877685546875, + "learning_rate": 6.224e-06, + "loss": 276.6947, + "step": 3890 + }, + { + "epoch": 0.01575649349337621, + "grad_norm": 1717.252197265625, + "learning_rate": 6.24e-06, + "loss": 291.185, + "step": 3900 + }, + { + "epoch": 0.015796894758743844, + "grad_norm": 777.5992431640625, + "learning_rate": 6.256000000000001e-06, + "loss": 233.4107, + "step": 3910 + }, + { + "epoch": 0.015837296024111474, + "grad_norm": 1492.299072265625, + "learning_rate": 6.272e-06, + "loss": 295.5094, + "step": 3920 + }, + { + "epoch": 0.015877697289479107, + "grad_norm": 1894.0343017578125, + "learning_rate": 6.288000000000001e-06, + "loss": 248.6706, + "step": 3930 + }, + { + "epoch": 0.015918098554846737, + "grad_norm": 2101.454833984375, + "learning_rate": 6.304e-06, + "loss": 234.9749, + "step": 3940 + }, + { + "epoch": 0.01595849982021437, + "grad_norm": 1103.639892578125, + "learning_rate": 6.3200000000000005e-06, + "loss": 329.0602, + "step": 3950 + }, + { + "epoch": 0.015998901085582, + "grad_norm": 1020.775390625, + "learning_rate": 6.336000000000001e-06, + "loss": 282.0449, + "step": 3960 + }, + { + "epoch": 0.016039302350949632, + "grad_norm": 850.4788818359375, + "learning_rate": 6.352e-06, + "loss": 329.4406, + "step": 3970 + }, + { + "epoch": 0.016079703616317262, + "grad_norm": 914.36962890625, + "learning_rate": 6.368000000000001e-06, + "loss": 258.5006, + "step": 3980 + }, + { + "epoch": 0.016120104881684895, + "grad_norm": 1017.7191772460938, + "learning_rate": 6.384e-06, + "loss": 258.551, + "step": 3990 + }, + { + "epoch": 0.016160506147052525, + "grad_norm": 679.4682006835938, + "learning_rate": 6.4000000000000006e-06, + "loss": 296.0246, + "step": 4000 + }, + { + "epoch": 0.016200907412420158, + "grad_norm": 907.2091674804688, + "learning_rate": 6.416e-06, + "loss": 225.9439, + "step": 4010 + }, + { + "epoch": 0.016241308677787787, + "grad_norm": 2474.89794921875, + "learning_rate": 6.432e-06, + "loss": 297.3042, + "step": 4020 + }, + { + "epoch": 0.01628170994315542, + "grad_norm": 823.4977416992188, + "learning_rate": 6.448000000000001e-06, + "loss": 289.351, + "step": 4030 + }, + { + "epoch": 0.01632211120852305, + "grad_norm": 1022.7669067382812, + "learning_rate": 6.464e-06, + "loss": 351.5874, + "step": 4040 + }, + { + "epoch": 0.016362512473890683, + "grad_norm": 1488.401123046875, + "learning_rate": 6.480000000000001e-06, + "loss": 236.7259, + "step": 4050 + }, + { + "epoch": 0.016402913739258312, + "grad_norm": 1253.2579345703125, + "learning_rate": 6.496e-06, + "loss": 356.0159, + "step": 4060 + }, + { + "epoch": 0.016443315004625945, + "grad_norm": 1404.846923828125, + "learning_rate": 6.5120000000000005e-06, + "loss": 262.6719, + "step": 4070 + }, + { + "epoch": 0.016483716269993575, + "grad_norm": 1063.0316162109375, + "learning_rate": 6.528000000000001e-06, + "loss": 258.9979, + "step": 4080 + }, + { + "epoch": 0.016524117535361208, + "grad_norm": 840.8153686523438, + "learning_rate": 6.544e-06, + "loss": 362.6634, + "step": 4090 + }, + { + "epoch": 0.016564518800728838, + "grad_norm": 998.3060913085938, + "learning_rate": 6.560000000000001e-06, + "loss": 275.599, + "step": 4100 + }, + { + "epoch": 0.01660492006609647, + "grad_norm": 808.7373046875, + "learning_rate": 6.576e-06, + "loss": 218.3437, + "step": 4110 + }, + { + "epoch": 0.0166453213314641, + "grad_norm": 824.2646484375, + "learning_rate": 6.592000000000001e-06, + "loss": 344.7016, + "step": 4120 + }, + { + "epoch": 0.016685722596831733, + "grad_norm": 
937.9525756835938, + "learning_rate": 6.608000000000001e-06, + "loss": 229.0095, + "step": 4130 + }, + { + "epoch": 0.016726123862199363, + "grad_norm": 1241.649169921875, + "learning_rate": 6.6240000000000004e-06, + "loss": 284.2322, + "step": 4140 + }, + { + "epoch": 0.016766525127566996, + "grad_norm": 1843.9686279296875, + "learning_rate": 6.640000000000001e-06, + "loss": 337.7978, + "step": 4150 + }, + { + "epoch": 0.016806926392934626, + "grad_norm": 1029.9886474609375, + "learning_rate": 6.656e-06, + "loss": 297.7729, + "step": 4160 + }, + { + "epoch": 0.01684732765830226, + "grad_norm": 1820.8560791015625, + "learning_rate": 6.672000000000001e-06, + "loss": 366.4, + "step": 4170 + }, + { + "epoch": 0.016887728923669888, + "grad_norm": 3317.30126953125, + "learning_rate": 6.688e-06, + "loss": 258.9138, + "step": 4180 + }, + { + "epoch": 0.01692813018903752, + "grad_norm": 806.3054809570312, + "learning_rate": 6.7040000000000005e-06, + "loss": 259.3327, + "step": 4190 + }, + { + "epoch": 0.01696853145440515, + "grad_norm": 1702.472412109375, + "learning_rate": 6.720000000000001e-06, + "loss": 245.2337, + "step": 4200 + }, + { + "epoch": 0.017008932719772784, + "grad_norm": 982.8438110351562, + "learning_rate": 6.736e-06, + "loss": 286.6888, + "step": 4210 + }, + { + "epoch": 0.017049333985140414, + "grad_norm": 586.34521484375, + "learning_rate": 6.752000000000001e-06, + "loss": 196.7898, + "step": 4220 + }, + { + "epoch": 0.017089735250508047, + "grad_norm": 1356.07568359375, + "learning_rate": 6.768e-06, + "loss": 308.8894, + "step": 4230 + }, + { + "epoch": 0.017130136515875676, + "grad_norm": 865.9857177734375, + "learning_rate": 6.784000000000001e-06, + "loss": 330.3007, + "step": 4240 + }, + { + "epoch": 0.01717053778124331, + "grad_norm": 1744.1884765625, + "learning_rate": 6.800000000000001e-06, + "loss": 396.9557, + "step": 4250 + }, + { + "epoch": 0.01721093904661094, + "grad_norm": 1406.06005859375, + "learning_rate": 6.8160000000000005e-06, + "loss": 285.9503, + "step": 4260 + }, + { + "epoch": 0.017251340311978572, + "grad_norm": 1030.517333984375, + "learning_rate": 6.832000000000001e-06, + "loss": 264.8144, + "step": 4270 + }, + { + "epoch": 0.0172917415773462, + "grad_norm": 1353.4794921875, + "learning_rate": 6.848e-06, + "loss": 349.0808, + "step": 4280 + }, + { + "epoch": 0.017332142842713835, + "grad_norm": 683.7527465820312, + "learning_rate": 6.864000000000001e-06, + "loss": 147.0283, + "step": 4290 + }, + { + "epoch": 0.017372544108081464, + "grad_norm": 1547.75146484375, + "learning_rate": 6.88e-06, + "loss": 252.7679, + "step": 4300 + }, + { + "epoch": 0.017412945373449097, + "grad_norm": 1080.0166015625, + "learning_rate": 6.8960000000000006e-06, + "loss": 353.9565, + "step": 4310 + }, + { + "epoch": 0.017453346638816727, + "grad_norm": 1531.3402099609375, + "learning_rate": 6.912000000000001e-06, + "loss": 268.7003, + "step": 4320 + }, + { + "epoch": 0.01749374790418436, + "grad_norm": 1259.77197265625, + "learning_rate": 6.928e-06, + "loss": 183.2306, + "step": 4330 + }, + { + "epoch": 0.01753414916955199, + "grad_norm": 2066.659912109375, + "learning_rate": 6.944000000000001e-06, + "loss": 267.276, + "step": 4340 + }, + { + "epoch": 0.017574550434919622, + "grad_norm": 1241.9390869140625, + "learning_rate": 6.96e-06, + "loss": 227.1713, + "step": 4350 + }, + { + "epoch": 0.017614951700287252, + "grad_norm": 1441.7127685546875, + "learning_rate": 6.976000000000001e-06, + "loss": 211.2588, + "step": 4360 + }, + { + "epoch": 0.017655352965654885, + 
"grad_norm": 1483.36865234375, + "learning_rate": 6.992000000000001e-06, + "loss": 261.9574, + "step": 4370 + }, + { + "epoch": 0.017695754231022515, + "grad_norm": 762.450927734375, + "learning_rate": 7.0080000000000005e-06, + "loss": 287.2083, + "step": 4380 + }, + { + "epoch": 0.017736155496390148, + "grad_norm": 1006.8515014648438, + "learning_rate": 7.024000000000001e-06, + "loss": 215.8849, + "step": 4390 + }, + { + "epoch": 0.017776556761757777, + "grad_norm": 986.262451171875, + "learning_rate": 7.04e-06, + "loss": 328.1171, + "step": 4400 + }, + { + "epoch": 0.01781695802712541, + "grad_norm": 751.958251953125, + "learning_rate": 7.056000000000001e-06, + "loss": 229.9674, + "step": 4410 + }, + { + "epoch": 0.01785735929249304, + "grad_norm": 1216.1976318359375, + "learning_rate": 7.072000000000001e-06, + "loss": 223.3181, + "step": 4420 + }, + { + "epoch": 0.017897760557860673, + "grad_norm": 1833.3480224609375, + "learning_rate": 7.088000000000001e-06, + "loss": 245.1551, + "step": 4430 + }, + { + "epoch": 0.017938161823228303, + "grad_norm": 1043.4547119140625, + "learning_rate": 7.104000000000001e-06, + "loss": 210.766, + "step": 4440 + }, + { + "epoch": 0.017978563088595936, + "grad_norm": 1301.4765625, + "learning_rate": 7.1200000000000004e-06, + "loss": 223.3755, + "step": 4450 + }, + { + "epoch": 0.018018964353963565, + "grad_norm": 977.0892944335938, + "learning_rate": 7.136000000000001e-06, + "loss": 241.8964, + "step": 4460 + }, + { + "epoch": 0.0180593656193312, + "grad_norm": 1503.373291015625, + "learning_rate": 7.152e-06, + "loss": 234.4034, + "step": 4470 + }, + { + "epoch": 0.018099766884698828, + "grad_norm": 1311.2618408203125, + "learning_rate": 7.168000000000001e-06, + "loss": 256.5997, + "step": 4480 + }, + { + "epoch": 0.01814016815006646, + "grad_norm": 1278.5242919921875, + "learning_rate": 7.184000000000001e-06, + "loss": 219.7673, + "step": 4490 + }, + { + "epoch": 0.01818056941543409, + "grad_norm": 972.2399291992188, + "learning_rate": 7.2000000000000005e-06, + "loss": 255.0958, + "step": 4500 + }, + { + "epoch": 0.018220970680801724, + "grad_norm": 1770.33544921875, + "learning_rate": 7.216000000000001e-06, + "loss": 244.025, + "step": 4510 + }, + { + "epoch": 0.018261371946169353, + "grad_norm": 805.673095703125, + "learning_rate": 7.232e-06, + "loss": 214.5408, + "step": 4520 + }, + { + "epoch": 0.018301773211536986, + "grad_norm": 1197.509521484375, + "learning_rate": 7.248000000000001e-06, + "loss": 246.9174, + "step": 4530 + }, + { + "epoch": 0.018342174476904616, + "grad_norm": 806.6310424804688, + "learning_rate": 7.264000000000001e-06, + "loss": 206.2906, + "step": 4540 + }, + { + "epoch": 0.01838257574227225, + "grad_norm": 1090.4598388671875, + "learning_rate": 7.280000000000001e-06, + "loss": 342.4046, + "step": 4550 + }, + { + "epoch": 0.01842297700763988, + "grad_norm": 1729.3848876953125, + "learning_rate": 7.296000000000001e-06, + "loss": 343.3959, + "step": 4560 + }, + { + "epoch": 0.01846337827300751, + "grad_norm": 1381.7520751953125, + "learning_rate": 7.3120000000000005e-06, + "loss": 275.0602, + "step": 4570 + }, + { + "epoch": 0.01850377953837514, + "grad_norm": 1330.241943359375, + "learning_rate": 7.328000000000001e-06, + "loss": 294.121, + "step": 4580 + }, + { + "epoch": 0.018544180803742774, + "grad_norm": 598.6343383789062, + "learning_rate": 7.344000000000001e-06, + "loss": 337.0769, + "step": 4590 + }, + { + "epoch": 0.018584582069110404, + "grad_norm": 2955.0537109375, + "learning_rate": 7.360000000000001e-06, + "loss": 
328.8341, + "step": 4600 + }, + { + "epoch": 0.018624983334478037, + "grad_norm": 1042.451416015625, + "learning_rate": 7.376000000000001e-06, + "loss": 219.7405, + "step": 4610 + }, + { + "epoch": 0.018665384599845666, + "grad_norm": 645.2960815429688, + "learning_rate": 7.3920000000000005e-06, + "loss": 192.2549, + "step": 4620 + }, + { + "epoch": 0.0187057858652133, + "grad_norm": 915.4968872070312, + "learning_rate": 7.408000000000001e-06, + "loss": 201.1788, + "step": 4630 + }, + { + "epoch": 0.01874618713058093, + "grad_norm": 1424.8746337890625, + "learning_rate": 7.424e-06, + "loss": 195.0819, + "step": 4640 + }, + { + "epoch": 0.018786588395948562, + "grad_norm": 2144.802001953125, + "learning_rate": 7.440000000000001e-06, + "loss": 288.2222, + "step": 4650 + }, + { + "epoch": 0.01882698966131619, + "grad_norm": 2112.1357421875, + "learning_rate": 7.456000000000001e-06, + "loss": 309.6281, + "step": 4660 + }, + { + "epoch": 0.018867390926683825, + "grad_norm": 755.1412963867188, + "learning_rate": 7.472000000000001e-06, + "loss": 296.6433, + "step": 4670 + }, + { + "epoch": 0.018907792192051454, + "grad_norm": 1284.7757568359375, + "learning_rate": 7.488000000000001e-06, + "loss": 220.2537, + "step": 4680 + }, + { + "epoch": 0.018948193457419087, + "grad_norm": 1434.192138671875, + "learning_rate": 7.5040000000000005e-06, + "loss": 218.4309, + "step": 4690 + }, + { + "epoch": 0.018988594722786717, + "grad_norm": 1657.323486328125, + "learning_rate": 7.520000000000001e-06, + "loss": 250.5378, + "step": 4700 + }, + { + "epoch": 0.01902899598815435, + "grad_norm": 1677.9984130859375, + "learning_rate": 7.536000000000001e-06, + "loss": 263.5347, + "step": 4710 + }, + { + "epoch": 0.01906939725352198, + "grad_norm": 25112.99609375, + "learning_rate": 7.552000000000001e-06, + "loss": 288.2444, + "step": 4720 + }, + { + "epoch": 0.019109798518889613, + "grad_norm": 2217.6513671875, + "learning_rate": 7.568000000000001e-06, + "loss": 204.6235, + "step": 4730 + }, + { + "epoch": 0.019150199784257242, + "grad_norm": 2659.453369140625, + "learning_rate": 7.5840000000000006e-06, + "loss": 274.2158, + "step": 4740 + }, + { + "epoch": 0.019190601049624875, + "grad_norm": 1000.3704833984375, + "learning_rate": 7.600000000000001e-06, + "loss": 283.2697, + "step": 4750 + }, + { + "epoch": 0.019231002314992505, + "grad_norm": 762.8880004882812, + "learning_rate": 7.616000000000001e-06, + "loss": 276.9803, + "step": 4760 + }, + { + "epoch": 0.019271403580360138, + "grad_norm": 873.4342041015625, + "learning_rate": 7.632e-06, + "loss": 265.9914, + "step": 4770 + }, + { + "epoch": 0.019311804845727767, + "grad_norm": 1086.3331298828125, + "learning_rate": 7.648e-06, + "loss": 249.9756, + "step": 4780 + }, + { + "epoch": 0.0193522061110954, + "grad_norm": 667.1944580078125, + "learning_rate": 7.664e-06, + "loss": 175.7695, + "step": 4790 + }, + { + "epoch": 0.01939260737646303, + "grad_norm": 1193.744140625, + "learning_rate": 7.680000000000001e-06, + "loss": 262.291, + "step": 4800 + }, + { + "epoch": 0.019433008641830663, + "grad_norm": 978.5697021484375, + "learning_rate": 7.696e-06, + "loss": 248.4924, + "step": 4810 + }, + { + "epoch": 0.019473409907198293, + "grad_norm": 1675.151611328125, + "learning_rate": 7.712e-06, + "loss": 280.6598, + "step": 4820 + }, + { + "epoch": 0.019513811172565926, + "grad_norm": 1968.623046875, + "learning_rate": 7.728000000000001e-06, + "loss": 232.1146, + "step": 4830 + }, + { + "epoch": 0.019554212437933555, + "grad_norm": 1410.59619140625, + "learning_rate": 
7.744e-06, + "loss": 236.8878, + "step": 4840 + }, + { + "epoch": 0.01959461370330119, + "grad_norm": 1673.7821044921875, + "learning_rate": 7.76e-06, + "loss": 313.7957, + "step": 4850 + }, + { + "epoch": 0.019635014968668818, + "grad_norm": 806.0775756835938, + "learning_rate": 7.776e-06, + "loss": 172.4256, + "step": 4860 + }, + { + "epoch": 0.01967541623403645, + "grad_norm": 2052.37744140625, + "learning_rate": 7.792000000000001e-06, + "loss": 304.2375, + "step": 4870 + }, + { + "epoch": 0.01971581749940408, + "grad_norm": 1913.3585205078125, + "learning_rate": 7.808e-06, + "loss": 209.8889, + "step": 4880 + }, + { + "epoch": 0.019756218764771714, + "grad_norm": 882.1005249023438, + "learning_rate": 7.824e-06, + "loss": 193.8687, + "step": 4890 + }, + { + "epoch": 0.019796620030139343, + "grad_norm": 1239.2547607421875, + "learning_rate": 7.840000000000001e-06, + "loss": 321.5689, + "step": 4900 + }, + { + "epoch": 0.019837021295506976, + "grad_norm": 1661.3309326171875, + "learning_rate": 7.856e-06, + "loss": 300.9056, + "step": 4910 + }, + { + "epoch": 0.019877422560874606, + "grad_norm": 831.425048828125, + "learning_rate": 7.872e-06, + "loss": 166.0655, + "step": 4920 + }, + { + "epoch": 0.01991782382624224, + "grad_norm": 1435.3153076171875, + "learning_rate": 7.888e-06, + "loss": 283.6547, + "step": 4930 + }, + { + "epoch": 0.01995822509160987, + "grad_norm": 1205.42041015625, + "learning_rate": 7.904000000000001e-06, + "loss": 282.5413, + "step": 4940 + }, + { + "epoch": 0.0199986263569775, + "grad_norm": 1201.89306640625, + "learning_rate": 7.92e-06, + "loss": 234.5886, + "step": 4950 + }, + { + "epoch": 0.02003902762234513, + "grad_norm": 1298.3406982421875, + "learning_rate": 7.936e-06, + "loss": 282.8375, + "step": 4960 + }, + { + "epoch": 0.020079428887712764, + "grad_norm": 9463.80078125, + "learning_rate": 7.952000000000001e-06, + "loss": 397.8148, + "step": 4970 + }, + { + "epoch": 0.020119830153080394, + "grad_norm": 1427.537353515625, + "learning_rate": 7.968e-06, + "loss": 244.7616, + "step": 4980 + }, + { + "epoch": 0.020160231418448027, + "grad_norm": 698.2767333984375, + "learning_rate": 7.984e-06, + "loss": 259.3716, + "step": 4990 + }, + { + "epoch": 0.020200632683815656, + "grad_norm": 1627.408203125, + "learning_rate": 8.000000000000001e-06, + "loss": 318.939, + "step": 5000 + }, + { + "epoch": 0.02024103394918329, + "grad_norm": 1125.5098876953125, + "learning_rate": 8.016e-06, + "loss": 241.9507, + "step": 5010 + }, + { + "epoch": 0.02028143521455092, + "grad_norm": 1155.5245361328125, + "learning_rate": 8.032e-06, + "loss": 160.3528, + "step": 5020 + }, + { + "epoch": 0.020321836479918552, + "grad_norm": 1363.8577880859375, + "learning_rate": 8.048e-06, + "loss": 275.5932, + "step": 5030 + }, + { + "epoch": 0.020362237745286182, + "grad_norm": 1199.4857177734375, + "learning_rate": 8.064000000000001e-06, + "loss": 262.2943, + "step": 5040 + }, + { + "epoch": 0.020402639010653815, + "grad_norm": 1065.540283203125, + "learning_rate": 8.08e-06, + "loss": 197.4638, + "step": 5050 + }, + { + "epoch": 0.020443040276021444, + "grad_norm": 1872.7154541015625, + "learning_rate": 8.096e-06, + "loss": 352.5233, + "step": 5060 + }, + { + "epoch": 0.020483441541389077, + "grad_norm": 1295.009033203125, + "learning_rate": 8.112000000000001e-06, + "loss": 226.5581, + "step": 5070 + }, + { + "epoch": 0.020523842806756707, + "grad_norm": 3203.97119140625, + "learning_rate": 8.128e-06, + "loss": 246.8156, + "step": 5080 + }, + { + "epoch": 0.02056424407212434, + "grad_norm": 
1652.342529296875, + "learning_rate": 8.144e-06, + "loss": 309.0181, + "step": 5090 + }, + { + "epoch": 0.02060464533749197, + "grad_norm": 769.5767211914062, + "learning_rate": 8.16e-06, + "loss": 227.3183, + "step": 5100 + }, + { + "epoch": 0.020645046602859603, + "grad_norm": 1102.2655029296875, + "learning_rate": 8.176000000000001e-06, + "loss": 275.9026, + "step": 5110 + }, + { + "epoch": 0.020685447868227232, + "grad_norm": 1106.6676025390625, + "learning_rate": 8.192e-06, + "loss": 209.6418, + "step": 5120 + }, + { + "epoch": 0.020725849133594865, + "grad_norm": 4100.7841796875, + "learning_rate": 8.208e-06, + "loss": 206.917, + "step": 5130 + }, + { + "epoch": 0.020766250398962495, + "grad_norm": 784.90576171875, + "learning_rate": 8.224000000000001e-06, + "loss": 194.9155, + "step": 5140 + }, + { + "epoch": 0.020806651664330128, + "grad_norm": 964.98583984375, + "learning_rate": 8.24e-06, + "loss": 251.4826, + "step": 5150 + }, + { + "epoch": 0.020847052929697758, + "grad_norm": 956.3367309570312, + "learning_rate": 8.256e-06, + "loss": 237.7469, + "step": 5160 + }, + { + "epoch": 0.02088745419506539, + "grad_norm": 1011.2269897460938, + "learning_rate": 8.272000000000001e-06, + "loss": 256.7773, + "step": 5170 + }, + { + "epoch": 0.02092785546043302, + "grad_norm": 1508.70751953125, + "learning_rate": 8.288000000000001e-06, + "loss": 223.0477, + "step": 5180 + }, + { + "epoch": 0.020968256725800653, + "grad_norm": 1241.721435546875, + "learning_rate": 8.304e-06, + "loss": 267.6942, + "step": 5190 + }, + { + "epoch": 0.021008657991168283, + "grad_norm": 911.1593627929688, + "learning_rate": 8.32e-06, + "loss": 258.1604, + "step": 5200 + }, + { + "epoch": 0.021049059256535916, + "grad_norm": 1168.1171875, + "learning_rate": 8.336000000000001e-06, + "loss": 237.9577, + "step": 5210 + }, + { + "epoch": 0.021089460521903546, + "grad_norm": 1035.7568359375, + "learning_rate": 8.352e-06, + "loss": 244.2736, + "step": 5220 + }, + { + "epoch": 0.02112986178727118, + "grad_norm": 1639.7432861328125, + "learning_rate": 8.368e-06, + "loss": 270.4092, + "step": 5230 + }, + { + "epoch": 0.021170263052638808, + "grad_norm": 1320.6915283203125, + "learning_rate": 8.384000000000001e-06, + "loss": 328.2253, + "step": 5240 + }, + { + "epoch": 0.02121066431800644, + "grad_norm": 2665.943115234375, + "learning_rate": 8.400000000000001e-06, + "loss": 272.4385, + "step": 5250 + }, + { + "epoch": 0.02125106558337407, + "grad_norm": 1784.027099609375, + "learning_rate": 8.416e-06, + "loss": 303.7002, + "step": 5260 + }, + { + "epoch": 0.021291466848741704, + "grad_norm": 2815.6337890625, + "learning_rate": 8.432e-06, + "loss": 229.2094, + "step": 5270 + }, + { + "epoch": 0.021331868114109333, + "grad_norm": 1191.8333740234375, + "learning_rate": 8.448000000000001e-06, + "loss": 272.1726, + "step": 5280 + }, + { + "epoch": 0.021372269379476967, + "grad_norm": 1391.3675537109375, + "learning_rate": 8.464e-06, + "loss": 261.5656, + "step": 5290 + }, + { + "epoch": 0.021412670644844596, + "grad_norm": 1128.831787109375, + "learning_rate": 8.48e-06, + "loss": 289.6503, + "step": 5300 + }, + { + "epoch": 0.02145307191021223, + "grad_norm": 2228.027099609375, + "learning_rate": 8.496000000000001e-06, + "loss": 301.1018, + "step": 5310 + }, + { + "epoch": 0.02149347317557986, + "grad_norm": 1066.3154296875, + "learning_rate": 8.512e-06, + "loss": 220.922, + "step": 5320 + }, + { + "epoch": 0.021533874440947492, + "grad_norm": 889.0392456054688, + "learning_rate": 8.528e-06, + "loss": 251.4464, + "step": 5330 + 
}, + { + "epoch": 0.02157427570631512, + "grad_norm": 1437.4632568359375, + "learning_rate": 8.544000000000002e-06, + "loss": 213.1629, + "step": 5340 + }, + { + "epoch": 0.021614676971682754, + "grad_norm": 816.4463500976562, + "learning_rate": 8.560000000000001e-06, + "loss": 243.3332, + "step": 5350 + }, + { + "epoch": 0.021655078237050384, + "grad_norm": 793.9855346679688, + "learning_rate": 8.576e-06, + "loss": 315.989, + "step": 5360 + }, + { + "epoch": 0.021695479502418017, + "grad_norm": 713.7058715820312, + "learning_rate": 8.592e-06, + "loss": 248.082, + "step": 5370 + }, + { + "epoch": 0.021735880767785647, + "grad_norm": 1480.3760986328125, + "learning_rate": 8.608000000000001e-06, + "loss": 302.3042, + "step": 5380 + }, + { + "epoch": 0.02177628203315328, + "grad_norm": 1809.5299072265625, + "learning_rate": 8.624e-06, + "loss": 266.8355, + "step": 5390 + }, + { + "epoch": 0.02181668329852091, + "grad_norm": 2045.40478515625, + "learning_rate": 8.64e-06, + "loss": 334.1392, + "step": 5400 + }, + { + "epoch": 0.021857084563888542, + "grad_norm": 921.8992309570312, + "learning_rate": 8.656000000000001e-06, + "loss": 274.925, + "step": 5410 + }, + { + "epoch": 0.021897485829256172, + "grad_norm": 3326.086669921875, + "learning_rate": 8.672000000000001e-06, + "loss": 278.724, + "step": 5420 + }, + { + "epoch": 0.021937887094623805, + "grad_norm": 4764.14453125, + "learning_rate": 8.688e-06, + "loss": 283.5431, + "step": 5430 + }, + { + "epoch": 0.021978288359991435, + "grad_norm": 1152.634765625, + "learning_rate": 8.704e-06, + "loss": 314.8487, + "step": 5440 + }, + { + "epoch": 0.022018689625359068, + "grad_norm": 6746.86328125, + "learning_rate": 8.720000000000001e-06, + "loss": 425.5981, + "step": 5450 + }, + { + "epoch": 0.022059090890726697, + "grad_norm": 1701.89013671875, + "learning_rate": 8.736e-06, + "loss": 315.6257, + "step": 5460 + }, + { + "epoch": 0.02209949215609433, + "grad_norm": 1392.5045166015625, + "learning_rate": 8.752e-06, + "loss": 236.1939, + "step": 5470 + }, + { + "epoch": 0.02213989342146196, + "grad_norm": 2498.1298828125, + "learning_rate": 8.768000000000001e-06, + "loss": 266.4598, + "step": 5480 + }, + { + "epoch": 0.022180294686829593, + "grad_norm": 1272.607666015625, + "learning_rate": 8.784000000000001e-06, + "loss": 253.4331, + "step": 5490 + }, + { + "epoch": 0.022220695952197222, + "grad_norm": 2149.65478515625, + "learning_rate": 8.8e-06, + "loss": 277.6037, + "step": 5500 + }, + { + "epoch": 0.022261097217564856, + "grad_norm": 2761.5126953125, + "learning_rate": 8.816000000000002e-06, + "loss": 212.0202, + "step": 5510 + }, + { + "epoch": 0.022301498482932485, + "grad_norm": 820.70654296875, + "learning_rate": 8.832000000000001e-06, + "loss": 228.5635, + "step": 5520 + }, + { + "epoch": 0.022341899748300118, + "grad_norm": 3033.90283203125, + "learning_rate": 8.848e-06, + "loss": 209.9557, + "step": 5530 + }, + { + "epoch": 0.022382301013667748, + "grad_norm": 3659.462646484375, + "learning_rate": 8.864e-06, + "loss": 268.2353, + "step": 5540 + }, + { + "epoch": 0.02242270227903538, + "grad_norm": 826.8778076171875, + "learning_rate": 8.880000000000001e-06, + "loss": 265.3575, + "step": 5550 + }, + { + "epoch": 0.02246310354440301, + "grad_norm": 957.8529052734375, + "learning_rate": 8.896000000000001e-06, + "loss": 214.6107, + "step": 5560 + }, + { + "epoch": 0.022503504809770643, + "grad_norm": 804.581787109375, + "learning_rate": 8.912e-06, + "loss": 130.2942, + "step": 5570 + }, + { + "epoch": 0.022543906075138273, + "grad_norm": 
5159.30078125, + "learning_rate": 8.928000000000002e-06, + "loss": 236.5193, + "step": 5580 + }, + { + "epoch": 0.022584307340505906, + "grad_norm": 543.3648681640625, + "learning_rate": 8.944000000000001e-06, + "loss": 265.9012, + "step": 5590 + }, + { + "epoch": 0.022624708605873536, + "grad_norm": 2203.660400390625, + "learning_rate": 8.96e-06, + "loss": 207.047, + "step": 5600 + }, + { + "epoch": 0.02266510987124117, + "grad_norm": 4559.3134765625, + "learning_rate": 8.976e-06, + "loss": 235.675, + "step": 5610 + }, + { + "epoch": 0.0227055111366088, + "grad_norm": 2261.79931640625, + "learning_rate": 8.992000000000001e-06, + "loss": 208.5362, + "step": 5620 + }, + { + "epoch": 0.02274591240197643, + "grad_norm": 1354.80224609375, + "learning_rate": 9.008e-06, + "loss": 318.6765, + "step": 5630 + }, + { + "epoch": 0.02278631366734406, + "grad_norm": 993.9400024414062, + "learning_rate": 9.024e-06, + "loss": 273.4192, + "step": 5640 + }, + { + "epoch": 0.022826714932711694, + "grad_norm": 1987.65625, + "learning_rate": 9.040000000000002e-06, + "loss": 247.7512, + "step": 5650 + }, + { + "epoch": 0.022867116198079324, + "grad_norm": 1504.900634765625, + "learning_rate": 9.056000000000001e-06, + "loss": 283.7123, + "step": 5660 + }, + { + "epoch": 0.022907517463446957, + "grad_norm": 1340.3714599609375, + "learning_rate": 9.072e-06, + "loss": 210.0358, + "step": 5670 + }, + { + "epoch": 0.022947918728814586, + "grad_norm": 650.5925903320312, + "learning_rate": 9.088000000000002e-06, + "loss": 197.4347, + "step": 5680 + }, + { + "epoch": 0.02298831999418222, + "grad_norm": 3964.1806640625, + "learning_rate": 9.104000000000001e-06, + "loss": 381.4996, + "step": 5690 + }, + { + "epoch": 0.02302872125954985, + "grad_norm": 1216.4332275390625, + "learning_rate": 9.12e-06, + "loss": 220.3301, + "step": 5700 + }, + { + "epoch": 0.023069122524917482, + "grad_norm": 1275.948486328125, + "learning_rate": 9.136e-06, + "loss": 233.7147, + "step": 5710 + }, + { + "epoch": 0.02310952379028511, + "grad_norm": 679.8235473632812, + "learning_rate": 9.152000000000001e-06, + "loss": 201.6441, + "step": 5720 + }, + { + "epoch": 0.023149925055652745, + "grad_norm": 894.986083984375, + "learning_rate": 9.168000000000001e-06, + "loss": 175.9701, + "step": 5730 + }, + { + "epoch": 0.023190326321020374, + "grad_norm": 970.5570068359375, + "learning_rate": 9.184e-06, + "loss": 213.3975, + "step": 5740 + }, + { + "epoch": 0.023230727586388007, + "grad_norm": 1242.9453125, + "learning_rate": 9.200000000000002e-06, + "loss": 244.8543, + "step": 5750 + }, + { + "epoch": 0.023271128851755637, + "grad_norm": 2435.6201171875, + "learning_rate": 9.216000000000001e-06, + "loss": 343.3505, + "step": 5760 + }, + { + "epoch": 0.02331153011712327, + "grad_norm": 1273.9871826171875, + "learning_rate": 9.232e-06, + "loss": 213.0056, + "step": 5770 + }, + { + "epoch": 0.0233519313824909, + "grad_norm": 1067.8134765625, + "learning_rate": 9.248e-06, + "loss": 193.8475, + "step": 5780 + }, + { + "epoch": 0.023392332647858533, + "grad_norm": 922.3757934570312, + "learning_rate": 9.264000000000001e-06, + "loss": 287.2585, + "step": 5790 + }, + { + "epoch": 0.023432733913226162, + "grad_norm": 993.0972290039062, + "learning_rate": 9.280000000000001e-06, + "loss": 243.0133, + "step": 5800 + }, + { + "epoch": 0.023473135178593795, + "grad_norm": 1248.3121337890625, + "learning_rate": 9.296e-06, + "loss": 191.7712, + "step": 5810 + }, + { + "epoch": 0.023513536443961425, + "grad_norm": 1064.8681640625, + "learning_rate": 
9.312000000000002e-06, + "loss": 231.3546, + "step": 5820 + }, + { + "epoch": 0.023553937709329058, + "grad_norm": 841.469482421875, + "learning_rate": 9.328000000000001e-06, + "loss": 185.6207, + "step": 5830 + }, + { + "epoch": 0.023594338974696687, + "grad_norm": 1221.2052001953125, + "learning_rate": 9.344e-06, + "loss": 308.7472, + "step": 5840 + }, + { + "epoch": 0.02363474024006432, + "grad_norm": 2944.33056640625, + "learning_rate": 9.360000000000002e-06, + "loss": 403.0928, + "step": 5850 + }, + { + "epoch": 0.02367514150543195, + "grad_norm": 1367.17822265625, + "learning_rate": 9.376000000000001e-06, + "loss": 289.421, + "step": 5860 + }, + { + "epoch": 0.023715542770799583, + "grad_norm": 1525.758056640625, + "learning_rate": 9.392000000000001e-06, + "loss": 235.9515, + "step": 5870 + }, + { + "epoch": 0.023755944036167213, + "grad_norm": 750.6124877929688, + "learning_rate": 9.408e-06, + "loss": 213.2687, + "step": 5880 + }, + { + "epoch": 0.023796345301534846, + "grad_norm": 1718.3712158203125, + "learning_rate": 9.424000000000002e-06, + "loss": 218.4471, + "step": 5890 + }, + { + "epoch": 0.023836746566902475, + "grad_norm": 3491.82421875, + "learning_rate": 9.440000000000001e-06, + "loss": 340.9939, + "step": 5900 + }, + { + "epoch": 0.02387714783227011, + "grad_norm": 1066.6854248046875, + "learning_rate": 9.456e-06, + "loss": 258.0448, + "step": 5910 + }, + { + "epoch": 0.023917549097637738, + "grad_norm": 1190.568603515625, + "learning_rate": 9.472000000000002e-06, + "loss": 157.3825, + "step": 5920 + }, + { + "epoch": 0.02395795036300537, + "grad_norm": 2821.635009765625, + "learning_rate": 9.488000000000001e-06, + "loss": 219.8805, + "step": 5930 + }, + { + "epoch": 0.023998351628373, + "grad_norm": 1169.2569580078125, + "learning_rate": 9.504e-06, + "loss": 190.6602, + "step": 5940 + }, + { + "epoch": 0.024038752893740634, + "grad_norm": 1181.6962890625, + "learning_rate": 9.52e-06, + "loss": 231.9012, + "step": 5950 + }, + { + "epoch": 0.024079154159108263, + "grad_norm": 1127.2459716796875, + "learning_rate": 9.536000000000002e-06, + "loss": 209.0328, + "step": 5960 + }, + { + "epoch": 0.024119555424475896, + "grad_norm": 1260.97607421875, + "learning_rate": 9.552000000000001e-06, + "loss": 228.826, + "step": 5970 + }, + { + "epoch": 0.024159956689843526, + "grad_norm": 1819.3280029296875, + "learning_rate": 9.568e-06, + "loss": 252.8534, + "step": 5980 + }, + { + "epoch": 0.02420035795521116, + "grad_norm": 1801.2034912109375, + "learning_rate": 9.584000000000002e-06, + "loss": 125.3112, + "step": 5990 + }, + { + "epoch": 0.02424075922057879, + "grad_norm": 1087.2525634765625, + "learning_rate": 9.600000000000001e-06, + "loss": 226.0337, + "step": 6000 + }, + { + "epoch": 0.02428116048594642, + "grad_norm": 1611.1527099609375, + "learning_rate": 9.616e-06, + "loss": 181.9912, + "step": 6010 + }, + { + "epoch": 0.02432156175131405, + "grad_norm": 1818.4970703125, + "learning_rate": 9.632e-06, + "loss": 197.5991, + "step": 6020 + }, + { + "epoch": 0.024361963016681684, + "grad_norm": 4453.69775390625, + "learning_rate": 9.648000000000001e-06, + "loss": 261.9139, + "step": 6030 + }, + { + "epoch": 0.024402364282049314, + "grad_norm": 1599.2523193359375, + "learning_rate": 9.664000000000001e-06, + "loss": 173.9645, + "step": 6040 + }, + { + "epoch": 0.024442765547416947, + "grad_norm": 928.9969482421875, + "learning_rate": 9.68e-06, + "loss": 236.7229, + "step": 6050 + }, + { + "epoch": 0.024483166812784576, + "grad_norm": 741.7071533203125, + "learning_rate": 
9.696000000000002e-06, + "loss": 237.3337, + "step": 6060 + }, + { + "epoch": 0.02452356807815221, + "grad_norm": 1199.0321044921875, + "learning_rate": 9.712e-06, + "loss": 243.3929, + "step": 6070 + }, + { + "epoch": 0.02456396934351984, + "grad_norm": 3222.19921875, + "learning_rate": 9.728e-06, + "loss": 245.7618, + "step": 6080 + }, + { + "epoch": 0.02460437060888747, + "grad_norm": 1373.3070068359375, + "learning_rate": 9.744000000000002e-06, + "loss": 247.9384, + "step": 6090 + }, + { + "epoch": 0.0246447718742551, + "grad_norm": 1312.643310546875, + "learning_rate": 9.760000000000001e-06, + "loss": 279.8946, + "step": 6100 + }, + { + "epoch": 0.02468517313962273, + "grad_norm": 1059.925537109375, + "learning_rate": 9.776000000000001e-06, + "loss": 239.1137, + "step": 6110 + }, + { + "epoch": 0.024725574404990364, + "grad_norm": 1259.3118896484375, + "learning_rate": 9.792e-06, + "loss": 374.1089, + "step": 6120 + }, + { + "epoch": 0.024765975670357994, + "grad_norm": 1288.7239990234375, + "learning_rate": 9.808000000000002e-06, + "loss": 230.3889, + "step": 6130 + }, + { + "epoch": 0.024806376935725627, + "grad_norm": 2049.273193359375, + "learning_rate": 9.824000000000001e-06, + "loss": 234.9968, + "step": 6140 + }, + { + "epoch": 0.024846778201093257, + "grad_norm": 1086.63671875, + "learning_rate": 9.84e-06, + "loss": 228.202, + "step": 6150 + }, + { + "epoch": 0.02488717946646089, + "grad_norm": 952.3794555664062, + "learning_rate": 9.856000000000002e-06, + "loss": 218.6012, + "step": 6160 + }, + { + "epoch": 0.02492758073182852, + "grad_norm": 1268.853271484375, + "learning_rate": 9.872e-06, + "loss": 217.6621, + "step": 6170 + }, + { + "epoch": 0.024967981997196152, + "grad_norm": 900.3886108398438, + "learning_rate": 9.888000000000001e-06, + "loss": 244.897, + "step": 6180 + }, + { + "epoch": 0.025008383262563782, + "grad_norm": 1264.2623291015625, + "learning_rate": 9.904e-06, + "loss": 224.502, + "step": 6190 + }, + { + "epoch": 0.025048784527931415, + "grad_norm": 1671.768798828125, + "learning_rate": 9.920000000000002e-06, + "loss": 273.6844, + "step": 6200 + }, + { + "epoch": 0.025089185793299044, + "grad_norm": 9169.4091796875, + "learning_rate": 9.936000000000001e-06, + "loss": 357.0876, + "step": 6210 + }, + { + "epoch": 0.025129587058666678, + "grad_norm": 732.900390625, + "learning_rate": 9.952e-06, + "loss": 236.1482, + "step": 6220 + }, + { + "epoch": 0.025169988324034307, + "grad_norm": 772.3697509765625, + "learning_rate": 9.968000000000002e-06, + "loss": 211.1332, + "step": 6230 + }, + { + "epoch": 0.02521038958940194, + "grad_norm": 2270.56982421875, + "learning_rate": 9.984e-06, + "loss": 196.3657, + "step": 6240 + }, + { + "epoch": 0.02525079085476957, + "grad_norm": 663.3539428710938, + "learning_rate": 1e-05, + "loss": 273.3021, + "step": 6250 + }, + { + "epoch": 0.025291192120137203, + "grad_norm": 916.4563598632812, + "learning_rate": 1.0016000000000002e-05, + "loss": 247.5321, + "step": 6260 + }, + { + "epoch": 0.025331593385504832, + "grad_norm": 3971.783447265625, + "learning_rate": 1.0032000000000002e-05, + "loss": 353.4373, + "step": 6270 + }, + { + "epoch": 0.025371994650872465, + "grad_norm": 3589.02197265625, + "learning_rate": 1.0048e-05, + "loss": 227.3804, + "step": 6280 + }, + { + "epoch": 0.025412395916240095, + "grad_norm": 967.741943359375, + "learning_rate": 1.0064e-05, + "loss": 155.3905, + "step": 6290 + }, + { + "epoch": 0.025452797181607728, + "grad_norm": 1831.8065185546875, + "learning_rate": 1.008e-05, + "loss": 372.5004, + 
"step": 6300 + }, + { + "epoch": 0.025493198446975358, + "grad_norm": 648.0638427734375, + "learning_rate": 1.0096000000000001e-05, + "loss": 158.9148, + "step": 6310 + }, + { + "epoch": 0.02553359971234299, + "grad_norm": 2424.837890625, + "learning_rate": 1.0112000000000002e-05, + "loss": 240.5772, + "step": 6320 + }, + { + "epoch": 0.02557400097771062, + "grad_norm": 3069.214599609375, + "learning_rate": 1.0128e-05, + "loss": 312.543, + "step": 6330 + }, + { + "epoch": 0.025614402243078253, + "grad_norm": 1035.3349609375, + "learning_rate": 1.0144e-05, + "loss": 266.0721, + "step": 6340 + }, + { + "epoch": 0.025654803508445883, + "grad_norm": 705.839599609375, + "learning_rate": 1.0160000000000001e-05, + "loss": 285.35, + "step": 6350 + }, + { + "epoch": 0.025695204773813516, + "grad_norm": 1727.034423828125, + "learning_rate": 1.0176000000000002e-05, + "loss": 239.1911, + "step": 6360 + }, + { + "epoch": 0.025735606039181146, + "grad_norm": 1072.3416748046875, + "learning_rate": 1.0192000000000002e-05, + "loss": 215.7166, + "step": 6370 + }, + { + "epoch": 0.02577600730454878, + "grad_norm": 1102.0108642578125, + "learning_rate": 1.0208e-05, + "loss": 290.4329, + "step": 6380 + }, + { + "epoch": 0.025816408569916408, + "grad_norm": 7250.52490234375, + "learning_rate": 1.0224e-05, + "loss": 395.3578, + "step": 6390 + }, + { + "epoch": 0.02585680983528404, + "grad_norm": 915.8613891601562, + "learning_rate": 1.024e-05, + "loss": 204.383, + "step": 6400 + }, + { + "epoch": 0.02589721110065167, + "grad_norm": 632.5206298828125, + "learning_rate": 1.0256000000000001e-05, + "loss": 255.6531, + "step": 6410 + }, + { + "epoch": 0.025937612366019304, + "grad_norm": 1992.5274658203125, + "learning_rate": 1.0272e-05, + "loss": 206.2615, + "step": 6420 + }, + { + "epoch": 0.025978013631386934, + "grad_norm": 769.8214721679688, + "learning_rate": 1.0288e-05, + "loss": 216.3271, + "step": 6430 + }, + { + "epoch": 0.026018414896754567, + "grad_norm": 3722.18212890625, + "learning_rate": 1.0304e-05, + "loss": 281.4038, + "step": 6440 + }, + { + "epoch": 0.026058816162122196, + "grad_norm": 1554.7005615234375, + "learning_rate": 1.0320000000000001e-05, + "loss": 277.0629, + "step": 6450 + }, + { + "epoch": 0.02609921742748983, + "grad_norm": 2398.617431640625, + "learning_rate": 1.0336000000000002e-05, + "loss": 171.7984, + "step": 6460 + }, + { + "epoch": 0.02613961869285746, + "grad_norm": 882.567138671875, + "learning_rate": 1.0352e-05, + "loss": 253.2358, + "step": 6470 + }, + { + "epoch": 0.026180019958225092, + "grad_norm": 940.6752319335938, + "learning_rate": 1.0368e-05, + "loss": 276.2956, + "step": 6480 + }, + { + "epoch": 0.02622042122359272, + "grad_norm": 2020.3470458984375, + "learning_rate": 1.0384000000000001e-05, + "loss": 227.8999, + "step": 6490 + }, + { + "epoch": 0.026260822488960354, + "grad_norm": 880.0003662109375, + "learning_rate": 1.04e-05, + "loss": 252.7301, + "step": 6500 + }, + { + "epoch": 0.026301223754327984, + "grad_norm": 1083.6527099609375, + "learning_rate": 1.0416000000000002e-05, + "loss": 226.1567, + "step": 6510 + }, + { + "epoch": 0.026341625019695617, + "grad_norm": 874.8389282226562, + "learning_rate": 1.0432e-05, + "loss": 239.6542, + "step": 6520 + }, + { + "epoch": 0.026382026285063247, + "grad_norm": 2462.361572265625, + "learning_rate": 1.0448e-05, + "loss": 266.0335, + "step": 6530 + }, + { + "epoch": 0.02642242755043088, + "grad_norm": 1575.4691162109375, + "learning_rate": 1.0464e-05, + "loss": 262.0662, + "step": 6540 + }, + { + "epoch": 
0.02646282881579851, + "grad_norm": 2565.120849609375, + "learning_rate": 1.0480000000000001e-05, + "loss": 184.8124, + "step": 6550 + }, + { + "epoch": 0.026503230081166142, + "grad_norm": 840.5997314453125, + "learning_rate": 1.0496000000000003e-05, + "loss": 210.7652, + "step": 6560 + }, + { + "epoch": 0.026543631346533772, + "grad_norm": 2230.67431640625, + "learning_rate": 1.0512e-05, + "loss": 175.8737, + "step": 6570 + }, + { + "epoch": 0.026584032611901405, + "grad_norm": 896.8818969726562, + "learning_rate": 1.0528e-05, + "loss": 214.3054, + "step": 6580 + }, + { + "epoch": 0.026624433877269035, + "grad_norm": 1161.5887451171875, + "learning_rate": 1.0544000000000001e-05, + "loss": 262.6333, + "step": 6590 + }, + { + "epoch": 0.026664835142636668, + "grad_norm": 928.5606689453125, + "learning_rate": 1.056e-05, + "loss": 290.748, + "step": 6600 + }, + { + "epoch": 0.026705236408004297, + "grad_norm": 1338.7203369140625, + "learning_rate": 1.0576000000000002e-05, + "loss": 271.3372, + "step": 6610 + }, + { + "epoch": 0.02674563767337193, + "grad_norm": 1008.8140869140625, + "learning_rate": 1.0592e-05, + "loss": 193.8615, + "step": 6620 + }, + { + "epoch": 0.02678603893873956, + "grad_norm": 837.7280883789062, + "learning_rate": 1.0608e-05, + "loss": 348.6847, + "step": 6630 + }, + { + "epoch": 0.026826440204107193, + "grad_norm": 1172.448486328125, + "learning_rate": 1.0624e-05, + "loss": 181.1747, + "step": 6640 + }, + { + "epoch": 0.026866841469474823, + "grad_norm": 709.8575439453125, + "learning_rate": 1.0640000000000001e-05, + "loss": 179.9583, + "step": 6650 + }, + { + "epoch": 0.026907242734842456, + "grad_norm": 1359.3955078125, + "learning_rate": 1.0656000000000003e-05, + "loss": 217.6193, + "step": 6660 + }, + { + "epoch": 0.026947644000210085, + "grad_norm": 1660.900634765625, + "learning_rate": 1.0672e-05, + "loss": 222.8404, + "step": 6670 + }, + { + "epoch": 0.026988045265577718, + "grad_norm": 959.6134643554688, + "learning_rate": 1.0688e-05, + "loss": 181.7116, + "step": 6680 + }, + { + "epoch": 0.027028446530945348, + "grad_norm": 942.3123168945312, + "learning_rate": 1.0704000000000001e-05, + "loss": 231.1088, + "step": 6690 + }, + { + "epoch": 0.02706884779631298, + "grad_norm": 1938.5755615234375, + "learning_rate": 1.072e-05, + "loss": 296.0411, + "step": 6700 + }, + { + "epoch": 0.02710924906168061, + "grad_norm": 874.325927734375, + "learning_rate": 1.0736000000000002e-05, + "loss": 278.9008, + "step": 6710 + }, + { + "epoch": 0.027149650327048244, + "grad_norm": 1330.984619140625, + "learning_rate": 1.0752e-05, + "loss": 210.4879, + "step": 6720 + }, + { + "epoch": 0.027190051592415873, + "grad_norm": 1520.1173095703125, + "learning_rate": 1.0768000000000001e-05, + "loss": 288.6217, + "step": 6730 + }, + { + "epoch": 0.027230452857783506, + "grad_norm": 1151.8486328125, + "learning_rate": 1.0784e-05, + "loss": 370.7964, + "step": 6740 + }, + { + "epoch": 0.027270854123151136, + "grad_norm": 1811.0943603515625, + "learning_rate": 1.0800000000000002e-05, + "loss": 330.5686, + "step": 6750 + }, + { + "epoch": 0.02731125538851877, + "grad_norm": 3244.92138671875, + "learning_rate": 1.0816e-05, + "loss": 246.5774, + "step": 6760 + }, + { + "epoch": 0.0273516566538864, + "grad_norm": 925.0736694335938, + "learning_rate": 1.0832e-05, + "loss": 197.6749, + "step": 6770 + }, + { + "epoch": 0.02739205791925403, + "grad_norm": 1394.6497802734375, + "learning_rate": 1.0848e-05, + "loss": 230.4484, + "step": 6780 + }, + { + "epoch": 0.02743245918462166, + "grad_norm": 
2128.161376953125, + "learning_rate": 1.0864000000000001e-05, + "loss": 232.5368, + "step": 6790 + }, + { + "epoch": 0.027472860449989294, + "grad_norm": 2484.990478515625, + "learning_rate": 1.0880000000000001e-05, + "loss": 270.1688, + "step": 6800 + }, + { + "epoch": 0.027513261715356924, + "grad_norm": 1234.1771240234375, + "learning_rate": 1.0896e-05, + "loss": 207.6368, + "step": 6810 + }, + { + "epoch": 0.027553662980724557, + "grad_norm": 753.8899536132812, + "learning_rate": 1.0912e-05, + "loss": 184.9427, + "step": 6820 + }, + { + "epoch": 0.027594064246092186, + "grad_norm": 2944.4462890625, + "learning_rate": 1.0928000000000001e-05, + "loss": 228.4994, + "step": 6830 + }, + { + "epoch": 0.02763446551145982, + "grad_norm": 2310.526123046875, + "learning_rate": 1.0944e-05, + "loss": 162.7096, + "step": 6840 + }, + { + "epoch": 0.02767486677682745, + "grad_norm": 909.5531005859375, + "learning_rate": 1.0960000000000002e-05, + "loss": 266.9992, + "step": 6850 + }, + { + "epoch": 0.027715268042195082, + "grad_norm": 1383.5738525390625, + "learning_rate": 1.0976e-05, + "loss": 178.3273, + "step": 6860 + }, + { + "epoch": 0.02775566930756271, + "grad_norm": 1543.33935546875, + "learning_rate": 1.0992e-05, + "loss": 242.4235, + "step": 6870 + }, + { + "epoch": 0.027796070572930345, + "grad_norm": 843.8203125, + "learning_rate": 1.1008e-05, + "loss": 319.9893, + "step": 6880 + }, + { + "epoch": 0.027836471838297974, + "grad_norm": 1308.6536865234375, + "learning_rate": 1.1024000000000002e-05, + "loss": 306.2465, + "step": 6890 + }, + { + "epoch": 0.027876873103665607, + "grad_norm": 2125.869140625, + "learning_rate": 1.1040000000000001e-05, + "loss": 182.2989, + "step": 6900 + }, + { + "epoch": 0.027917274369033237, + "grad_norm": 977.1426391601562, + "learning_rate": 1.1056e-05, + "loss": 223.0309, + "step": 6910 + }, + { + "epoch": 0.02795767563440087, + "grad_norm": 934.2973022460938, + "learning_rate": 1.1072e-05, + "loss": 189.5661, + "step": 6920 + }, + { + "epoch": 0.0279980768997685, + "grad_norm": 1361.2203369140625, + "learning_rate": 1.1088000000000001e-05, + "loss": 180.0254, + "step": 6930 + }, + { + "epoch": 0.028038478165136133, + "grad_norm": 1310.0604248046875, + "learning_rate": 1.1104e-05, + "loss": 202.4235, + "step": 6940 + }, + { + "epoch": 0.028078879430503762, + "grad_norm": 926.858154296875, + "learning_rate": 1.1120000000000002e-05, + "loss": 200.4343, + "step": 6950 + }, + { + "epoch": 0.028119280695871395, + "grad_norm": 1323.7728271484375, + "learning_rate": 1.1136e-05, + "loss": 167.6427, + "step": 6960 + }, + { + "epoch": 0.028159681961239025, + "grad_norm": 2827.5771484375, + "learning_rate": 1.1152000000000001e-05, + "loss": 161.715, + "step": 6970 + }, + { + "epoch": 0.028200083226606658, + "grad_norm": 4883.650390625, + "learning_rate": 1.1168e-05, + "loss": 227.341, + "step": 6980 + }, + { + "epoch": 0.028240484491974287, + "grad_norm": 1085.9722900390625, + "learning_rate": 1.1184000000000002e-05, + "loss": 220.9323, + "step": 6990 + }, + { + "epoch": 0.02828088575734192, + "grad_norm": 809.2932739257812, + "learning_rate": 1.1200000000000001e-05, + "loss": 241.921, + "step": 7000 + }, + { + "epoch": 0.02832128702270955, + "grad_norm": 1243.0458984375, + "learning_rate": 1.1216e-05, + "loss": 215.934, + "step": 7010 + }, + { + "epoch": 0.028361688288077183, + "grad_norm": 1380.5570068359375, + "learning_rate": 1.1232e-05, + "loss": 241.5136, + "step": 7020 + }, + { + "epoch": 0.028402089553444813, + "grad_norm": 779.8323364257812, + "learning_rate": 
1.1248000000000001e-05, + "loss": 213.9881, + "step": 7030 + }, + { + "epoch": 0.028442490818812446, + "grad_norm": 1580.6522216796875, + "learning_rate": 1.1264000000000001e-05, + "loss": 196.6301, + "step": 7040 + }, + { + "epoch": 0.028482892084180075, + "grad_norm": 1877.567138671875, + "learning_rate": 1.128e-05, + "loss": 223.1786, + "step": 7050 + }, + { + "epoch": 0.02852329334954771, + "grad_norm": 1072.787353515625, + "learning_rate": 1.1296e-05, + "loss": 258.2605, + "step": 7060 + }, + { + "epoch": 0.028563694614915338, + "grad_norm": 1353.8275146484375, + "learning_rate": 1.1312000000000001e-05, + "loss": 157.2304, + "step": 7070 + }, + { + "epoch": 0.02860409588028297, + "grad_norm": 1045.60986328125, + "learning_rate": 1.1328e-05, + "loss": 209.2667, + "step": 7080 + }, + { + "epoch": 0.0286444971456506, + "grad_norm": 1487.217041015625, + "learning_rate": 1.1344000000000002e-05, + "loss": 221.1227, + "step": 7090 + }, + { + "epoch": 0.028684898411018234, + "grad_norm": 793.8728637695312, + "learning_rate": 1.136e-05, + "loss": 285.0365, + "step": 7100 + }, + { + "epoch": 0.028725299676385863, + "grad_norm": 890.9113159179688, + "learning_rate": 1.1376000000000001e-05, + "loss": 261.807, + "step": 7110 + }, + { + "epoch": 0.028765700941753496, + "grad_norm": 1069.0255126953125, + "learning_rate": 1.1392e-05, + "loss": 238.9624, + "step": 7120 + }, + { + "epoch": 0.028806102207121126, + "grad_norm": 924.4225463867188, + "learning_rate": 1.1408000000000002e-05, + "loss": 188.6247, + "step": 7130 + }, + { + "epoch": 0.02884650347248876, + "grad_norm": 1439.3868408203125, + "learning_rate": 1.1424000000000001e-05, + "loss": 237.2832, + "step": 7140 + }, + { + "epoch": 0.02888690473785639, + "grad_norm": 1247.814453125, + "learning_rate": 1.144e-05, + "loss": 389.9847, + "step": 7150 + }, + { + "epoch": 0.02892730600322402, + "grad_norm": 1341.184814453125, + "learning_rate": 1.1456e-05, + "loss": 271.61, + "step": 7160 + }, + { + "epoch": 0.02896770726859165, + "grad_norm": 2428.02392578125, + "learning_rate": 1.1472000000000001e-05, + "loss": 196.4588, + "step": 7170 + }, + { + "epoch": 0.029008108533959284, + "grad_norm": 26379.6015625, + "learning_rate": 1.1488e-05, + "loss": 327.9471, + "step": 7180 + }, + { + "epoch": 0.029048509799326914, + "grad_norm": 1270.9744873046875, + "learning_rate": 1.1504000000000002e-05, + "loss": 272.4246, + "step": 7190 + }, + { + "epoch": 0.029088911064694547, + "grad_norm": 6668.92041015625, + "learning_rate": 1.152e-05, + "loss": 219.1625, + "step": 7200 + }, + { + "epoch": 0.029129312330062176, + "grad_norm": 1636.249267578125, + "learning_rate": 1.1536000000000001e-05, + "loss": 203.2768, + "step": 7210 + }, + { + "epoch": 0.02916971359542981, + "grad_norm": 503.6784362792969, + "learning_rate": 1.1552e-05, + "loss": 218.1714, + "step": 7220 + }, + { + "epoch": 0.02921011486079744, + "grad_norm": 3872.6328125, + "learning_rate": 1.1568000000000002e-05, + "loss": 265.4349, + "step": 7230 + }, + { + "epoch": 0.029250516126165072, + "grad_norm": 1130.141845703125, + "learning_rate": 1.1584000000000001e-05, + "loss": 189.6052, + "step": 7240 + }, + { + "epoch": 0.029290917391532702, + "grad_norm": 1756.34326171875, + "learning_rate": 1.16e-05, + "loss": 272.0291, + "step": 7250 + }, + { + "epoch": 0.029331318656900335, + "grad_norm": 2000.3079833984375, + "learning_rate": 1.1616e-05, + "loss": 233.4273, + "step": 7260 + }, + { + "epoch": 0.029371719922267964, + "grad_norm": 1522.8970947265625, + "learning_rate": 1.1632000000000001e-05, + 
"loss": 183.9729, + "step": 7270 + }, + { + "epoch": 0.029412121187635597, + "grad_norm": 706.8068237304688, + "learning_rate": 1.1648000000000001e-05, + "loss": 231.8446, + "step": 7280 + }, + { + "epoch": 0.029452522453003227, + "grad_norm": 1172.4625244140625, + "learning_rate": 1.1664000000000002e-05, + "loss": 229.9296, + "step": 7290 + }, + { + "epoch": 0.02949292371837086, + "grad_norm": 1662.0928955078125, + "learning_rate": 1.168e-05, + "loss": 176.6628, + "step": 7300 + }, + { + "epoch": 0.02953332498373849, + "grad_norm": 1639.0361328125, + "learning_rate": 1.1696000000000001e-05, + "loss": 275.6672, + "step": 7310 + }, + { + "epoch": 0.029573726249106123, + "grad_norm": 1266.662353515625, + "learning_rate": 1.1712e-05, + "loss": 284.9782, + "step": 7320 + }, + { + "epoch": 0.029614127514473752, + "grad_norm": 3208.654296875, + "learning_rate": 1.1728000000000002e-05, + "loss": 235.4586, + "step": 7330 + }, + { + "epoch": 0.029654528779841385, + "grad_norm": 1359.682861328125, + "learning_rate": 1.1744000000000001e-05, + "loss": 207.6446, + "step": 7340 + }, + { + "epoch": 0.029694930045209015, + "grad_norm": 1931.232421875, + "learning_rate": 1.1760000000000001e-05, + "loss": 156.1951, + "step": 7350 + }, + { + "epoch": 0.029735331310576648, + "grad_norm": 1218.5322265625, + "learning_rate": 1.1776e-05, + "loss": 232.6144, + "step": 7360 + }, + { + "epoch": 0.029775732575944278, + "grad_norm": 1671.8291015625, + "learning_rate": 1.1792000000000002e-05, + "loss": 158.2081, + "step": 7370 + }, + { + "epoch": 0.02981613384131191, + "grad_norm": 1265.2774658203125, + "learning_rate": 1.1808000000000001e-05, + "loss": 200.4327, + "step": 7380 + }, + { + "epoch": 0.02985653510667954, + "grad_norm": 3447.01953125, + "learning_rate": 1.1824e-05, + "loss": 226.0577, + "step": 7390 + }, + { + "epoch": 0.029896936372047173, + "grad_norm": 1136.2012939453125, + "learning_rate": 1.184e-05, + "loss": 322.3941, + "step": 7400 + }, + { + "epoch": 0.029937337637414803, + "grad_norm": 1266.980224609375, + "learning_rate": 1.1856000000000001e-05, + "loss": 218.5675, + "step": 7410 + }, + { + "epoch": 0.029977738902782436, + "grad_norm": 1450.3671875, + "learning_rate": 1.1872000000000001e-05, + "loss": 217.7613, + "step": 7420 + }, + { + "epoch": 0.030018140168150065, + "grad_norm": 2214.37451171875, + "learning_rate": 1.1888000000000002e-05, + "loss": 149.0249, + "step": 7430 + }, + { + "epoch": 0.0300585414335177, + "grad_norm": 2963.03955078125, + "learning_rate": 1.1904e-05, + "loss": 178.5009, + "step": 7440 + }, + { + "epoch": 0.030098942698885328, + "grad_norm": 1158.3040771484375, + "learning_rate": 1.1920000000000001e-05, + "loss": 292.4344, + "step": 7450 + }, + { + "epoch": 0.03013934396425296, + "grad_norm": 1077.713134765625, + "learning_rate": 1.1936e-05, + "loss": 252.7388, + "step": 7460 + }, + { + "epoch": 0.03017974522962059, + "grad_norm": 1461.2139892578125, + "learning_rate": 1.1952000000000002e-05, + "loss": 197.9448, + "step": 7470 + }, + { + "epoch": 0.030220146494988224, + "grad_norm": 1840.8155517578125, + "learning_rate": 1.1968000000000001e-05, + "loss": 293.3979, + "step": 7480 + }, + { + "epoch": 0.030260547760355853, + "grad_norm": 1308.437255859375, + "learning_rate": 1.1984e-05, + "loss": 326.3665, + "step": 7490 + }, + { + "epoch": 0.030300949025723486, + "grad_norm": 3570.11767578125, + "learning_rate": 1.2e-05, + "loss": 232.8732, + "step": 7500 + }, + { + "epoch": 0.030341350291091116, + "grad_norm": 3255.725341796875, + "learning_rate": 
1.2016000000000002e-05, + "loss": 316.9187, + "step": 7510 + }, + { + "epoch": 0.03038175155645875, + "grad_norm": 1589.6546630859375, + "learning_rate": 1.2032000000000001e-05, + "loss": 206.5627, + "step": 7520 + }, + { + "epoch": 0.03042215282182638, + "grad_norm": 905.2511596679688, + "learning_rate": 1.2048000000000002e-05, + "loss": 192.0826, + "step": 7530 + }, + { + "epoch": 0.030462554087194012, + "grad_norm": 1292.0413818359375, + "learning_rate": 1.2064e-05, + "loss": 264.5819, + "step": 7540 + }, + { + "epoch": 0.03050295535256164, + "grad_norm": 8189.70654296875, + "learning_rate": 1.2080000000000001e-05, + "loss": 238.1731, + "step": 7550 + }, + { + "epoch": 0.030543356617929274, + "grad_norm": 1639.5455322265625, + "learning_rate": 1.2096e-05, + "loss": 184.7429, + "step": 7560 + }, + { + "epoch": 0.030583757883296904, + "grad_norm": 2905.476318359375, + "learning_rate": 1.2112000000000002e-05, + "loss": 149.7674, + "step": 7570 + }, + { + "epoch": 0.030624159148664537, + "grad_norm": 1810.7962646484375, + "learning_rate": 1.2128000000000001e-05, + "loss": 341.0055, + "step": 7580 + }, + { + "epoch": 0.030664560414032167, + "grad_norm": 839.985595703125, + "learning_rate": 1.2144000000000001e-05, + "loss": 296.3502, + "step": 7590 + }, + { + "epoch": 0.0307049616793998, + "grad_norm": 2297.6572265625, + "learning_rate": 1.216e-05, + "loss": 289.7547, + "step": 7600 + }, + { + "epoch": 0.03074536294476743, + "grad_norm": 1198.633544921875, + "learning_rate": 1.2176000000000002e-05, + "loss": 279.8957, + "step": 7610 + }, + { + "epoch": 0.030785764210135062, + "grad_norm": 1034.1666259765625, + "learning_rate": 1.2192000000000001e-05, + "loss": 198.3937, + "step": 7620 + }, + { + "epoch": 0.030826165475502692, + "grad_norm": 1112.4034423828125, + "learning_rate": 1.2208000000000002e-05, + "loss": 219.5101, + "step": 7630 + }, + { + "epoch": 0.030866566740870325, + "grad_norm": 617.9956665039062, + "learning_rate": 1.2224e-05, + "loss": 168.187, + "step": 7640 + }, + { + "epoch": 0.030906968006237955, + "grad_norm": 1723.3265380859375, + "learning_rate": 1.2240000000000001e-05, + "loss": 240.1638, + "step": 7650 + }, + { + "epoch": 0.030947369271605588, + "grad_norm": 821.0966186523438, + "learning_rate": 1.2256000000000001e-05, + "loss": 194.6567, + "step": 7660 + }, + { + "epoch": 0.030987770536973217, + "grad_norm": 997.5595703125, + "learning_rate": 1.2272000000000002e-05, + "loss": 207.748, + "step": 7670 + }, + { + "epoch": 0.03102817180234085, + "grad_norm": 4175.6552734375, + "learning_rate": 1.2288e-05, + "loss": 279.2439, + "step": 7680 + }, + { + "epoch": 0.03106857306770848, + "grad_norm": 3162.721435546875, + "learning_rate": 1.2304000000000001e-05, + "loss": 234.8496, + "step": 7690 + }, + { + "epoch": 0.031108974333076113, + "grad_norm": 1057.8349609375, + "learning_rate": 1.232e-05, + "loss": 244.352, + "step": 7700 + }, + { + "epoch": 0.031149375598443742, + "grad_norm": 1351.04150390625, + "learning_rate": 1.2336000000000002e-05, + "loss": 217.4938, + "step": 7710 + }, + { + "epoch": 0.031189776863811376, + "grad_norm": 1227.122314453125, + "learning_rate": 1.2352000000000001e-05, + "loss": 193.3585, + "step": 7720 + }, + { + "epoch": 0.031230178129179005, + "grad_norm": 747.8841552734375, + "learning_rate": 1.2368e-05, + "loss": 212.2819, + "step": 7730 + }, + { + "epoch": 0.03127057939454664, + "grad_norm": 983.3997802734375, + "learning_rate": 1.2384e-05, + "loss": 138.8288, + "step": 7740 + }, + { + "epoch": 0.03131098065991427, + "grad_norm": 
1224.09912109375, + "learning_rate": 1.2400000000000002e-05, + "loss": 269.3461, + "step": 7750 + }, + { + "epoch": 0.0313513819252819, + "grad_norm": 1891.6029052734375, + "learning_rate": 1.2416000000000001e-05, + "loss": 270.9909, + "step": 7760 + }, + { + "epoch": 0.03139178319064953, + "grad_norm": 1027.2734375, + "learning_rate": 1.2432000000000002e-05, + "loss": 175.0789, + "step": 7770 + }, + { + "epoch": 0.03143218445601716, + "grad_norm": 1044.42822265625, + "learning_rate": 1.2448e-05, + "loss": 225.8364, + "step": 7780 + }, + { + "epoch": 0.031472585721384796, + "grad_norm": 1452.5001220703125, + "learning_rate": 1.2464000000000001e-05, + "loss": 201.6204, + "step": 7790 + }, + { + "epoch": 0.03151298698675242, + "grad_norm": 1745.8421630859375, + "learning_rate": 1.248e-05, + "loss": 232.2858, + "step": 7800 + }, + { + "epoch": 0.031553388252120056, + "grad_norm": 630.3140869140625, + "learning_rate": 1.2496000000000002e-05, + "loss": 156.6699, + "step": 7810 + }, + { + "epoch": 0.03159378951748769, + "grad_norm": 2106.661865234375, + "learning_rate": 1.2512000000000002e-05, + "loss": 257.7423, + "step": 7820 + }, + { + "epoch": 0.03163419078285532, + "grad_norm": 622.233642578125, + "learning_rate": 1.2528e-05, + "loss": 167.2626, + "step": 7830 + }, + { + "epoch": 0.03167459204822295, + "grad_norm": 1709.203857421875, + "learning_rate": 1.2544e-05, + "loss": 216.348, + "step": 7840 + }, + { + "epoch": 0.03171499331359058, + "grad_norm": 2118.884033203125, + "learning_rate": 1.2560000000000002e-05, + "loss": 231.4787, + "step": 7850 + }, + { + "epoch": 0.031755394578958214, + "grad_norm": 1514.96728515625, + "learning_rate": 1.2576000000000001e-05, + "loss": 224.1166, + "step": 7860 + }, + { + "epoch": 0.03179579584432585, + "grad_norm": 1334.1695556640625, + "learning_rate": 1.2592000000000002e-05, + "loss": 184.2429, + "step": 7870 + }, + { + "epoch": 0.03183619710969347, + "grad_norm": 5447.9619140625, + "learning_rate": 1.2608e-05, + "loss": 382.5181, + "step": 7880 + }, + { + "epoch": 0.031876598375061106, + "grad_norm": 1264.90283203125, + "learning_rate": 1.2624000000000001e-05, + "loss": 166.1744, + "step": 7890 + }, + { + "epoch": 0.03191699964042874, + "grad_norm": 900.985107421875, + "learning_rate": 1.2640000000000001e-05, + "loss": 137.2021, + "step": 7900 + }, + { + "epoch": 0.03195740090579637, + "grad_norm": 804.0714111328125, + "learning_rate": 1.2656000000000002e-05, + "loss": 250.7932, + "step": 7910 + }, + { + "epoch": 0.031997802171164, + "grad_norm": 815.2825927734375, + "learning_rate": 1.2672000000000002e-05, + "loss": 242.1783, + "step": 7920 + }, + { + "epoch": 0.03203820343653163, + "grad_norm": 1228.362060546875, + "learning_rate": 1.2688e-05, + "loss": 236.474, + "step": 7930 + }, + { + "epoch": 0.032078604701899265, + "grad_norm": 8129.83251953125, + "learning_rate": 1.2704e-05, + "loss": 233.6997, + "step": 7940 + }, + { + "epoch": 0.0321190059672669, + "grad_norm": 1117.083984375, + "learning_rate": 1.2720000000000002e-05, + "loss": 259.6865, + "step": 7950 + }, + { + "epoch": 0.032159407232634524, + "grad_norm": 1470.302978515625, + "learning_rate": 1.2736000000000001e-05, + "loss": 298.7662, + "step": 7960 + }, + { + "epoch": 0.03219980849800216, + "grad_norm": 2743.2978515625, + "learning_rate": 1.2752e-05, + "loss": 280.2184, + "step": 7970 + }, + { + "epoch": 0.03224020976336979, + "grad_norm": 1516.951416015625, + "learning_rate": 1.2768e-05, + "loss": 230.1135, + "step": 7980 + }, + { + "epoch": 0.03228061102873742, + "grad_norm": 
1107.3660888671875, + "learning_rate": 1.2784000000000002e-05, + "loss": 186.2382, + "step": 7990 + }, + { + "epoch": 0.03232101229410505, + "grad_norm": 837.2027587890625, + "learning_rate": 1.2800000000000001e-05, + "loss": 249.5209, + "step": 8000 + }, + { + "epoch": 0.03236141355947268, + "grad_norm": 1267.685791015625, + "learning_rate": 1.2816000000000002e-05, + "loss": 237.431, + "step": 8010 + }, + { + "epoch": 0.032401814824840315, + "grad_norm": 2935.21923828125, + "learning_rate": 1.2832e-05, + "loss": 420.8199, + "step": 8020 + }, + { + "epoch": 0.03244221609020795, + "grad_norm": 1302.763427734375, + "learning_rate": 1.2848e-05, + "loss": 258.2448, + "step": 8030 + }, + { + "epoch": 0.032482617355575574, + "grad_norm": 696.9424438476562, + "learning_rate": 1.2864e-05, + "loss": 228.1104, + "step": 8040 + }, + { + "epoch": 0.03252301862094321, + "grad_norm": 908.2662963867188, + "learning_rate": 1.2880000000000002e-05, + "loss": 199.276, + "step": 8050 + }, + { + "epoch": 0.03256341988631084, + "grad_norm": 1485.172607421875, + "learning_rate": 1.2896000000000002e-05, + "loss": 278.001, + "step": 8060 + }, + { + "epoch": 0.03260382115167847, + "grad_norm": 11724.9384765625, + "learning_rate": 1.2912e-05, + "loss": 188.8018, + "step": 8070 + }, + { + "epoch": 0.0326442224170461, + "grad_norm": 1197.557861328125, + "learning_rate": 1.2928e-05, + "loss": 223.0534, + "step": 8080 + }, + { + "epoch": 0.03268462368241373, + "grad_norm": 752.60498046875, + "learning_rate": 1.2944000000000002e-05, + "loss": 267.4496, + "step": 8090 + }, + { + "epoch": 0.032725024947781366, + "grad_norm": 1948.71240234375, + "learning_rate": 1.2960000000000001e-05, + "loss": 148.4794, + "step": 8100 + }, + { + "epoch": 0.032765426213149, + "grad_norm": 1441.8746337890625, + "learning_rate": 1.2976000000000002e-05, + "loss": 242.6129, + "step": 8110 + }, + { + "epoch": 0.032805827478516625, + "grad_norm": 1611.25390625, + "learning_rate": 1.2992e-05, + "loss": 215.4656, + "step": 8120 + }, + { + "epoch": 0.03284622874388426, + "grad_norm": 1201.208740234375, + "learning_rate": 1.3008e-05, + "loss": 214.0668, + "step": 8130 + }, + { + "epoch": 0.03288663000925189, + "grad_norm": 685.611572265625, + "learning_rate": 1.3024000000000001e-05, + "loss": 163.0182, + "step": 8140 + }, + { + "epoch": 0.032927031274619524, + "grad_norm": 859.1142578125, + "learning_rate": 1.3040000000000002e-05, + "loss": 151.7978, + "step": 8150 + }, + { + "epoch": 0.03296743253998715, + "grad_norm": 1467.9376220703125, + "learning_rate": 1.3056000000000002e-05, + "loss": 216.9644, + "step": 8160 + }, + { + "epoch": 0.03300783380535478, + "grad_norm": 1050.6201171875, + "learning_rate": 1.3072e-05, + "loss": 167.4997, + "step": 8170 + }, + { + "epoch": 0.033048235070722416, + "grad_norm": 1382.6781005859375, + "learning_rate": 1.3088e-05, + "loss": 224.0113, + "step": 8180 + }, + { + "epoch": 0.03308863633609005, + "grad_norm": 1020.5779418945312, + "learning_rate": 1.3104000000000002e-05, + "loss": 205.4031, + "step": 8190 + }, + { + "epoch": 0.033129037601457675, + "grad_norm": 2299.712646484375, + "learning_rate": 1.3120000000000001e-05, + "loss": 283.4845, + "step": 8200 + }, + { + "epoch": 0.03316943886682531, + "grad_norm": 1020.414794921875, + "learning_rate": 1.3136000000000003e-05, + "loss": 201.5658, + "step": 8210 + }, + { + "epoch": 0.03320984013219294, + "grad_norm": 589.2197875976562, + "learning_rate": 1.3152e-05, + "loss": 214.5626, + "step": 8220 + }, + { + "epoch": 0.033250241397560575, + "grad_norm": 
1860.8743896484375, + "learning_rate": 1.3168e-05, + "loss": 192.4475, + "step": 8230 + }, + { + "epoch": 0.0332906426629282, + "grad_norm": 984.2620849609375, + "learning_rate": 1.3184000000000001e-05, + "loss": 211.2297, + "step": 8240 + }, + { + "epoch": 0.033331043928295834, + "grad_norm": 1026.0506591796875, + "learning_rate": 1.3200000000000002e-05, + "loss": 215.6478, + "step": 8250 + }, + { + "epoch": 0.03337144519366347, + "grad_norm": 829.0204467773438, + "learning_rate": 1.3216000000000002e-05, + "loss": 277.9813, + "step": 8260 + }, + { + "epoch": 0.0334118464590311, + "grad_norm": 1361.8870849609375, + "learning_rate": 1.3232e-05, + "loss": 247.2565, + "step": 8270 + }, + { + "epoch": 0.033452247724398726, + "grad_norm": 1092.4251708984375, + "learning_rate": 1.3248000000000001e-05, + "loss": 281.5903, + "step": 8280 + }, + { + "epoch": 0.03349264898976636, + "grad_norm": 1257.5577392578125, + "learning_rate": 1.3264000000000002e-05, + "loss": 246.065, + "step": 8290 + }, + { + "epoch": 0.03353305025513399, + "grad_norm": 519.0601196289062, + "learning_rate": 1.3280000000000002e-05, + "loss": 188.7281, + "step": 8300 + }, + { + "epoch": 0.033573451520501625, + "grad_norm": 945.4765625, + "learning_rate": 1.3296e-05, + "loss": 184.6031, + "step": 8310 + }, + { + "epoch": 0.03361385278586925, + "grad_norm": 4682.99072265625, + "learning_rate": 1.3312e-05, + "loss": 238.8773, + "step": 8320 + }, + { + "epoch": 0.033654254051236884, + "grad_norm": 787.5958251953125, + "learning_rate": 1.3328e-05, + "loss": 195.3185, + "step": 8330 + }, + { + "epoch": 0.03369465531660452, + "grad_norm": 1163.9930419921875, + "learning_rate": 1.3344000000000001e-05, + "loss": 189.7157, + "step": 8340 + }, + { + "epoch": 0.03373505658197215, + "grad_norm": 2459.8876953125, + "learning_rate": 1.3360000000000003e-05, + "loss": 245.5701, + "step": 8350 + }, + { + "epoch": 0.033775457847339777, + "grad_norm": 1014.5933837890625, + "learning_rate": 1.3376e-05, + "loss": 217.3412, + "step": 8360 + }, + { + "epoch": 0.03381585911270741, + "grad_norm": 1133.9744873046875, + "learning_rate": 1.3392e-05, + "loss": 179.2957, + "step": 8370 + }, + { + "epoch": 0.03385626037807504, + "grad_norm": 1001.8255004882812, + "learning_rate": 1.3408000000000001e-05, + "loss": 165.8568, + "step": 8380 + }, + { + "epoch": 0.033896661643442676, + "grad_norm": 2458.19287109375, + "learning_rate": 1.3424000000000002e-05, + "loss": 256.5106, + "step": 8390 + }, + { + "epoch": 0.0339370629088103, + "grad_norm": 2593.06201171875, + "learning_rate": 1.3440000000000002e-05, + "loss": 179.1338, + "step": 8400 + }, + { + "epoch": 0.033977464174177935, + "grad_norm": 1281.1871337890625, + "learning_rate": 1.3456e-05, + "loss": 228.2931, + "step": 8410 + }, + { + "epoch": 0.03401786543954557, + "grad_norm": 856.2692260742188, + "learning_rate": 1.3472e-05, + "loss": 185.6773, + "step": 8420 + }, + { + "epoch": 0.0340582667049132, + "grad_norm": 712.3380737304688, + "learning_rate": 1.3488e-05, + "loss": 205.9499, + "step": 8430 + }, + { + "epoch": 0.03409866797028083, + "grad_norm": 1178.1876220703125, + "learning_rate": 1.3504000000000001e-05, + "loss": 193.1317, + "step": 8440 + }, + { + "epoch": 0.03413906923564846, + "grad_norm": 900.8714599609375, + "learning_rate": 1.3520000000000003e-05, + "loss": 128.0489, + "step": 8450 + }, + { + "epoch": 0.03417947050101609, + "grad_norm": 2454.438720703125, + "learning_rate": 1.3536e-05, + "loss": 209.9976, + "step": 8460 + }, + { + "epoch": 0.034219871766383726, + "grad_norm": 
962.5108032226562, + "learning_rate": 1.3552e-05, + "loss": 213.2886, + "step": 8470 + }, + { + "epoch": 0.03426027303175135, + "grad_norm": 1213.9720458984375, + "learning_rate": 1.3568000000000001e-05, + "loss": 223.5978, + "step": 8480 + }, + { + "epoch": 0.034300674297118985, + "grad_norm": 743.6492919921875, + "learning_rate": 1.3584000000000002e-05, + "loss": 263.924, + "step": 8490 + }, + { + "epoch": 0.03434107556248662, + "grad_norm": 522.6319580078125, + "learning_rate": 1.3600000000000002e-05, + "loss": 216.7719, + "step": 8500 + }, + { + "epoch": 0.03438147682785425, + "grad_norm": 879.3556518554688, + "learning_rate": 1.3616e-05, + "loss": 274.4282, + "step": 8510 + }, + { + "epoch": 0.03442187809322188, + "grad_norm": 943.1896362304688, + "learning_rate": 1.3632000000000001e-05, + "loss": 202.9436, + "step": 8520 + }, + { + "epoch": 0.03446227935858951, + "grad_norm": 1110.2197265625, + "learning_rate": 1.3648e-05, + "loss": 208.5962, + "step": 8530 + }, + { + "epoch": 0.034502680623957144, + "grad_norm": 2381.78515625, + "learning_rate": 1.3664000000000002e-05, + "loss": 176.3575, + "step": 8540 + }, + { + "epoch": 0.03454308188932478, + "grad_norm": 3647.009521484375, + "learning_rate": 1.3680000000000003e-05, + "loss": 283.3665, + "step": 8550 + }, + { + "epoch": 0.0345834831546924, + "grad_norm": 1435.9195556640625, + "learning_rate": 1.3696e-05, + "loss": 227.4015, + "step": 8560 + }, + { + "epoch": 0.034623884420060036, + "grad_norm": 1176.7342529296875, + "learning_rate": 1.3712e-05, + "loss": 245.9342, + "step": 8570 + }, + { + "epoch": 0.03466428568542767, + "grad_norm": 1413.023681640625, + "learning_rate": 1.3728000000000001e-05, + "loss": 195.9061, + "step": 8580 + }, + { + "epoch": 0.0347046869507953, + "grad_norm": 2758.99755859375, + "learning_rate": 1.3744000000000003e-05, + "loss": 262.7178, + "step": 8590 + }, + { + "epoch": 0.03474508821616293, + "grad_norm": 1194.412109375, + "learning_rate": 1.376e-05, + "loss": 373.9838, + "step": 8600 + }, + { + "epoch": 0.03478548948153056, + "grad_norm": 843.9379272460938, + "learning_rate": 1.3776e-05, + "loss": 267.3049, + "step": 8610 + }, + { + "epoch": 0.034825890746898194, + "grad_norm": 1113.7880859375, + "learning_rate": 1.3792000000000001e-05, + "loss": 153.8107, + "step": 8620 + }, + { + "epoch": 0.03486629201226583, + "grad_norm": 1733.0443115234375, + "learning_rate": 1.3808e-05, + "loss": 225.0372, + "step": 8630 + }, + { + "epoch": 0.034906693277633453, + "grad_norm": 1744.4527587890625, + "learning_rate": 1.3824000000000002e-05, + "loss": 231.3959, + "step": 8640 + }, + { + "epoch": 0.03494709454300109, + "grad_norm": 1106.697265625, + "learning_rate": 1.384e-05, + "loss": 184.543, + "step": 8650 + }, + { + "epoch": 0.03498749580836872, + "grad_norm": 1037.7171630859375, + "learning_rate": 1.3856e-05, + "loss": 201.8886, + "step": 8660 + }, + { + "epoch": 0.03502789707373635, + "grad_norm": 1436.6629638671875, + "learning_rate": 1.3872e-05, + "loss": 177.0296, + "step": 8670 + }, + { + "epoch": 0.03506829833910398, + "grad_norm": 1424.4892578125, + "learning_rate": 1.3888000000000002e-05, + "loss": 217.8064, + "step": 8680 + }, + { + "epoch": 0.03510869960447161, + "grad_norm": 1294.1304931640625, + "learning_rate": 1.3904000000000003e-05, + "loss": 145.3549, + "step": 8690 + }, + { + "epoch": 0.035149100869839245, + "grad_norm": 916.2075805664062, + "learning_rate": 1.392e-05, + "loss": 290.2213, + "step": 8700 + }, + { + "epoch": 0.03518950213520688, + "grad_norm": 3773.789306640625, + 
"learning_rate": 1.3936e-05, + "loss": 231.5456, + "step": 8710 + }, + { + "epoch": 0.035229903400574504, + "grad_norm": 1352.9482421875, + "learning_rate": 1.3952000000000001e-05, + "loss": 421.8093, + "step": 8720 + }, + { + "epoch": 0.03527030466594214, + "grad_norm": 2713.65185546875, + "learning_rate": 1.3968e-05, + "loss": 232.2097, + "step": 8730 + }, + { + "epoch": 0.03531070593130977, + "grad_norm": 1764.9998779296875, + "learning_rate": 1.3984000000000002e-05, + "loss": 279.4601, + "step": 8740 + }, + { + "epoch": 0.0353511071966774, + "grad_norm": 1381.18798828125, + "learning_rate": 1.4e-05, + "loss": 252.4392, + "step": 8750 + }, + { + "epoch": 0.03539150846204503, + "grad_norm": 2131.35693359375, + "learning_rate": 1.4016000000000001e-05, + "loss": 185.1542, + "step": 8760 + }, + { + "epoch": 0.03543190972741266, + "grad_norm": 907.8837890625, + "learning_rate": 1.4032e-05, + "loss": 205.1052, + "step": 8770 + }, + { + "epoch": 0.035472310992780295, + "grad_norm": 1220.7232666015625, + "learning_rate": 1.4048000000000002e-05, + "loss": 203.8967, + "step": 8780 + }, + { + "epoch": 0.03551271225814793, + "grad_norm": 1194.7598876953125, + "learning_rate": 1.4064000000000003e-05, + "loss": 203.3675, + "step": 8790 + }, + { + "epoch": 0.035553113523515555, + "grad_norm": 1051.851318359375, + "learning_rate": 1.408e-05, + "loss": 204.3004, + "step": 8800 + }, + { + "epoch": 0.03559351478888319, + "grad_norm": 3935.65673828125, + "learning_rate": 1.4096e-05, + "loss": 237.0045, + "step": 8810 + }, + { + "epoch": 0.03563391605425082, + "grad_norm": 2239.27197265625, + "learning_rate": 1.4112000000000001e-05, + "loss": 166.9596, + "step": 8820 + }, + { + "epoch": 0.035674317319618454, + "grad_norm": 942.9476318359375, + "learning_rate": 1.4128000000000001e-05, + "loss": 242.4803, + "step": 8830 + }, + { + "epoch": 0.03571471858498608, + "grad_norm": 2720.481689453125, + "learning_rate": 1.4144000000000002e-05, + "loss": 174.7576, + "step": 8840 + }, + { + "epoch": 0.03575511985035371, + "grad_norm": 1388.336181640625, + "learning_rate": 1.416e-05, + "loss": 190.7032, + "step": 8850 + }, + { + "epoch": 0.035795521115721346, + "grad_norm": 701.8078002929688, + "learning_rate": 1.4176000000000001e-05, + "loss": 271.7066, + "step": 8860 + }, + { + "epoch": 0.03583592238108898, + "grad_norm": 1016.6552124023438, + "learning_rate": 1.4192e-05, + "loss": 169.5349, + "step": 8870 + }, + { + "epoch": 0.035876323646456605, + "grad_norm": 823.756591796875, + "learning_rate": 1.4208000000000002e-05, + "loss": 212.081, + "step": 8880 + }, + { + "epoch": 0.03591672491182424, + "grad_norm": 1106.405517578125, + "learning_rate": 1.4224000000000003e-05, + "loss": 240.7371, + "step": 8890 + }, + { + "epoch": 0.03595712617719187, + "grad_norm": 1155.3194580078125, + "learning_rate": 1.4240000000000001e-05, + "loss": 190.037, + "step": 8900 + }, + { + "epoch": 0.035997527442559504, + "grad_norm": 3984.18359375, + "learning_rate": 1.4256e-05, + "loss": 181.0243, + "step": 8910 + }, + { + "epoch": 0.03603792870792713, + "grad_norm": 2156.81201171875, + "learning_rate": 1.4272000000000002e-05, + "loss": 187.6192, + "step": 8920 + }, + { + "epoch": 0.036078329973294763, + "grad_norm": 954.7111206054688, + "learning_rate": 1.4288000000000001e-05, + "loss": 225.2702, + "step": 8930 + }, + { + "epoch": 0.0361187312386624, + "grad_norm": 691.8679809570312, + "learning_rate": 1.4304e-05, + "loss": 163.6612, + "step": 8940 + }, + { + "epoch": 0.03615913250403003, + "grad_norm": 1024.98388671875, + 
"learning_rate": 1.432e-05, + "loss": 235.0917, + "step": 8950 + }, + { + "epoch": 0.036199533769397656, + "grad_norm": 1150.90576171875, + "learning_rate": 1.4336000000000001e-05, + "loss": 270.4742, + "step": 8960 + }, + { + "epoch": 0.03623993503476529, + "grad_norm": 2572.743408203125, + "learning_rate": 1.4352e-05, + "loss": 190.2871, + "step": 8970 + }, + { + "epoch": 0.03628033630013292, + "grad_norm": 902.4169921875, + "learning_rate": 1.4368000000000002e-05, + "loss": 183.4619, + "step": 8980 + }, + { + "epoch": 0.036320737565500555, + "grad_norm": 3213.55712890625, + "learning_rate": 1.4384e-05, + "loss": 215.3503, + "step": 8990 + }, + { + "epoch": 0.03636113883086818, + "grad_norm": 1932.8109130859375, + "learning_rate": 1.4400000000000001e-05, + "loss": 217.3919, + "step": 9000 + }, + { + "epoch": 0.036401540096235814, + "grad_norm": 2212.296630859375, + "learning_rate": 1.4416e-05, + "loss": 232.1556, + "step": 9010 + }, + { + "epoch": 0.03644194136160345, + "grad_norm": 1298.98046875, + "learning_rate": 1.4432000000000002e-05, + "loss": 183.4323, + "step": 9020 + }, + { + "epoch": 0.03648234262697108, + "grad_norm": 1277.2611083984375, + "learning_rate": 1.4448000000000001e-05, + "loss": 98.1742, + "step": 9030 + }, + { + "epoch": 0.036522743892338706, + "grad_norm": 1488.0601806640625, + "learning_rate": 1.4464e-05, + "loss": 277.6159, + "step": 9040 + }, + { + "epoch": 0.03656314515770634, + "grad_norm": 1120.574462890625, + "learning_rate": 1.448e-05, + "loss": 183.5172, + "step": 9050 + }, + { + "epoch": 0.03660354642307397, + "grad_norm": 2185.33740234375, + "learning_rate": 1.4496000000000001e-05, + "loss": 309.2836, + "step": 9060 + }, + { + "epoch": 0.036643947688441605, + "grad_norm": 1752.278076171875, + "learning_rate": 1.4512000000000001e-05, + "loss": 262.3955, + "step": 9070 + }, + { + "epoch": 0.03668434895380923, + "grad_norm": 883.2100830078125, + "learning_rate": 1.4528000000000002e-05, + "loss": 162.0975, + "step": 9080 + }, + { + "epoch": 0.036724750219176865, + "grad_norm": 626.6256713867188, + "learning_rate": 1.4544e-05, + "loss": 157.2177, + "step": 9090 + }, + { + "epoch": 0.0367651514845445, + "grad_norm": 2931.751953125, + "learning_rate": 1.4560000000000001e-05, + "loss": 274.4264, + "step": 9100 + }, + { + "epoch": 0.03680555274991213, + "grad_norm": 907.1854248046875, + "learning_rate": 1.4576e-05, + "loss": 152.5162, + "step": 9110 + }, + { + "epoch": 0.03684595401527976, + "grad_norm": 3278.87939453125, + "learning_rate": 1.4592000000000002e-05, + "loss": 155.2305, + "step": 9120 + }, + { + "epoch": 0.03688635528064739, + "grad_norm": 899.1883544921875, + "learning_rate": 1.4608000000000001e-05, + "loss": 143.828, + "step": 9130 + }, + { + "epoch": 0.03692675654601502, + "grad_norm": 1202.1689453125, + "learning_rate": 1.4624000000000001e-05, + "loss": 181.9541, + "step": 9140 + }, + { + "epoch": 0.03696715781138265, + "grad_norm": 853.435791015625, + "learning_rate": 1.464e-05, + "loss": 198.377, + "step": 9150 + }, + { + "epoch": 0.03700755907675028, + "grad_norm": 889.3649291992188, + "learning_rate": 1.4656000000000002e-05, + "loss": 261.6317, + "step": 9160 + }, + { + "epoch": 0.037047960342117915, + "grad_norm": 1236.63671875, + "learning_rate": 1.4672000000000001e-05, + "loss": 204.7869, + "step": 9170 + }, + { + "epoch": 0.03708836160748555, + "grad_norm": 1246.18505859375, + "learning_rate": 1.4688000000000002e-05, + "loss": 158.8947, + "step": 9180 + }, + { + "epoch": 0.037128762872853174, + "grad_norm": 1364.5712890625, + 
"learning_rate": 1.4704e-05, + "loss": 210.2889, + "step": 9190 + }, + { + "epoch": 0.03716916413822081, + "grad_norm": 5140.61083984375, + "learning_rate": 1.4720000000000001e-05, + "loss": 197.377, + "step": 9200 + }, + { + "epoch": 0.03720956540358844, + "grad_norm": 6164.064453125, + "learning_rate": 1.4736000000000001e-05, + "loss": 241.0238, + "step": 9210 + }, + { + "epoch": 0.037249966668956074, + "grad_norm": 852.1778564453125, + "learning_rate": 1.4752000000000002e-05, + "loss": 170.4549, + "step": 9220 + }, + { + "epoch": 0.0372903679343237, + "grad_norm": 1821.447998046875, + "learning_rate": 1.4768e-05, + "loss": 200.1077, + "step": 9230 + }, + { + "epoch": 0.03733076919969133, + "grad_norm": 2571.024658203125, + "learning_rate": 1.4784000000000001e-05, + "loss": 215.1601, + "step": 9240 + }, + { + "epoch": 0.037371170465058966, + "grad_norm": 1704.7711181640625, + "learning_rate": 1.48e-05, + "loss": 218.4184, + "step": 9250 + }, + { + "epoch": 0.0374115717304266, + "grad_norm": 1928.3663330078125, + "learning_rate": 1.4816000000000002e-05, + "loss": 182.9694, + "step": 9260 + }, + { + "epoch": 0.037451972995794225, + "grad_norm": 686.9199829101562, + "learning_rate": 1.4832000000000001e-05, + "loss": 234.8578, + "step": 9270 + }, + { + "epoch": 0.03749237426116186, + "grad_norm": 798.2464599609375, + "learning_rate": 1.4848e-05, + "loss": 205.3515, + "step": 9280 + }, + { + "epoch": 0.03753277552652949, + "grad_norm": 2308.01904296875, + "learning_rate": 1.4864e-05, + "loss": 196.125, + "step": 9290 + }, + { + "epoch": 0.037573176791897124, + "grad_norm": 1089.538330078125, + "learning_rate": 1.4880000000000002e-05, + "loss": 202.0252, + "step": 9300 + }, + { + "epoch": 0.03761357805726475, + "grad_norm": 892.7774658203125, + "learning_rate": 1.4896000000000001e-05, + "loss": 184.2696, + "step": 9310 + }, + { + "epoch": 0.03765397932263238, + "grad_norm": 1094.7523193359375, + "learning_rate": 1.4912000000000002e-05, + "loss": 262.6469, + "step": 9320 + }, + { + "epoch": 0.037694380588000016, + "grad_norm": 1282.604736328125, + "learning_rate": 1.4928e-05, + "loss": 277.4276, + "step": 9330 + }, + { + "epoch": 0.03773478185336765, + "grad_norm": 996.060546875, + "learning_rate": 1.4944000000000001e-05, + "loss": 189.5843, + "step": 9340 + }, + { + "epoch": 0.037775183118735275, + "grad_norm": 710.2938842773438, + "learning_rate": 1.496e-05, + "loss": 114.5144, + "step": 9350 + }, + { + "epoch": 0.03781558438410291, + "grad_norm": 1297.0830078125, + "learning_rate": 1.4976000000000002e-05, + "loss": 253.5173, + "step": 9360 + }, + { + "epoch": 0.03785598564947054, + "grad_norm": 1217.3671875, + "learning_rate": 1.4992000000000001e-05, + "loss": 155.0307, + "step": 9370 + }, + { + "epoch": 0.037896386914838175, + "grad_norm": 1255.056640625, + "learning_rate": 1.5008000000000001e-05, + "loss": 223.6899, + "step": 9380 + }, + { + "epoch": 0.0379367881802058, + "grad_norm": 2711.6982421875, + "learning_rate": 1.5024e-05, + "loss": 150.2173, + "step": 9390 + }, + { + "epoch": 0.037977189445573434, + "grad_norm": 891.5656127929688, + "learning_rate": 1.5040000000000002e-05, + "loss": 180.6558, + "step": 9400 + }, + { + "epoch": 0.03801759071094107, + "grad_norm": 1900.5667724609375, + "learning_rate": 1.5056000000000001e-05, + "loss": 241.3778, + "step": 9410 + }, + { + "epoch": 0.0380579919763087, + "grad_norm": 1446.021484375, + "learning_rate": 1.5072000000000002e-05, + "loss": 158.8824, + "step": 9420 + }, + { + "epoch": 0.038098393241676326, + "grad_norm": 1388.30322265625, + 
"learning_rate": 1.5088e-05, + "loss": 144.1863, + "step": 9430 + }, + { + "epoch": 0.03813879450704396, + "grad_norm": 3226.86474609375, + "learning_rate": 1.5104000000000001e-05, + "loss": 225.304, + "step": 9440 + }, + { + "epoch": 0.03817919577241159, + "grad_norm": 3067.704345703125, + "learning_rate": 1.5120000000000001e-05, + "loss": 210.9746, + "step": 9450 + }, + { + "epoch": 0.038219597037779225, + "grad_norm": 1225.9576416015625, + "learning_rate": 1.5136000000000002e-05, + "loss": 246.3059, + "step": 9460 + }, + { + "epoch": 0.03825999830314685, + "grad_norm": 3088.927490234375, + "learning_rate": 1.5152000000000002e-05, + "loss": 234.4156, + "step": 9470 + }, + { + "epoch": 0.038300399568514484, + "grad_norm": 1701.68701171875, + "learning_rate": 1.5168000000000001e-05, + "loss": 225.9531, + "step": 9480 + }, + { + "epoch": 0.03834080083388212, + "grad_norm": 1059.787109375, + "learning_rate": 1.5184e-05, + "loss": 208.0672, + "step": 9490 + }, + { + "epoch": 0.03838120209924975, + "grad_norm": 938.8010864257812, + "learning_rate": 1.5200000000000002e-05, + "loss": 224.4758, + "step": 9500 + }, + { + "epoch": 0.03842160336461738, + "grad_norm": 1132.7548828125, + "learning_rate": 1.5216000000000001e-05, + "loss": 189.2962, + "step": 9510 + }, + { + "epoch": 0.03846200462998501, + "grad_norm": 1225.3050537109375, + "learning_rate": 1.5232000000000003e-05, + "loss": 198.6186, + "step": 9520 + }, + { + "epoch": 0.03850240589535264, + "grad_norm": 4305.90625, + "learning_rate": 1.5248e-05, + "loss": 195.6211, + "step": 9530 + }, + { + "epoch": 0.038542807160720276, + "grad_norm": 1116.0740966796875, + "learning_rate": 1.5264e-05, + "loss": 135.6407, + "step": 9540 + }, + { + "epoch": 0.0385832084260879, + "grad_norm": 912.4844360351562, + "learning_rate": 1.5280000000000003e-05, + "loss": 239.9203, + "step": 9550 + }, + { + "epoch": 0.038623609691455535, + "grad_norm": 2883.529541015625, + "learning_rate": 1.5296e-05, + "loss": 193.9749, + "step": 9560 + }, + { + "epoch": 0.03866401095682317, + "grad_norm": 4977.65869140625, + "learning_rate": 1.5312000000000002e-05, + "loss": 202.0481, + "step": 9570 + }, + { + "epoch": 0.0387044122221908, + "grad_norm": 977.49853515625, + "learning_rate": 1.5328e-05, + "loss": 176.2566, + "step": 9580 + }, + { + "epoch": 0.03874481348755843, + "grad_norm": 683.21923828125, + "learning_rate": 1.5344e-05, + "loss": 146.2518, + "step": 9590 + }, + { + "epoch": 0.03878521475292606, + "grad_norm": 848.5620727539062, + "learning_rate": 1.5360000000000002e-05, + "loss": 196.1823, + "step": 9600 + }, + { + "epoch": 0.03882561601829369, + "grad_norm": 1314.88232421875, + "learning_rate": 1.5376000000000003e-05, + "loss": 156.7792, + "step": 9610 + }, + { + "epoch": 0.038866017283661326, + "grad_norm": 1219.1102294921875, + "learning_rate": 1.5392e-05, + "loss": 188.2459, + "step": 9620 + }, + { + "epoch": 0.03890641854902895, + "grad_norm": 20253.77734375, + "learning_rate": 1.5408000000000002e-05, + "loss": 278.1915, + "step": 9630 + }, + { + "epoch": 0.038946819814396585, + "grad_norm": 1241.70654296875, + "learning_rate": 1.5424e-05, + "loss": 182.1567, + "step": 9640 + }, + { + "epoch": 0.03898722107976422, + "grad_norm": 1074.184326171875, + "learning_rate": 1.544e-05, + "loss": 240.2341, + "step": 9650 + }, + { + "epoch": 0.03902762234513185, + "grad_norm": 2607.8154296875, + "learning_rate": 1.5456000000000002e-05, + "loss": 229.6473, + "step": 9660 + }, + { + "epoch": 0.03906802361049948, + "grad_norm": 1443.951171875, + "learning_rate": 
1.5472e-05, + "loss": 171.401, + "step": 9670 + }, + { + "epoch": 0.03910842487586711, + "grad_norm": 954.8650512695312, + "learning_rate": 1.5488e-05, + "loss": 185.0721, + "step": 9680 + }, + { + "epoch": 0.039148826141234744, + "grad_norm": 1078.0, + "learning_rate": 1.5504000000000003e-05, + "loss": 193.9556, + "step": 9690 + }, + { + "epoch": 0.03918922740660238, + "grad_norm": 4547.8095703125, + "learning_rate": 1.552e-05, + "loss": 207.8449, + "step": 9700 + }, + { + "epoch": 0.03922962867197, + "grad_norm": 747.661865234375, + "learning_rate": 1.5536e-05, + "loss": 244.3708, + "step": 9710 + }, + { + "epoch": 0.039270029937337636, + "grad_norm": 2333.675537109375, + "learning_rate": 1.5552e-05, + "loss": 212.5508, + "step": 9720 + }, + { + "epoch": 0.03931043120270527, + "grad_norm": 1033.94482421875, + "learning_rate": 1.5568e-05, + "loss": 127.1845, + "step": 9730 + }, + { + "epoch": 0.0393508324680729, + "grad_norm": 846.934814453125, + "learning_rate": 1.5584000000000002e-05, + "loss": 195.1059, + "step": 9740 + }, + { + "epoch": 0.03939123373344053, + "grad_norm": 3039.0673828125, + "learning_rate": 1.5600000000000003e-05, + "loss": 233.4147, + "step": 9750 + }, + { + "epoch": 0.03943163499880816, + "grad_norm": 1813.5833740234375, + "learning_rate": 1.5616e-05, + "loss": 182.3817, + "step": 9760 + }, + { + "epoch": 0.039472036264175794, + "grad_norm": 1834.684326171875, + "learning_rate": 1.5632000000000002e-05, + "loss": 195.9725, + "step": 9770 + }, + { + "epoch": 0.03951243752954343, + "grad_norm": 874.73388671875, + "learning_rate": 1.5648e-05, + "loss": 134.0578, + "step": 9780 + }, + { + "epoch": 0.039552838794911054, + "grad_norm": 1855.9298095703125, + "learning_rate": 1.5664e-05, + "loss": 217.2446, + "step": 9790 + }, + { + "epoch": 0.03959324006027869, + "grad_norm": 1064.878173828125, + "learning_rate": 1.5680000000000002e-05, + "loss": 149.1444, + "step": 9800 + }, + { + "epoch": 0.03963364132564632, + "grad_norm": 1991.914306640625, + "learning_rate": 1.5696000000000004e-05, + "loss": 246.7844, + "step": 9810 + }, + { + "epoch": 0.03967404259101395, + "grad_norm": 683.0676879882812, + "learning_rate": 1.5712e-05, + "loss": 190.3318, + "step": 9820 + }, + { + "epoch": 0.03971444385638158, + "grad_norm": 580.6885375976562, + "learning_rate": 1.5728000000000003e-05, + "loss": 231.7323, + "step": 9830 + }, + { + "epoch": 0.03975484512174921, + "grad_norm": 4240.0634765625, + "learning_rate": 1.5744e-05, + "loss": 229.1006, + "step": 9840 + }, + { + "epoch": 0.039795246387116845, + "grad_norm": 1275.5081787109375, + "learning_rate": 1.576e-05, + "loss": 172.6093, + "step": 9850 + }, + { + "epoch": 0.03983564765248448, + "grad_norm": 1786.3184814453125, + "learning_rate": 1.5776e-05, + "loss": 233.943, + "step": 9860 + }, + { + "epoch": 0.039876048917852104, + "grad_norm": 1124.0838623046875, + "learning_rate": 1.5792e-05, + "loss": 203.0486, + "step": 9870 + }, + { + "epoch": 0.03991645018321974, + "grad_norm": 642.4536743164062, + "learning_rate": 1.5808000000000002e-05, + "loss": 147.0342, + "step": 9880 + }, + { + "epoch": 0.03995685144858737, + "grad_norm": 489.4228210449219, + "learning_rate": 1.5824000000000003e-05, + "loss": 192.0327, + "step": 9890 + }, + { + "epoch": 0.039997252713955, + "grad_norm": 885.2113037109375, + "learning_rate": 1.584e-05, + "loss": 200.9227, + "step": 9900 + }, + { + "epoch": 0.04003765397932263, + "grad_norm": 521.3983154296875, + "learning_rate": 1.5856e-05, + "loss": 283.9972, + "step": 9910 + }, + { + "epoch": 
0.04007805524469026, + "grad_norm": 1437.3319091796875, + "learning_rate": 1.5872e-05, + "loss": 218.0835, + "step": 9920 + }, + { + "epoch": 0.040118456510057895, + "grad_norm": 1913.59619140625, + "learning_rate": 1.5888e-05, + "loss": 254.5269, + "step": 9930 + }, + { + "epoch": 0.04015885777542553, + "grad_norm": 896.2860107421875, + "learning_rate": 1.5904000000000002e-05, + "loss": 132.8344, + "step": 9940 + }, + { + "epoch": 0.040199259040793155, + "grad_norm": 2285.6181640625, + "learning_rate": 1.5920000000000003e-05, + "loss": 191.0284, + "step": 9950 + }, + { + "epoch": 0.04023966030616079, + "grad_norm": 938.1224975585938, + "learning_rate": 1.5936e-05, + "loss": 197.0898, + "step": 9960 + }, + { + "epoch": 0.04028006157152842, + "grad_norm": 1346.6063232421875, + "learning_rate": 1.5952000000000002e-05, + "loss": 216.3762, + "step": 9970 + }, + { + "epoch": 0.040320462836896054, + "grad_norm": 886.7606811523438, + "learning_rate": 1.5968e-05, + "loss": 197.1003, + "step": 9980 + }, + { + "epoch": 0.04036086410226368, + "grad_norm": 797.737060546875, + "learning_rate": 1.5984e-05, + "loss": 194.6938, + "step": 9990 + }, + { + "epoch": 0.04040126536763131, + "grad_norm": 917.8461303710938, + "learning_rate": 1.6000000000000003e-05, + "loss": 134.3666, + "step": 10000 + }, + { + "epoch": 0.040441666632998946, + "grad_norm": 568.01025390625, + "learning_rate": 1.6016e-05, + "loss": 160.5006, + "step": 10010 + }, + { + "epoch": 0.04048206789836658, + "grad_norm": 1355.7574462890625, + "learning_rate": 1.6032e-05, + "loss": 190.391, + "step": 10020 + }, + { + "epoch": 0.040522469163734205, + "grad_norm": 880.2090454101562, + "learning_rate": 1.6048000000000003e-05, + "loss": 218.8174, + "step": 10030 + }, + { + "epoch": 0.04056287042910184, + "grad_norm": 914.6061401367188, + "learning_rate": 1.6064e-05, + "loss": 182.8882, + "step": 10040 + }, + { + "epoch": 0.04060327169446947, + "grad_norm": 1641.039306640625, + "learning_rate": 1.6080000000000002e-05, + "loss": 145.9343, + "step": 10050 + }, + { + "epoch": 0.040643672959837104, + "grad_norm": 4381.224609375, + "learning_rate": 1.6096e-05, + "loss": 197.7235, + "step": 10060 + }, + { + "epoch": 0.04068407422520473, + "grad_norm": 1169.22607421875, + "learning_rate": 1.6112e-05, + "loss": 186.1444, + "step": 10070 + }, + { + "epoch": 0.040724475490572364, + "grad_norm": 1742.910400390625, + "learning_rate": 1.6128000000000002e-05, + "loss": 229.4802, + "step": 10080 + }, + { + "epoch": 0.04076487675594, + "grad_norm": 850.3650512695312, + "learning_rate": 1.6144000000000003e-05, + "loss": 182.2731, + "step": 10090 + }, + { + "epoch": 0.04080527802130763, + "grad_norm": 914.3860473632812, + "learning_rate": 1.616e-05, + "loss": 180.5242, + "step": 10100 + }, + { + "epoch": 0.040845679286675256, + "grad_norm": 1357.98388671875, + "learning_rate": 1.6176e-05, + "loss": 196.0637, + "step": 10110 + }, + { + "epoch": 0.04088608055204289, + "grad_norm": 1019.5401000976562, + "learning_rate": 1.6192e-05, + "loss": 170.7992, + "step": 10120 + }, + { + "epoch": 0.04092648181741052, + "grad_norm": 1082.9794921875, + "learning_rate": 1.6208e-05, + "loss": 194.5852, + "step": 10130 + }, + { + "epoch": 0.040966883082778155, + "grad_norm": 2204.076904296875, + "learning_rate": 1.6224000000000003e-05, + "loss": 199.9022, + "step": 10140 + }, + { + "epoch": 0.04100728434814578, + "grad_norm": 1093.7376708984375, + "learning_rate": 1.6240000000000004e-05, + "loss": 187.0371, + "step": 10150 + }, + { + "epoch": 0.041047685613513414, + "grad_norm": 
2062.820068359375, + "learning_rate": 1.6256e-05, + "loss": 227.5364, + "step": 10160 + }, + { + "epoch": 0.04108808687888105, + "grad_norm": 5836.3212890625, + "learning_rate": 1.6272000000000003e-05, + "loss": 205.0402, + "step": 10170 + }, + { + "epoch": 0.04112848814424868, + "grad_norm": 722.3588256835938, + "learning_rate": 1.6288e-05, + "loss": 180.361, + "step": 10180 + }, + { + "epoch": 0.041168889409616306, + "grad_norm": 2181.044921875, + "learning_rate": 1.6304000000000002e-05, + "loss": 319.5832, + "step": 10190 + }, + { + "epoch": 0.04120929067498394, + "grad_norm": 848.5480346679688, + "learning_rate": 1.632e-05, + "loss": 164.5065, + "step": 10200 + }, + { + "epoch": 0.04124969194035157, + "grad_norm": 788.4448852539062, + "learning_rate": 1.6336e-05, + "loss": 171.2751, + "step": 10210 + }, + { + "epoch": 0.041290093205719205, + "grad_norm": 1239.2464599609375, + "learning_rate": 1.6352000000000002e-05, + "loss": 207.898, + "step": 10220 + }, + { + "epoch": 0.04133049447108683, + "grad_norm": 891.822021484375, + "learning_rate": 1.6368000000000003e-05, + "loss": 138.2572, + "step": 10230 + }, + { + "epoch": 0.041370895736454465, + "grad_norm": 1925.04248046875, + "learning_rate": 1.6384e-05, + "loss": 192.0386, + "step": 10240 + }, + { + "epoch": 0.0414112970018221, + "grad_norm": 1509.3814697265625, + "learning_rate": 1.64e-05, + "loss": 291.9804, + "step": 10250 + }, + { + "epoch": 0.04145169826718973, + "grad_norm": 873.1856689453125, + "learning_rate": 1.6416e-05, + "loss": 172.815, + "step": 10260 + }, + { + "epoch": 0.04149209953255736, + "grad_norm": 937.602783203125, + "learning_rate": 1.6432e-05, + "loss": 165.7745, + "step": 10270 + }, + { + "epoch": 0.04153250079792499, + "grad_norm": 1097.0872802734375, + "learning_rate": 1.6448000000000002e-05, + "loss": 213.3541, + "step": 10280 + }, + { + "epoch": 0.04157290206329262, + "grad_norm": 825.9088745117188, + "learning_rate": 1.6464000000000004e-05, + "loss": 123.3682, + "step": 10290 + }, + { + "epoch": 0.041613303328660256, + "grad_norm": 1783.4136962890625, + "learning_rate": 1.648e-05, + "loss": 212.5186, + "step": 10300 + }, + { + "epoch": 0.04165370459402788, + "grad_norm": 1184.926025390625, + "learning_rate": 1.6496e-05, + "loss": 204.5421, + "step": 10310 + }, + { + "epoch": 0.041694105859395515, + "grad_norm": 992.3025512695312, + "learning_rate": 1.6512e-05, + "loss": 233.5088, + "step": 10320 + }, + { + "epoch": 0.04173450712476315, + "grad_norm": 666.0808715820312, + "learning_rate": 1.6528e-05, + "loss": 142.7806, + "step": 10330 + }, + { + "epoch": 0.04177490839013078, + "grad_norm": 1052.9947509765625, + "learning_rate": 1.6544000000000003e-05, + "loss": 145.6399, + "step": 10340 + }, + { + "epoch": 0.04181530965549841, + "grad_norm": 4691.17822265625, + "learning_rate": 1.656e-05, + "loss": 159.7967, + "step": 10350 + }, + { + "epoch": 0.04185571092086604, + "grad_norm": 1791.358154296875, + "learning_rate": 1.6576000000000002e-05, + "loss": 205.874, + "step": 10360 + }, + { + "epoch": 0.041896112186233674, + "grad_norm": 2564.51953125, + "learning_rate": 1.6592000000000003e-05, + "loss": 270.0561, + "step": 10370 + }, + { + "epoch": 0.04193651345160131, + "grad_norm": 798.0155639648438, + "learning_rate": 1.6608e-05, + "loss": 234.4749, + "step": 10380 + }, + { + "epoch": 0.04197691471696893, + "grad_norm": 430.9671325683594, + "learning_rate": 1.6624000000000002e-05, + "loss": 149.8516, + "step": 10390 + }, + { + "epoch": 0.042017315982336566, + "grad_norm": 898.3456420898438, + "learning_rate": 
1.664e-05, + "loss": 207.0033, + "step": 10400 + }, + { + "epoch": 0.0420577172477042, + "grad_norm": 875.125244140625, + "learning_rate": 1.6656e-05, + "loss": 174.8937, + "step": 10410 + }, + { + "epoch": 0.04209811851307183, + "grad_norm": 2299.7353515625, + "learning_rate": 1.6672000000000002e-05, + "loss": 171.9774, + "step": 10420 + }, + { + "epoch": 0.04213851977843946, + "grad_norm": 1638.010498046875, + "learning_rate": 1.6688000000000004e-05, + "loss": 192.5341, + "step": 10430 + }, + { + "epoch": 0.04217892104380709, + "grad_norm": 1130.6561279296875, + "learning_rate": 1.6704e-05, + "loss": 227.2423, + "step": 10440 + }, + { + "epoch": 0.042219322309174724, + "grad_norm": 1451.8306884765625, + "learning_rate": 1.672e-05, + "loss": 202.0446, + "step": 10450 + }, + { + "epoch": 0.04225972357454236, + "grad_norm": 2114.2548828125, + "learning_rate": 1.6736e-05, + "loss": 165.5462, + "step": 10460 + }, + { + "epoch": 0.04230012483990998, + "grad_norm": 1656.0751953125, + "learning_rate": 1.6752e-05, + "loss": 125.0506, + "step": 10470 + }, + { + "epoch": 0.042340526105277616, + "grad_norm": 1022.1192626953125, + "learning_rate": 1.6768000000000003e-05, + "loss": 145.8449, + "step": 10480 + }, + { + "epoch": 0.04238092737064525, + "grad_norm": 1345.9271240234375, + "learning_rate": 1.6784e-05, + "loss": 189.086, + "step": 10490 + }, + { + "epoch": 0.04242132863601288, + "grad_norm": 1947.4927978515625, + "learning_rate": 1.6800000000000002e-05, + "loss": 200.4193, + "step": 10500 + }, + { + "epoch": 0.04246172990138051, + "grad_norm": 739.9483032226562, + "learning_rate": 1.6816e-05, + "loss": 163.9967, + "step": 10510 + }, + { + "epoch": 0.04250213116674814, + "grad_norm": 1815.6961669921875, + "learning_rate": 1.6832e-05, + "loss": 186.5023, + "step": 10520 + }, + { + "epoch": 0.042542532432115775, + "grad_norm": 725.1267700195312, + "learning_rate": 1.6848000000000002e-05, + "loss": 181.0342, + "step": 10530 + }, + { + "epoch": 0.04258293369748341, + "grad_norm": 767.8426513671875, + "learning_rate": 1.6864e-05, + "loss": 169.7085, + "step": 10540 + }, + { + "epoch": 0.042623334962851034, + "grad_norm": 542.227783203125, + "learning_rate": 1.688e-05, + "loss": 131.3597, + "step": 10550 + }, + { + "epoch": 0.04266373622821867, + "grad_norm": 2249.716796875, + "learning_rate": 1.6896000000000002e-05, + "loss": 176.4151, + "step": 10560 + }, + { + "epoch": 0.0427041374935863, + "grad_norm": 1699.3236083984375, + "learning_rate": 1.6912000000000003e-05, + "loss": 158.2107, + "step": 10570 + }, + { + "epoch": 0.04274453875895393, + "grad_norm": 1464.567138671875, + "learning_rate": 1.6928e-05, + "loss": 191.7402, + "step": 10580 + }, + { + "epoch": 0.04278494002432156, + "grad_norm": 2373.8994140625, + "learning_rate": 1.6944e-05, + "loss": 189.2257, + "step": 10590 + }, + { + "epoch": 0.04282534128968919, + "grad_norm": 2877.680908203125, + "learning_rate": 1.696e-05, + "loss": 165.4342, + "step": 10600 + }, + { + "epoch": 0.042865742555056825, + "grad_norm": 1328.87109375, + "learning_rate": 1.6976e-05, + "loss": 225.7768, + "step": 10610 + }, + { + "epoch": 0.04290614382042446, + "grad_norm": 1556.5284423828125, + "learning_rate": 1.6992000000000003e-05, + "loss": 235.6774, + "step": 10620 + }, + { + "epoch": 0.042946545085792084, + "grad_norm": 2441.867919921875, + "learning_rate": 1.7008000000000004e-05, + "loss": 161.8504, + "step": 10630 + }, + { + "epoch": 0.04298694635115972, + "grad_norm": 1901.5953369140625, + "learning_rate": 1.7024e-05, + "loss": 202.435, + "step": 10640 
+ }, + { + "epoch": 0.04302734761652735, + "grad_norm": 1466.30126953125, + "learning_rate": 1.704e-05, + "loss": 192.916, + "step": 10650 + }, + { + "epoch": 0.043067748881894984, + "grad_norm": 1073.0615234375, + "learning_rate": 1.7056e-05, + "loss": 211.4328, + "step": 10660 + }, + { + "epoch": 0.04310815014726261, + "grad_norm": 902.869384765625, + "learning_rate": 1.7072000000000002e-05, + "loss": 164.3338, + "step": 10670 + }, + { + "epoch": 0.04314855141263024, + "grad_norm": 898.3487548828125, + "learning_rate": 1.7088000000000003e-05, + "loss": 168.7474, + "step": 10680 + }, + { + "epoch": 0.043188952677997876, + "grad_norm": 857.4998779296875, + "learning_rate": 1.7104e-05, + "loss": 219.368, + "step": 10690 + }, + { + "epoch": 0.04322935394336551, + "grad_norm": 1758.626708984375, + "learning_rate": 1.7120000000000002e-05, + "loss": 250.1015, + "step": 10700 + }, + { + "epoch": 0.043269755208733135, + "grad_norm": 602.1392211914062, + "learning_rate": 1.7136e-05, + "loss": 200.5211, + "step": 10710 + }, + { + "epoch": 0.04331015647410077, + "grad_norm": 1285.5726318359375, + "learning_rate": 1.7152e-05, + "loss": 242.9574, + "step": 10720 + }, + { + "epoch": 0.0433505577394684, + "grad_norm": 2100.295166015625, + "learning_rate": 1.7168000000000002e-05, + "loss": 219.7286, + "step": 10730 + }, + { + "epoch": 0.043390959004836034, + "grad_norm": 1247.1678466796875, + "learning_rate": 1.7184e-05, + "loss": 158.9266, + "step": 10740 + }, + { + "epoch": 0.04343136027020366, + "grad_norm": 624.0576782226562, + "learning_rate": 1.72e-05, + "loss": 220.0825, + "step": 10750 + }, + { + "epoch": 0.04347176153557129, + "grad_norm": 12828.1884765625, + "learning_rate": 1.7216000000000003e-05, + "loss": 263.4318, + "step": 10760 + }, + { + "epoch": 0.043512162800938926, + "grad_norm": 957.2345581054688, + "learning_rate": 1.7232000000000004e-05, + "loss": 198.2909, + "step": 10770 + }, + { + "epoch": 0.04355256406630656, + "grad_norm": 1323.8814697265625, + "learning_rate": 1.7248e-05, + "loss": 170.8916, + "step": 10780 + }, + { + "epoch": 0.043592965331674186, + "grad_norm": 1541.7998046875, + "learning_rate": 1.7264e-05, + "loss": 187.1392, + "step": 10790 + }, + { + "epoch": 0.04363336659704182, + "grad_norm": 2205.041015625, + "learning_rate": 1.728e-05, + "loss": 179.3903, + "step": 10800 + }, + { + "epoch": 0.04367376786240945, + "grad_norm": 1271.7928466796875, + "learning_rate": 1.7296000000000002e-05, + "loss": 277.045, + "step": 10810 + }, + { + "epoch": 0.043714169127777085, + "grad_norm": 1563.8782958984375, + "learning_rate": 1.7312000000000003e-05, + "loss": 207.0989, + "step": 10820 + }, + { + "epoch": 0.04375457039314471, + "grad_norm": 757.2264404296875, + "learning_rate": 1.7328e-05, + "loss": 219.7403, + "step": 10830 + }, + { + "epoch": 0.043794971658512344, + "grad_norm": 1248.6348876953125, + "learning_rate": 1.7344000000000002e-05, + "loss": 235.4911, + "step": 10840 + }, + { + "epoch": 0.04383537292387998, + "grad_norm": 1227.9400634765625, + "learning_rate": 1.736e-05, + "loss": 227.7675, + "step": 10850 + }, + { + "epoch": 0.04387577418924761, + "grad_norm": 1044.2037353515625, + "learning_rate": 1.7376e-05, + "loss": 172.205, + "step": 10860 + }, + { + "epoch": 0.043916175454615236, + "grad_norm": 1646.6480712890625, + "learning_rate": 1.7392000000000002e-05, + "loss": 214.103, + "step": 10870 + }, + { + "epoch": 0.04395657671998287, + "grad_norm": 1624.4903564453125, + "learning_rate": 1.7408e-05, + "loss": 206.3534, + "step": 10880 + }, + { + "epoch": 
0.0439969779853505, + "grad_norm": 1734.4029541015625, + "learning_rate": 1.7424e-05, + "loss": 211.9735, + "step": 10890 + }, + { + "epoch": 0.044037379250718135, + "grad_norm": 1137.95849609375, + "learning_rate": 1.7440000000000002e-05, + "loss": 175.277, + "step": 10900 + }, + { + "epoch": 0.04407778051608576, + "grad_norm": 1093.4541015625, + "learning_rate": 1.7456e-05, + "loss": 133.2124, + "step": 10910 + }, + { + "epoch": 0.044118181781453394, + "grad_norm": 822.2950439453125, + "learning_rate": 1.7472e-05, + "loss": 151.2747, + "step": 10920 + }, + { + "epoch": 0.04415858304682103, + "grad_norm": 1287.9150390625, + "learning_rate": 1.7488e-05, + "loss": 163.0298, + "step": 10930 + }, + { + "epoch": 0.04419898431218866, + "grad_norm": 1741.5506591796875, + "learning_rate": 1.7504e-05, + "loss": 187.158, + "step": 10940 + }, + { + "epoch": 0.04423938557755629, + "grad_norm": 895.549072265625, + "learning_rate": 1.752e-05, + "loss": 146.7228, + "step": 10950 + }, + { + "epoch": 0.04427978684292392, + "grad_norm": 1032.099609375, + "learning_rate": 1.7536000000000003e-05, + "loss": 123.5247, + "step": 10960 + }, + { + "epoch": 0.04432018810829155, + "grad_norm": 310.9967956542969, + "learning_rate": 1.7552e-05, + "loss": 155.5804, + "step": 10970 + }, + { + "epoch": 0.044360589373659186, + "grad_norm": 4743.1220703125, + "learning_rate": 1.7568000000000002e-05, + "loss": 261.0576, + "step": 10980 + }, + { + "epoch": 0.04440099063902681, + "grad_norm": 974.1761474609375, + "learning_rate": 1.7584e-05, + "loss": 342.8138, + "step": 10990 + }, + { + "epoch": 0.044441391904394445, + "grad_norm": 4220.9404296875, + "learning_rate": 1.76e-05, + "loss": 191.6163, + "step": 11000 + }, + { + "epoch": 0.04448179316976208, + "grad_norm": 3995.680419921875, + "learning_rate": 1.7616000000000002e-05, + "loss": 225.9993, + "step": 11010 + }, + { + "epoch": 0.04452219443512971, + "grad_norm": 1169.3814697265625, + "learning_rate": 1.7632000000000003e-05, + "loss": 177.0123, + "step": 11020 + }, + { + "epoch": 0.04456259570049734, + "grad_norm": 1551.4967041015625, + "learning_rate": 1.7648e-05, + "loss": 200.4899, + "step": 11030 + }, + { + "epoch": 0.04460299696586497, + "grad_norm": 785.3912963867188, + "learning_rate": 1.7664000000000002e-05, + "loss": 198.2486, + "step": 11040 + }, + { + "epoch": 0.0446433982312326, + "grad_norm": 486.1355285644531, + "learning_rate": 1.768e-05, + "loss": 158.5898, + "step": 11050 + }, + { + "epoch": 0.044683799496600236, + "grad_norm": 855.1481323242188, + "learning_rate": 1.7696e-05, + "loss": 130.4719, + "step": 11060 + }, + { + "epoch": 0.04472420076196786, + "grad_norm": 862.826416015625, + "learning_rate": 1.7712000000000003e-05, + "loss": 136.3901, + "step": 11070 + }, + { + "epoch": 0.044764602027335496, + "grad_norm": 1168.069580078125, + "learning_rate": 1.7728e-05, + "loss": 171.204, + "step": 11080 + }, + { + "epoch": 0.04480500329270313, + "grad_norm": 1272.8150634765625, + "learning_rate": 1.7744e-05, + "loss": 143.0124, + "step": 11090 + }, + { + "epoch": 0.04484540455807076, + "grad_norm": 1238.567626953125, + "learning_rate": 1.7760000000000003e-05, + "loss": 233.3269, + "step": 11100 + }, + { + "epoch": 0.04488580582343839, + "grad_norm": 2443.775146484375, + "learning_rate": 1.7776e-05, + "loss": 189.0202, + "step": 11110 + }, + { + "epoch": 0.04492620708880602, + "grad_norm": 1311.806396484375, + "learning_rate": 1.7792000000000002e-05, + "loss": 180.7428, + "step": 11120 + }, + { + "epoch": 0.044966608354173654, + "grad_norm": 
1038.2191162109375, + "learning_rate": 1.7808e-05, + "loss": 104.5157, + "step": 11130 + }, + { + "epoch": 0.04500700961954129, + "grad_norm": 2097.45361328125, + "learning_rate": 1.7824e-05, + "loss": 205.8537, + "step": 11140 + }, + { + "epoch": 0.04504741088490891, + "grad_norm": 901.9558715820312, + "learning_rate": 1.7840000000000002e-05, + "loss": 217.5247, + "step": 11150 + }, + { + "epoch": 0.045087812150276546, + "grad_norm": 1766.9412841796875, + "learning_rate": 1.7856000000000003e-05, + "loss": 323.4518, + "step": 11160 + }, + { + "epoch": 0.04512821341564418, + "grad_norm": 6102.6259765625, + "learning_rate": 1.7872e-05, + "loss": 158.3255, + "step": 11170 + }, + { + "epoch": 0.04516861468101181, + "grad_norm": 1260.8720703125, + "learning_rate": 1.7888000000000002e-05, + "loss": 230.2031, + "step": 11180 + }, + { + "epoch": 0.04520901594637944, + "grad_norm": 3071.31640625, + "learning_rate": 1.7904e-05, + "loss": 180.546, + "step": 11190 + }, + { + "epoch": 0.04524941721174707, + "grad_norm": 2505.195068359375, + "learning_rate": 1.792e-05, + "loss": 283.195, + "step": 11200 + }, + { + "epoch": 0.045289818477114704, + "grad_norm": 1028.5045166015625, + "learning_rate": 1.7936000000000002e-05, + "loss": 175.4311, + "step": 11210 + }, + { + "epoch": 0.04533021974248234, + "grad_norm": 1085.4832763671875, + "learning_rate": 1.7952e-05, + "loss": 207.6404, + "step": 11220 + }, + { + "epoch": 0.045370621007849964, + "grad_norm": 1391.8634033203125, + "learning_rate": 1.7968e-05, + "loss": 173.6953, + "step": 11230 + }, + { + "epoch": 0.0454110222732176, + "grad_norm": 1743.35400390625, + "learning_rate": 1.7984000000000003e-05, + "loss": 275.3905, + "step": 11240 + }, + { + "epoch": 0.04545142353858523, + "grad_norm": 614.23095703125, + "learning_rate": 1.8e-05, + "loss": 150.855, + "step": 11250 + }, + { + "epoch": 0.04549182480395286, + "grad_norm": 1272.5782470703125, + "learning_rate": 1.8016e-05, + "loss": 197.9765, + "step": 11260 + }, + { + "epoch": 0.04553222606932049, + "grad_norm": 1688.277099609375, + "learning_rate": 1.8032e-05, + "loss": 187.209, + "step": 11270 + }, + { + "epoch": 0.04557262733468812, + "grad_norm": 1177.5115966796875, + "learning_rate": 1.8048e-05, + "loss": 221.8554, + "step": 11280 + }, + { + "epoch": 0.045613028600055755, + "grad_norm": 3072.171142578125, + "learning_rate": 1.8064000000000002e-05, + "loss": 246.0302, + "step": 11290 + }, + { + "epoch": 0.04565342986542339, + "grad_norm": 1316.9097900390625, + "learning_rate": 1.8080000000000003e-05, + "loss": 166.4383, + "step": 11300 + }, + { + "epoch": 0.045693831130791014, + "grad_norm": 781.8187255859375, + "learning_rate": 1.8096e-05, + "loss": 193.5794, + "step": 11310 + }, + { + "epoch": 0.04573423239615865, + "grad_norm": 1052.5584716796875, + "learning_rate": 1.8112000000000002e-05, + "loss": 203.9555, + "step": 11320 + }, + { + "epoch": 0.04577463366152628, + "grad_norm": 5756.6005859375, + "learning_rate": 1.8128e-05, + "loss": 235.0596, + "step": 11330 + }, + { + "epoch": 0.04581503492689391, + "grad_norm": 1227.9710693359375, + "learning_rate": 1.8144e-05, + "loss": 195.1405, + "step": 11340 + }, + { + "epoch": 0.04585543619226154, + "grad_norm": 1084.5941162109375, + "learning_rate": 1.8160000000000002e-05, + "loss": 196.6385, + "step": 11350 + }, + { + "epoch": 0.04589583745762917, + "grad_norm": 1819.8359375, + "learning_rate": 1.8176000000000004e-05, + "loss": 228.4734, + "step": 11360 + }, + { + "epoch": 0.045936238722996806, + "grad_norm": 1329.8941650390625, + 
"learning_rate": 1.8192e-05, + "loss": 155.7496, + "step": 11370 + }, + { + "epoch": 0.04597663998836444, + "grad_norm": 642.3541259765625, + "learning_rate": 1.8208000000000003e-05, + "loss": 173.2344, + "step": 11380 + }, + { + "epoch": 0.046017041253732065, + "grad_norm": 763.05224609375, + "learning_rate": 1.8224e-05, + "loss": 216.9143, + "step": 11390 + }, + { + "epoch": 0.0460574425190997, + "grad_norm": 881.7998657226562, + "learning_rate": 1.824e-05, + "loss": 167.4916, + "step": 11400 + }, + { + "epoch": 0.04609784378446733, + "grad_norm": 1753.553955078125, + "learning_rate": 1.8256e-05, + "loss": 151.0477, + "step": 11410 + }, + { + "epoch": 0.046138245049834964, + "grad_norm": 2091.069580078125, + "learning_rate": 1.8272e-05, + "loss": 194.0207, + "step": 11420 + }, + { + "epoch": 0.04617864631520259, + "grad_norm": 1622.05908203125, + "learning_rate": 1.8288000000000002e-05, + "loss": 178.2394, + "step": 11430 + }, + { + "epoch": 0.04621904758057022, + "grad_norm": 1063.04296875, + "learning_rate": 1.8304000000000003e-05, + "loss": 195.1976, + "step": 11440 + }, + { + "epoch": 0.046259448845937856, + "grad_norm": 2237.779296875, + "learning_rate": 1.832e-05, + "loss": 175.5735, + "step": 11450 + }, + { + "epoch": 0.04629985011130549, + "grad_norm": 1211.247802734375, + "learning_rate": 1.8336000000000002e-05, + "loss": 222.7697, + "step": 11460 + }, + { + "epoch": 0.046340251376673115, + "grad_norm": 1457.9163818359375, + "learning_rate": 1.8352e-05, + "loss": 178.4207, + "step": 11470 + }, + { + "epoch": 0.04638065264204075, + "grad_norm": 1436.1390380859375, + "learning_rate": 1.8368e-05, + "loss": 227.2467, + "step": 11480 + }, + { + "epoch": 0.04642105390740838, + "grad_norm": 1160.2974853515625, + "learning_rate": 1.8384000000000002e-05, + "loss": 180.2748, + "step": 11490 + }, + { + "epoch": 0.046461455172776014, + "grad_norm": 970.9029541015625, + "learning_rate": 1.8400000000000003e-05, + "loss": 179.3989, + "step": 11500 + }, + { + "epoch": 0.04650185643814364, + "grad_norm": 722.5786743164062, + "learning_rate": 1.8416e-05, + "loss": 179.3075, + "step": 11510 + }, + { + "epoch": 0.046542257703511274, + "grad_norm": 511.67254638671875, + "learning_rate": 1.8432000000000002e-05, + "loss": 173.6855, + "step": 11520 + }, + { + "epoch": 0.04658265896887891, + "grad_norm": 851.8318481445312, + "learning_rate": 1.8448e-05, + "loss": 154.7247, + "step": 11530 + }, + { + "epoch": 0.04662306023424654, + "grad_norm": 1083.90576171875, + "learning_rate": 1.8464e-05, + "loss": 219.9375, + "step": 11540 + }, + { + "epoch": 0.046663461499614166, + "grad_norm": 1282.5338134765625, + "learning_rate": 1.8480000000000003e-05, + "loss": 217.2527, + "step": 11550 + }, + { + "epoch": 0.0467038627649818, + "grad_norm": 1417.5146484375, + "learning_rate": 1.8496e-05, + "loss": 267.0972, + "step": 11560 + }, + { + "epoch": 0.04674426403034943, + "grad_norm": 1039.891845703125, + "learning_rate": 1.8512e-05, + "loss": 214.6447, + "step": 11570 + }, + { + "epoch": 0.046784665295717065, + "grad_norm": 758.0857543945312, + "learning_rate": 1.8528000000000003e-05, + "loss": 110.7705, + "step": 11580 + }, + { + "epoch": 0.04682506656108469, + "grad_norm": 1202.7984619140625, + "learning_rate": 1.8544e-05, + "loss": 170.7587, + "step": 11590 + }, + { + "epoch": 0.046865467826452324, + "grad_norm": 1095.6929931640625, + "learning_rate": 1.8560000000000002e-05, + "loss": 149.9107, + "step": 11600 + }, + { + "epoch": 0.04690586909181996, + "grad_norm": 1829.067138671875, + "learning_rate": 
1.8576e-05, + "loss": 111.017, + "step": 11610 + }, + { + "epoch": 0.04694627035718759, + "grad_norm": 1352.06005859375, + "learning_rate": 1.8592e-05, + "loss": 174.0706, + "step": 11620 + }, + { + "epoch": 0.046986671622555216, + "grad_norm": 1412.03173828125, + "learning_rate": 1.8608000000000002e-05, + "loss": 221.9794, + "step": 11630 + }, + { + "epoch": 0.04702707288792285, + "grad_norm": 863.0822143554688, + "learning_rate": 1.8624000000000003e-05, + "loss": 192.9344, + "step": 11640 + }, + { + "epoch": 0.04706747415329048, + "grad_norm": 1331.062255859375, + "learning_rate": 1.864e-05, + "loss": 192.0491, + "step": 11650 + }, + { + "epoch": 0.047107875418658116, + "grad_norm": 772.4974365234375, + "learning_rate": 1.8656000000000002e-05, + "loss": 105.1091, + "step": 11660 + }, + { + "epoch": 0.04714827668402574, + "grad_norm": 677.456787109375, + "learning_rate": 1.8672e-05, + "loss": 175.4434, + "step": 11670 + }, + { + "epoch": 0.047188677949393375, + "grad_norm": 1157.3990478515625, + "learning_rate": 1.8688e-05, + "loss": 275.8079, + "step": 11680 + }, + { + "epoch": 0.04722907921476101, + "grad_norm": 2144.724609375, + "learning_rate": 1.8704000000000003e-05, + "loss": 278.5781, + "step": 11690 + }, + { + "epoch": 0.04726948048012864, + "grad_norm": 1037.315673828125, + "learning_rate": 1.8720000000000004e-05, + "loss": 166.4819, + "step": 11700 + }, + { + "epoch": 0.04730988174549627, + "grad_norm": 1168.1617431640625, + "learning_rate": 1.8736e-05, + "loss": 144.9721, + "step": 11710 + }, + { + "epoch": 0.0473502830108639, + "grad_norm": 1252.8349609375, + "learning_rate": 1.8752000000000003e-05, + "loss": 169.4472, + "step": 11720 + }, + { + "epoch": 0.04739068427623153, + "grad_norm": 751.3773803710938, + "learning_rate": 1.8768e-05, + "loss": 177.1455, + "step": 11730 + }, + { + "epoch": 0.047431085541599166, + "grad_norm": 1462.7012939453125, + "learning_rate": 1.8784000000000002e-05, + "loss": 157.9597, + "step": 11740 + }, + { + "epoch": 0.04747148680696679, + "grad_norm": 1613.37890625, + "learning_rate": 1.88e-05, + "loss": 259.0997, + "step": 11750 + }, + { + "epoch": 0.047511888072334425, + "grad_norm": 802.1982421875, + "learning_rate": 1.8816e-05, + "loss": 122.4745, + "step": 11760 + }, + { + "epoch": 0.04755228933770206, + "grad_norm": 2038.0235595703125, + "learning_rate": 1.8832000000000002e-05, + "loss": 196.6302, + "step": 11770 + }, + { + "epoch": 0.04759269060306969, + "grad_norm": 1135.935546875, + "learning_rate": 1.8848000000000003e-05, + "loss": 147.5192, + "step": 11780 + }, + { + "epoch": 0.04763309186843732, + "grad_norm": 1667.1392822265625, + "learning_rate": 1.8864e-05, + "loss": 173.6265, + "step": 11790 + }, + { + "epoch": 0.04767349313380495, + "grad_norm": 619.2572631835938, + "learning_rate": 1.8880000000000002e-05, + "loss": 181.9072, + "step": 11800 + }, + { + "epoch": 0.047713894399172584, + "grad_norm": 527.74609375, + "learning_rate": 1.8896e-05, + "loss": 193.8452, + "step": 11810 + }, + { + "epoch": 0.04775429566454022, + "grad_norm": 1670.20703125, + "learning_rate": 1.8912e-05, + "loss": 173.6703, + "step": 11820 + }, + { + "epoch": 0.04779469692990784, + "grad_norm": 1012.37744140625, + "learning_rate": 1.8928000000000002e-05, + "loss": 188.7732, + "step": 11830 + }, + { + "epoch": 0.047835098195275476, + "grad_norm": 1409.4901123046875, + "learning_rate": 1.8944000000000004e-05, + "loss": 182.8402, + "step": 11840 + }, + { + "epoch": 0.04787549946064311, + "grad_norm": 1092.1893310546875, + "learning_rate": 1.896e-05, + "loss": 
181.5261, + "step": 11850 + }, + { + "epoch": 0.04791590072601074, + "grad_norm": 1987.2816162109375, + "learning_rate": 1.8976000000000003e-05, + "loss": 163.3646, + "step": 11860 + }, + { + "epoch": 0.04795630199137837, + "grad_norm": 1601.680419921875, + "learning_rate": 1.8992e-05, + "loss": 193.0259, + "step": 11870 + }, + { + "epoch": 0.047996703256746, + "grad_norm": 1483.77294921875, + "learning_rate": 1.9008e-05, + "loss": 229.6702, + "step": 11880 + }, + { + "epoch": 0.048037104522113634, + "grad_norm": 1122.0908203125, + "learning_rate": 1.9024000000000003e-05, + "loss": 215.6896, + "step": 11890 + }, + { + "epoch": 0.04807750578748127, + "grad_norm": 6117.84326171875, + "learning_rate": 1.904e-05, + "loss": 217.3147, + "step": 11900 + }, + { + "epoch": 0.04811790705284889, + "grad_norm": 1037.3994140625, + "learning_rate": 1.9056000000000002e-05, + "loss": 152.7146, + "step": 11910 + }, + { + "epoch": 0.048158308318216526, + "grad_norm": 1529.576904296875, + "learning_rate": 1.9072000000000003e-05, + "loss": 149.3212, + "step": 11920 + }, + { + "epoch": 0.04819870958358416, + "grad_norm": 1108.2249755859375, + "learning_rate": 1.9088e-05, + "loss": 163.1594, + "step": 11930 + }, + { + "epoch": 0.04823911084895179, + "grad_norm": 1377.23486328125, + "learning_rate": 1.9104000000000002e-05, + "loss": 203.4149, + "step": 11940 + }, + { + "epoch": 0.04827951211431942, + "grad_norm": 1377.9066162109375, + "learning_rate": 1.912e-05, + "loss": 243.0216, + "step": 11950 + }, + { + "epoch": 0.04831991337968705, + "grad_norm": 922.9132080078125, + "learning_rate": 1.9136e-05, + "loss": 177.7692, + "step": 11960 + }, + { + "epoch": 0.048360314645054685, + "grad_norm": 654.232421875, + "learning_rate": 1.9152000000000002e-05, + "loss": 124.7906, + "step": 11970 + }, + { + "epoch": 0.04840071591042232, + "grad_norm": 790.6004028320312, + "learning_rate": 1.9168000000000004e-05, + "loss": 186.0344, + "step": 11980 + }, + { + "epoch": 0.048441117175789944, + "grad_norm": 1846.2784423828125, + "learning_rate": 1.9184e-05, + "loss": 237.3549, + "step": 11990 + }, + { + "epoch": 0.04848151844115758, + "grad_norm": 570.1096801757812, + "learning_rate": 1.9200000000000003e-05, + "loss": 194.0104, + "step": 12000 + }, + { + "epoch": 0.04852191970652521, + "grad_norm": 937.447265625, + "learning_rate": 1.9216e-05, + "loss": 178.9326, + "step": 12010 + }, + { + "epoch": 0.04856232097189284, + "grad_norm": 1087.5257568359375, + "learning_rate": 1.9232e-05, + "loss": 178.5785, + "step": 12020 + }, + { + "epoch": 0.04860272223726047, + "grad_norm": 956.5505981445312, + "learning_rate": 1.9248000000000003e-05, + "loss": 227.7713, + "step": 12030 + }, + { + "epoch": 0.0486431235026281, + "grad_norm": 2038.4173583984375, + "learning_rate": 1.9264e-05, + "loss": 238.1603, + "step": 12040 + }, + { + "epoch": 0.048683524767995735, + "grad_norm": 531.2886962890625, + "learning_rate": 1.9280000000000002e-05, + "loss": 181.6406, + "step": 12050 + }, + { + "epoch": 0.04872392603336337, + "grad_norm": 1353.986083984375, + "learning_rate": 1.9296000000000003e-05, + "loss": 174.8207, + "step": 12060 + }, + { + "epoch": 0.048764327298730994, + "grad_norm": 1425.5430908203125, + "learning_rate": 1.9312e-05, + "loss": 173.2292, + "step": 12070 + }, + { + "epoch": 0.04880472856409863, + "grad_norm": 2490.211669921875, + "learning_rate": 1.9328000000000002e-05, + "loss": 143.7661, + "step": 12080 + }, + { + "epoch": 0.04884512982946626, + "grad_norm": 1828.46826171875, + "learning_rate": 1.9344e-05, + "loss": 269.7314, 
+ "step": 12090 + }, + { + "epoch": 0.048885531094833894, + "grad_norm": 1306.793212890625, + "learning_rate": 1.936e-05, + "loss": 235.9968, + "step": 12100 + }, + { + "epoch": 0.04892593236020152, + "grad_norm": 4388.6669921875, + "learning_rate": 1.9376000000000002e-05, + "loss": 238.4837, + "step": 12110 + }, + { + "epoch": 0.04896633362556915, + "grad_norm": 1477.3648681640625, + "learning_rate": 1.9392000000000003e-05, + "loss": 227.0377, + "step": 12120 + }, + { + "epoch": 0.049006734890936786, + "grad_norm": 1481.6533203125, + "learning_rate": 1.9408e-05, + "loss": 182.3262, + "step": 12130 + }, + { + "epoch": 0.04904713615630442, + "grad_norm": 844.3226318359375, + "learning_rate": 1.9424e-05, + "loss": 191.5139, + "step": 12140 + }, + { + "epoch": 0.049087537421672045, + "grad_norm": 1366.0286865234375, + "learning_rate": 1.944e-05, + "loss": 208.0362, + "step": 12150 + }, + { + "epoch": 0.04912793868703968, + "grad_norm": 3024.863525390625, + "learning_rate": 1.9456e-05, + "loss": 233.3925, + "step": 12160 + }, + { + "epoch": 0.04916833995240731, + "grad_norm": 2226.45556640625, + "learning_rate": 1.9472000000000003e-05, + "loss": 224.0713, + "step": 12170 + }, + { + "epoch": 0.04920874121777494, + "grad_norm": 1692.7799072265625, + "learning_rate": 1.9488000000000004e-05, + "loss": 215.6674, + "step": 12180 + }, + { + "epoch": 0.04924914248314257, + "grad_norm": 2897.811767578125, + "learning_rate": 1.9504e-05, + "loss": 213.6838, + "step": 12190 + }, + { + "epoch": 0.0492895437485102, + "grad_norm": 1696.8310546875, + "learning_rate": 1.9520000000000003e-05, + "loss": 150.5408, + "step": 12200 + }, + { + "epoch": 0.049329945013877836, + "grad_norm": 1064.9481201171875, + "learning_rate": 1.9536e-05, + "loss": 138.7587, + "step": 12210 + }, + { + "epoch": 0.04937034627924546, + "grad_norm": 1600.6160888671875, + "learning_rate": 1.9552000000000002e-05, + "loss": 157.3246, + "step": 12220 + }, + { + "epoch": 0.049410747544613096, + "grad_norm": 1261.3167724609375, + "learning_rate": 1.9568000000000003e-05, + "loss": 221.7355, + "step": 12230 + }, + { + "epoch": 0.04945114880998073, + "grad_norm": 975.0956420898438, + "learning_rate": 1.9584e-05, + "loss": 224.1319, + "step": 12240 + }, + { + "epoch": 0.04949155007534836, + "grad_norm": 5199.57470703125, + "learning_rate": 1.9600000000000002e-05, + "loss": 210.9458, + "step": 12250 + }, + { + "epoch": 0.04953195134071599, + "grad_norm": 698.9437866210938, + "learning_rate": 1.9616000000000003e-05, + "loss": 245.2341, + "step": 12260 + }, + { + "epoch": 0.04957235260608362, + "grad_norm": 1187.3856201171875, + "learning_rate": 1.9632e-05, + "loss": 154.5711, + "step": 12270 + }, + { + "epoch": 0.049612753871451254, + "grad_norm": 1397.740234375, + "learning_rate": 1.9648000000000002e-05, + "loss": 194.6827, + "step": 12280 + }, + { + "epoch": 0.04965315513681889, + "grad_norm": 796.8277587890625, + "learning_rate": 1.9664e-05, + "loss": 127.2304, + "step": 12290 + }, + { + "epoch": 0.04969355640218651, + "grad_norm": 2081.7509765625, + "learning_rate": 1.968e-05, + "loss": 186.8516, + "step": 12300 + }, + { + "epoch": 0.049733957667554146, + "grad_norm": 1857.604736328125, + "learning_rate": 1.9696000000000003e-05, + "loss": 134.1692, + "step": 12310 + }, + { + "epoch": 0.04977435893292178, + "grad_norm": 1383.986083984375, + "learning_rate": 1.9712000000000004e-05, + "loss": 171.506, + "step": 12320 + }, + { + "epoch": 0.04981476019828941, + "grad_norm": 1471.198486328125, + "learning_rate": 1.9728e-05, + "loss": 168.7988, + 
"step": 12330 + }, + { + "epoch": 0.04985516146365704, + "grad_norm": 740.1288452148438, + "learning_rate": 1.9744e-05, + "loss": 302.5353, + "step": 12340 + }, + { + "epoch": 0.04989556272902467, + "grad_norm": 815.28466796875, + "learning_rate": 1.976e-05, + "loss": 197.1072, + "step": 12350 + }, + { + "epoch": 0.049935963994392304, + "grad_norm": 2422.0498046875, + "learning_rate": 1.9776000000000002e-05, + "loss": 180.8438, + "step": 12360 + }, + { + "epoch": 0.04997636525975994, + "grad_norm": 2446.563232421875, + "learning_rate": 1.9792000000000003e-05, + "loss": 198.2974, + "step": 12370 + }, + { + "epoch": 0.050016766525127564, + "grad_norm": 1570.3228759765625, + "learning_rate": 1.9808e-05, + "loss": 177.1226, + "step": 12380 + }, + { + "epoch": 0.0500571677904952, + "grad_norm": 1578.998046875, + "learning_rate": 1.9824000000000002e-05, + "loss": 261.3436, + "step": 12390 + }, + { + "epoch": 0.05009756905586283, + "grad_norm": 7546.8681640625, + "learning_rate": 1.9840000000000003e-05, + "loss": 204.3114, + "step": 12400 + }, + { + "epoch": 0.05013797032123046, + "grad_norm": 2055.9130859375, + "learning_rate": 1.9856e-05, + "loss": 256.7999, + "step": 12410 + }, + { + "epoch": 0.05017837158659809, + "grad_norm": 2208.657958984375, + "learning_rate": 1.9872000000000002e-05, + "loss": 200.8016, + "step": 12420 + }, + { + "epoch": 0.05021877285196572, + "grad_norm": 1317.6824951171875, + "learning_rate": 1.9888e-05, + "loss": 220.5376, + "step": 12430 + }, + { + "epoch": 0.050259174117333355, + "grad_norm": 2095.187255859375, + "learning_rate": 1.9904e-05, + "loss": 281.2883, + "step": 12440 + }, + { + "epoch": 0.05029957538270099, + "grad_norm": 1588.065673828125, + "learning_rate": 1.9920000000000002e-05, + "loss": 161.9087, + "step": 12450 + }, + { + "epoch": 0.050339976648068614, + "grad_norm": 1335.4488525390625, + "learning_rate": 1.9936000000000004e-05, + "loss": 133.9564, + "step": 12460 + }, + { + "epoch": 0.05038037791343625, + "grad_norm": 871.8245849609375, + "learning_rate": 1.9952e-05, + "loss": 135.2258, + "step": 12470 + }, + { + "epoch": 0.05042077917880388, + "grad_norm": 890.5375366210938, + "learning_rate": 1.9968e-05, + "loss": 232.5945, + "step": 12480 + }, + { + "epoch": 0.05046118044417151, + "grad_norm": 952.8308715820312, + "learning_rate": 1.9984e-05, + "loss": 193.3496, + "step": 12490 + }, + { + "epoch": 0.05050158170953914, + "grad_norm": 1141.022216796875, + "learning_rate": 2e-05, + "loss": 134.1613, + "step": 12500 + }, + { + "epoch": 0.05054198297490677, + "grad_norm": 5427.45703125, + "learning_rate": 2.0016e-05, + "loss": 170.2803, + "step": 12510 + }, + { + "epoch": 0.050582384240274406, + "grad_norm": 2970.39404296875, + "learning_rate": 2.0032000000000004e-05, + "loss": 247.3435, + "step": 12520 + }, + { + "epoch": 0.05062278550564204, + "grad_norm": 1330.0194091796875, + "learning_rate": 2.0048000000000002e-05, + "loss": 140.8657, + "step": 12530 + }, + { + "epoch": 0.050663186771009665, + "grad_norm": 2494.57080078125, + "learning_rate": 2.0064000000000003e-05, + "loss": 219.5636, + "step": 12540 + }, + { + "epoch": 0.0507035880363773, + "grad_norm": 1306.2696533203125, + "learning_rate": 2.008e-05, + "loss": 193.2349, + "step": 12550 + }, + { + "epoch": 0.05074398930174493, + "grad_norm": 1253.730224609375, + "learning_rate": 2.0096e-05, + "loss": 227.9398, + "step": 12560 + }, + { + "epoch": 0.050784390567112564, + "grad_norm": 705.5384521484375, + "learning_rate": 2.0112000000000003e-05, + "loss": 238.5294, + "step": 12570 + }, + { + 
"epoch": 0.05082479183248019, + "grad_norm": 645.6691284179688, + "learning_rate": 2.0128e-05, + "loss": 221.1015, + "step": 12580 + }, + { + "epoch": 0.05086519309784782, + "grad_norm": 592.714599609375, + "learning_rate": 2.0144000000000002e-05, + "loss": 224.4367, + "step": 12590 + }, + { + "epoch": 0.050905594363215456, + "grad_norm": 1261.10009765625, + "learning_rate": 2.016e-05, + "loss": 134.0163, + "step": 12600 + }, + { + "epoch": 0.05094599562858309, + "grad_norm": 845.070068359375, + "learning_rate": 2.0176e-05, + "loss": 196.1403, + "step": 12610 + }, + { + "epoch": 0.050986396893950715, + "grad_norm": 1156.7633056640625, + "learning_rate": 2.0192000000000003e-05, + "loss": 231.5698, + "step": 12620 + }, + { + "epoch": 0.05102679815931835, + "grad_norm": 1330.7601318359375, + "learning_rate": 2.0208e-05, + "loss": 186.6144, + "step": 12630 + }, + { + "epoch": 0.05106719942468598, + "grad_norm": 720.6015014648438, + "learning_rate": 2.0224000000000005e-05, + "loss": 226.8308, + "step": 12640 + }, + { + "epoch": 0.051107600690053615, + "grad_norm": 8073.99658203125, + "learning_rate": 2.0240000000000003e-05, + "loss": 249.4775, + "step": 12650 + }, + { + "epoch": 0.05114800195542124, + "grad_norm": 1235.8389892578125, + "learning_rate": 2.0256e-05, + "loss": 171.4731, + "step": 12660 + }, + { + "epoch": 0.051188403220788874, + "grad_norm": 2477.793212890625, + "learning_rate": 2.0272000000000002e-05, + "loss": 197.8061, + "step": 12670 + }, + { + "epoch": 0.05122880448615651, + "grad_norm": 1520.5277099609375, + "learning_rate": 2.0288e-05, + "loss": 175.9295, + "step": 12680 + }, + { + "epoch": 0.05126920575152414, + "grad_norm": 1211.2568359375, + "learning_rate": 2.0304000000000004e-05, + "loss": 206.4819, + "step": 12690 + }, + { + "epoch": 0.051309607016891766, + "grad_norm": 630.6969604492188, + "learning_rate": 2.0320000000000002e-05, + "loss": 148.9793, + "step": 12700 + }, + { + "epoch": 0.0513500082822594, + "grad_norm": 1107.3011474609375, + "learning_rate": 2.0336e-05, + "loss": 125.4718, + "step": 12710 + }, + { + "epoch": 0.05139040954762703, + "grad_norm": 2056.63916015625, + "learning_rate": 2.0352000000000004e-05, + "loss": 150.7492, + "step": 12720 + }, + { + "epoch": 0.051430810812994665, + "grad_norm": 1140.64306640625, + "learning_rate": 2.0368000000000002e-05, + "loss": 194.1706, + "step": 12730 + }, + { + "epoch": 0.05147121207836229, + "grad_norm": 1373.2354736328125, + "learning_rate": 2.0384000000000003e-05, + "loss": 163.663, + "step": 12740 + }, + { + "epoch": 0.051511613343729924, + "grad_norm": 756.5211181640625, + "learning_rate": 2.04e-05, + "loss": 146.4551, + "step": 12750 + }, + { + "epoch": 0.05155201460909756, + "grad_norm": 876.0762939453125, + "learning_rate": 2.0416e-05, + "loss": 229.9929, + "step": 12760 + }, + { + "epoch": 0.05159241587446519, + "grad_norm": 2348.40478515625, + "learning_rate": 2.0432000000000004e-05, + "loss": 228.7333, + "step": 12770 + }, + { + "epoch": 0.051632817139832816, + "grad_norm": 923.1288452148438, + "learning_rate": 2.0448e-05, + "loss": 189.5097, + "step": 12780 + }, + { + "epoch": 0.05167321840520045, + "grad_norm": 1004.810302734375, + "learning_rate": 2.0464000000000003e-05, + "loss": 208.2617, + "step": 12790 + }, + { + "epoch": 0.05171361967056808, + "grad_norm": 2119.18408203125, + "learning_rate": 2.048e-05, + "loss": 188.597, + "step": 12800 + }, + { + "epoch": 0.051754020935935716, + "grad_norm": 1794.5849609375, + "learning_rate": 2.0496e-05, + "loss": 270.7183, + "step": 12810 + }, + { + 
"epoch": 0.05179442220130334, + "grad_norm": 1166.134765625, + "learning_rate": 2.0512000000000003e-05, + "loss": 182.1483, + "step": 12820 + }, + { + "epoch": 0.051834823466670975, + "grad_norm": 1420.9755859375, + "learning_rate": 2.0528e-05, + "loss": 226.6588, + "step": 12830 + }, + { + "epoch": 0.05187522473203861, + "grad_norm": 1052.05126953125, + "learning_rate": 2.0544e-05, + "loss": 167.9875, + "step": 12840 + }, + { + "epoch": 0.05191562599740624, + "grad_norm": 1419.9871826171875, + "learning_rate": 2.0560000000000003e-05, + "loss": 152.5945, + "step": 12850 + }, + { + "epoch": 0.05195602726277387, + "grad_norm": 830.4024047851562, + "learning_rate": 2.0576e-05, + "loss": 134.2175, + "step": 12860 + }, + { + "epoch": 0.0519964285281415, + "grad_norm": 1610.5947265625, + "learning_rate": 2.0592000000000002e-05, + "loss": 176.2794, + "step": 12870 + }, + { + "epoch": 0.05203682979350913, + "grad_norm": 1784.611572265625, + "learning_rate": 2.0608e-05, + "loss": 179.1411, + "step": 12880 + }, + { + "epoch": 0.052077231058876766, + "grad_norm": 3010.37451171875, + "learning_rate": 2.0624e-05, + "loss": 171.2819, + "step": 12890 + }, + { + "epoch": 0.05211763232424439, + "grad_norm": 1471.1204833984375, + "learning_rate": 2.0640000000000002e-05, + "loss": 173.6358, + "step": 12900 + }, + { + "epoch": 0.052158033589612025, + "grad_norm": 900.1693115234375, + "learning_rate": 2.0656e-05, + "loss": 179.1041, + "step": 12910 + }, + { + "epoch": 0.05219843485497966, + "grad_norm": 1350.265380859375, + "learning_rate": 2.0672000000000005e-05, + "loss": 209.9245, + "step": 12920 + }, + { + "epoch": 0.05223883612034729, + "grad_norm": 1520.382568359375, + "learning_rate": 2.0688000000000003e-05, + "loss": 193.5073, + "step": 12930 + }, + { + "epoch": 0.05227923738571492, + "grad_norm": 1882.950927734375, + "learning_rate": 2.0704e-05, + "loss": 196.5522, + "step": 12940 + }, + { + "epoch": 0.05231963865108255, + "grad_norm": 2268.6953125, + "learning_rate": 2.072e-05, + "loss": 194.9114, + "step": 12950 + }, + { + "epoch": 0.052360039916450184, + "grad_norm": 1628.5189208984375, + "learning_rate": 2.0736e-05, + "loss": 244.7537, + "step": 12960 + }, + { + "epoch": 0.05240044118181782, + "grad_norm": 667.3389282226562, + "learning_rate": 2.0752000000000004e-05, + "loss": 172.5676, + "step": 12970 + }, + { + "epoch": 0.05244084244718544, + "grad_norm": 646.96337890625, + "learning_rate": 2.0768000000000002e-05, + "loss": 131.0811, + "step": 12980 + }, + { + "epoch": 0.052481243712553076, + "grad_norm": 738.0167236328125, + "learning_rate": 2.0784e-05, + "loss": 118.1357, + "step": 12990 + }, + { + "epoch": 0.05252164497792071, + "grad_norm": 1130.4447021484375, + "learning_rate": 2.08e-05, + "loss": 194.3002, + "step": 13000 + }, + { + "epoch": 0.05256204624328834, + "grad_norm": 2203.624267578125, + "learning_rate": 2.0816000000000002e-05, + "loss": 218.3121, + "step": 13010 + }, + { + "epoch": 0.05260244750865597, + "grad_norm": 1162.6181640625, + "learning_rate": 2.0832000000000003e-05, + "loss": 151.4725, + "step": 13020 + }, + { + "epoch": 0.0526428487740236, + "grad_norm": 532.7477416992188, + "learning_rate": 2.0848e-05, + "loss": 165.4832, + "step": 13030 + }, + { + "epoch": 0.052683250039391234, + "grad_norm": 1619.633544921875, + "learning_rate": 2.0864e-05, + "loss": 154.237, + "step": 13040 + }, + { + "epoch": 0.05272365130475887, + "grad_norm": 973.4949340820312, + "learning_rate": 2.0880000000000003e-05, + "loss": 123.1408, + "step": 13050 + }, + { + "epoch": 
0.05276405257012649, + "grad_norm": 948.999267578125, + "learning_rate": 2.0896e-05, + "loss": 178.1217, + "step": 13060 + }, + { + "epoch": 0.052804453835494126, + "grad_norm": 3517.909423828125, + "learning_rate": 2.0912000000000002e-05, + "loss": 296.0514, + "step": 13070 + }, + { + "epoch": 0.05284485510086176, + "grad_norm": 1285.1912841796875, + "learning_rate": 2.0928e-05, + "loss": 153.6933, + "step": 13080 + }, + { + "epoch": 0.05288525636622939, + "grad_norm": 748.8801879882812, + "learning_rate": 2.0944e-05, + "loss": 140.8422, + "step": 13090 + }, + { + "epoch": 0.05292565763159702, + "grad_norm": 3364.9150390625, + "learning_rate": 2.0960000000000003e-05, + "loss": 132.2271, + "step": 13100 + }, + { + "epoch": 0.05296605889696465, + "grad_norm": 993.73193359375, + "learning_rate": 2.0976e-05, + "loss": 152.2985, + "step": 13110 + }, + { + "epoch": 0.053006460162332285, + "grad_norm": 1169.735595703125, + "learning_rate": 2.0992000000000005e-05, + "loss": 181.4158, + "step": 13120 + }, + { + "epoch": 0.05304686142769992, + "grad_norm": 976.997802734375, + "learning_rate": 2.1008000000000003e-05, + "loss": 237.1122, + "step": 13130 + }, + { + "epoch": 0.053087262693067544, + "grad_norm": 841.9802856445312, + "learning_rate": 2.1024e-05, + "loss": 208.393, + "step": 13140 + }, + { + "epoch": 0.05312766395843518, + "grad_norm": 960.3524780273438, + "learning_rate": 2.1040000000000002e-05, + "loss": 177.4316, + "step": 13150 + }, + { + "epoch": 0.05316806522380281, + "grad_norm": 2525.95947265625, + "learning_rate": 2.1056e-05, + "loss": 271.0917, + "step": 13160 + }, + { + "epoch": 0.05320846648917044, + "grad_norm": 2130.737060546875, + "learning_rate": 2.1072000000000004e-05, + "loss": 183.3373, + "step": 13170 + }, + { + "epoch": 0.05324886775453807, + "grad_norm": 2632.150390625, + "learning_rate": 2.1088000000000002e-05, + "loss": 255.7671, + "step": 13180 + }, + { + "epoch": 0.0532892690199057, + "grad_norm": 656.5384521484375, + "learning_rate": 2.1104e-05, + "loss": 126.492, + "step": 13190 + }, + { + "epoch": 0.053329670285273335, + "grad_norm": 1420.99951171875, + "learning_rate": 2.112e-05, + "loss": 153.6445, + "step": 13200 + }, + { + "epoch": 0.05337007155064097, + "grad_norm": 872.6952514648438, + "learning_rate": 2.1136000000000002e-05, + "loss": 255.6181, + "step": 13210 + }, + { + "epoch": 0.053410472816008595, + "grad_norm": 1159.687255859375, + "learning_rate": 2.1152000000000003e-05, + "loss": 116.3886, + "step": 13220 + }, + { + "epoch": 0.05345087408137623, + "grad_norm": 550.2531127929688, + "learning_rate": 2.1168e-05, + "loss": 189.7449, + "step": 13230 + }, + { + "epoch": 0.05349127534674386, + "grad_norm": 963.4672241210938, + "learning_rate": 2.1184e-05, + "loss": 180.2963, + "step": 13240 + }, + { + "epoch": 0.053531676612111494, + "grad_norm": 1583.5225830078125, + "learning_rate": 2.1200000000000004e-05, + "loss": 200.6723, + "step": 13250 + }, + { + "epoch": 0.05357207787747912, + "grad_norm": 845.2804565429688, + "learning_rate": 2.1216e-05, + "loss": 125.6591, + "step": 13260 + }, + { + "epoch": 0.05361247914284675, + "grad_norm": 1241.0216064453125, + "learning_rate": 2.1232000000000003e-05, + "loss": 142.6547, + "step": 13270 + }, + { + "epoch": 0.053652880408214386, + "grad_norm": 1114.7105712890625, + "learning_rate": 2.1248e-05, + "loss": 193.9212, + "step": 13280 + }, + { + "epoch": 0.05369328167358202, + "grad_norm": 1324.7198486328125, + "learning_rate": 2.1264000000000002e-05, + "loss": 145.549, + "step": 13290 + }, + { + "epoch": 
0.053733682938949645, + "grad_norm": 1778.501953125, + "learning_rate": 2.1280000000000003e-05, + "loss": 208.9834, + "step": 13300 + }, + { + "epoch": 0.05377408420431728, + "grad_norm": 778.5745239257812, + "learning_rate": 2.1296e-05, + "loss": 107.1739, + "step": 13310 + }, + { + "epoch": 0.05381448546968491, + "grad_norm": 1214.580322265625, + "learning_rate": 2.1312000000000005e-05, + "loss": 150.8649, + "step": 13320 + }, + { + "epoch": 0.053854886735052544, + "grad_norm": 1418.6300048828125, + "learning_rate": 2.1328000000000003e-05, + "loss": 263.4652, + "step": 13330 + }, + { + "epoch": 0.05389528800042017, + "grad_norm": 1241.6441650390625, + "learning_rate": 2.1344e-05, + "loss": 205.1126, + "step": 13340 + }, + { + "epoch": 0.0539356892657878, + "grad_norm": 827.6177978515625, + "learning_rate": 2.1360000000000002e-05, + "loss": 160.892, + "step": 13350 + }, + { + "epoch": 0.053976090531155436, + "grad_norm": 1029.709716796875, + "learning_rate": 2.1376e-05, + "loss": 278.0147, + "step": 13360 + }, + { + "epoch": 0.05401649179652307, + "grad_norm": 2159.714111328125, + "learning_rate": 2.1392000000000005e-05, + "loss": 223.6389, + "step": 13370 + }, + { + "epoch": 0.054056893061890696, + "grad_norm": 1305.162109375, + "learning_rate": 2.1408000000000002e-05, + "loss": 139.3859, + "step": 13380 + }, + { + "epoch": 0.05409729432725833, + "grad_norm": 1482.531005859375, + "learning_rate": 2.1424e-05, + "loss": 159.6899, + "step": 13390 + }, + { + "epoch": 0.05413769559262596, + "grad_norm": 865.8165893554688, + "learning_rate": 2.144e-05, + "loss": 119.9288, + "step": 13400 + }, + { + "epoch": 0.054178096857993595, + "grad_norm": 1252.372314453125, + "learning_rate": 2.1456000000000003e-05, + "loss": 164.3742, + "step": 13410 + }, + { + "epoch": 0.05421849812336122, + "grad_norm": 1579.6357421875, + "learning_rate": 2.1472000000000004e-05, + "loss": 204.9973, + "step": 13420 + }, + { + "epoch": 0.054258899388728854, + "grad_norm": 1164.062744140625, + "learning_rate": 2.1488e-05, + "loss": 121.8512, + "step": 13430 + }, + { + "epoch": 0.05429930065409649, + "grad_norm": 884.1082153320312, + "learning_rate": 2.1504e-05, + "loss": 178.3608, + "step": 13440 + }, + { + "epoch": 0.05433970191946412, + "grad_norm": 1767.695556640625, + "learning_rate": 2.1520000000000004e-05, + "loss": 193.1272, + "step": 13450 + }, + { + "epoch": 0.054380103184831746, + "grad_norm": 928.2390747070312, + "learning_rate": 2.1536000000000002e-05, + "loss": 137.8263, + "step": 13460 + }, + { + "epoch": 0.05442050445019938, + "grad_norm": 2138.448974609375, + "learning_rate": 2.1552e-05, + "loss": 188.621, + "step": 13470 + }, + { + "epoch": 0.05446090571556701, + "grad_norm": 723.1905517578125, + "learning_rate": 2.1568e-05, + "loss": 167.0574, + "step": 13480 + }, + { + "epoch": 0.054501306980934645, + "grad_norm": 1055.7220458984375, + "learning_rate": 2.1584000000000002e-05, + "loss": 210.8758, + "step": 13490 + }, + { + "epoch": 0.05454170824630227, + "grad_norm": 1112.051513671875, + "learning_rate": 2.1600000000000003e-05, + "loss": 232.0652, + "step": 13500 + }, + { + "epoch": 0.054582109511669905, + "grad_norm": 1572.6038818359375, + "learning_rate": 2.1616e-05, + "loss": 164.4113, + "step": 13510 + }, + { + "epoch": 0.05462251077703754, + "grad_norm": 842.176025390625, + "learning_rate": 2.1632e-05, + "loss": 137.7064, + "step": 13520 + }, + { + "epoch": 0.05466291204240517, + "grad_norm": 983.318603515625, + "learning_rate": 2.1648000000000003e-05, + "loss": 127.5733, + "step": 13530 + }, + { + 
"epoch": 0.0547033133077728, + "grad_norm": 2376.21630859375, + "learning_rate": 2.1664e-05, + "loss": 196.741, + "step": 13540 + }, + { + "epoch": 0.05474371457314043, + "grad_norm": 1190.033447265625, + "learning_rate": 2.1680000000000002e-05, + "loss": 234.4224, + "step": 13550 + }, + { + "epoch": 0.05478411583850806, + "grad_norm": 957.0660400390625, + "learning_rate": 2.1696e-05, + "loss": 165.5409, + "step": 13560 + }, + { + "epoch": 0.054824517103875696, + "grad_norm": 1211.7044677734375, + "learning_rate": 2.1711999999999998e-05, + "loss": 189.8176, + "step": 13570 + }, + { + "epoch": 0.05486491836924332, + "grad_norm": 5962.564453125, + "learning_rate": 2.1728000000000003e-05, + "loss": 148.0188, + "step": 13580 + }, + { + "epoch": 0.054905319634610955, + "grad_norm": 754.4962768554688, + "learning_rate": 2.1744e-05, + "loss": 199.2494, + "step": 13590 + }, + { + "epoch": 0.05494572089997859, + "grad_norm": 1386.749267578125, + "learning_rate": 2.1760000000000002e-05, + "loss": 142.1957, + "step": 13600 + }, + { + "epoch": 0.05498612216534622, + "grad_norm": 1429.7833251953125, + "learning_rate": 2.1776000000000003e-05, + "loss": 268.8451, + "step": 13610 + }, + { + "epoch": 0.05502652343071385, + "grad_norm": 906.3463134765625, + "learning_rate": 2.1792e-05, + "loss": 129.6575, + "step": 13620 + }, + { + "epoch": 0.05506692469608148, + "grad_norm": 1607.627197265625, + "learning_rate": 2.1808000000000002e-05, + "loss": 182.4304, + "step": 13630 + }, + { + "epoch": 0.05510732596144911, + "grad_norm": 1129.2894287109375, + "learning_rate": 2.1824e-05, + "loss": 133.5052, + "step": 13640 + }, + { + "epoch": 0.055147727226816746, + "grad_norm": 1315.9169921875, + "learning_rate": 2.1840000000000004e-05, + "loss": 214.8286, + "step": 13650 + }, + { + "epoch": 0.05518812849218437, + "grad_norm": 669.73095703125, + "learning_rate": 2.1856000000000002e-05, + "loss": 186.4369, + "step": 13660 + }, + { + "epoch": 0.055228529757552006, + "grad_norm": 848.6827392578125, + "learning_rate": 2.1872e-05, + "loss": 137.5223, + "step": 13670 + }, + { + "epoch": 0.05526893102291964, + "grad_norm": 923.869873046875, + "learning_rate": 2.1888e-05, + "loss": 116.3321, + "step": 13680 + }, + { + "epoch": 0.05530933228828727, + "grad_norm": 1344.929931640625, + "learning_rate": 2.1904000000000002e-05, + "loss": 160.1196, + "step": 13690 + }, + { + "epoch": 0.0553497335536549, + "grad_norm": 1019.1683349609375, + "learning_rate": 2.1920000000000004e-05, + "loss": 181.8014, + "step": 13700 + }, + { + "epoch": 0.05539013481902253, + "grad_norm": 2222.51318359375, + "learning_rate": 2.1936e-05, + "loss": 208.4223, + "step": 13710 + }, + { + "epoch": 0.055430536084390164, + "grad_norm": 927.0543212890625, + "learning_rate": 2.1952e-05, + "loss": 140.6365, + "step": 13720 + }, + { + "epoch": 0.0554709373497578, + "grad_norm": 1362.4456787109375, + "learning_rate": 2.1968000000000004e-05, + "loss": 170.5639, + "step": 13730 + }, + { + "epoch": 0.05551133861512542, + "grad_norm": 1279.76611328125, + "learning_rate": 2.1984e-05, + "loss": 173.4673, + "step": 13740 + }, + { + "epoch": 0.055551739880493056, + "grad_norm": 917.115478515625, + "learning_rate": 2.2000000000000003e-05, + "loss": 157.9273, + "step": 13750 + }, + { + "epoch": 0.05559214114586069, + "grad_norm": 2153.1689453125, + "learning_rate": 2.2016e-05, + "loss": 162.4645, + "step": 13760 + }, + { + "epoch": 0.05563254241122832, + "grad_norm": 1180.8828125, + "learning_rate": 2.2032e-05, + "loss": 151.051, + "step": 13770 + }, + { + "epoch": 
0.05567294367659595, + "grad_norm": 843.3110961914062, + "learning_rate": 2.2048000000000003e-05, + "loss": 142.8383, + "step": 13780 + }, + { + "epoch": 0.05571334494196358, + "grad_norm": 850.1922607421875, + "learning_rate": 2.2064e-05, + "loss": 157.7047, + "step": 13790 + }, + { + "epoch": 0.055753746207331215, + "grad_norm": 1347.325439453125, + "learning_rate": 2.2080000000000002e-05, + "loss": 210.5811, + "step": 13800 + }, + { + "epoch": 0.05579414747269885, + "grad_norm": 1018.7650146484375, + "learning_rate": 2.2096000000000003e-05, + "loss": 128.0224, + "step": 13810 + }, + { + "epoch": 0.055834548738066474, + "grad_norm": 1624.7498779296875, + "learning_rate": 2.2112e-05, + "loss": 188.9533, + "step": 13820 + }, + { + "epoch": 0.05587495000343411, + "grad_norm": 497.349853515625, + "learning_rate": 2.2128000000000002e-05, + "loss": 184.5132, + "step": 13830 + }, + { + "epoch": 0.05591535126880174, + "grad_norm": 1310.5782470703125, + "learning_rate": 2.2144e-05, + "loss": 165.6104, + "step": 13840 + }, + { + "epoch": 0.05595575253416937, + "grad_norm": 973.9716186523438, + "learning_rate": 2.2160000000000005e-05, + "loss": 186.9863, + "step": 13850 + }, + { + "epoch": 0.055996153799537, + "grad_norm": 1529.08251953125, + "learning_rate": 2.2176000000000002e-05, + "loss": 153.5246, + "step": 13860 + }, + { + "epoch": 0.05603655506490463, + "grad_norm": 1039.033203125, + "learning_rate": 2.2192e-05, + "loss": 210.1795, + "step": 13870 + }, + { + "epoch": 0.056076956330272265, + "grad_norm": 1540.9727783203125, + "learning_rate": 2.2208e-05, + "loss": 164.4645, + "step": 13880 + }, + { + "epoch": 0.0561173575956399, + "grad_norm": 769.8643798828125, + "learning_rate": 2.2224000000000003e-05, + "loss": 108.0734, + "step": 13890 + }, + { + "epoch": 0.056157758861007524, + "grad_norm": 979.2266235351562, + "learning_rate": 2.2240000000000004e-05, + "loss": 134.403, + "step": 13900 + }, + { + "epoch": 0.05619816012637516, + "grad_norm": 774.3818359375, + "learning_rate": 2.2256000000000002e-05, + "loss": 155.3155, + "step": 13910 + }, + { + "epoch": 0.05623856139174279, + "grad_norm": 1617.9088134765625, + "learning_rate": 2.2272e-05, + "loss": 215.7632, + "step": 13920 + }, + { + "epoch": 0.05627896265711042, + "grad_norm": 888.803466796875, + "learning_rate": 2.2288000000000004e-05, + "loss": 150.6828, + "step": 13930 + }, + { + "epoch": 0.05631936392247805, + "grad_norm": 1158.1151123046875, + "learning_rate": 2.2304000000000002e-05, + "loss": 155.592, + "step": 13940 + }, + { + "epoch": 0.05635976518784568, + "grad_norm": 963.2562866210938, + "learning_rate": 2.2320000000000003e-05, + "loss": 224.5231, + "step": 13950 + }, + { + "epoch": 0.056400166453213316, + "grad_norm": 980.2925415039062, + "learning_rate": 2.2336e-05, + "loss": 170.7537, + "step": 13960 + }, + { + "epoch": 0.05644056771858095, + "grad_norm": 2531.272216796875, + "learning_rate": 2.2352e-05, + "loss": 149.718, + "step": 13970 + }, + { + "epoch": 0.056480968983948575, + "grad_norm": 1683.53466796875, + "learning_rate": 2.2368000000000003e-05, + "loss": 155.013, + "step": 13980 + }, + { + "epoch": 0.05652137024931621, + "grad_norm": 444.4203796386719, + "learning_rate": 2.2384e-05, + "loss": 181.3527, + "step": 13990 + }, + { + "epoch": 0.05656177151468384, + "grad_norm": 523.1892700195312, + "learning_rate": 2.2400000000000002e-05, + "loss": 130.8022, + "step": 14000 + }, + { + "epoch": 0.056602172780051474, + "grad_norm": 2049.889892578125, + "learning_rate": 2.2416000000000004e-05, + "loss": 238.2498, + 
"step": 14010 + }, + { + "epoch": 0.0566425740454191, + "grad_norm": 1004.218505859375, + "learning_rate": 2.2432e-05, + "loss": 177.9359, + "step": 14020 + }, + { + "epoch": 0.05668297531078673, + "grad_norm": 512.943115234375, + "learning_rate": 2.2448000000000003e-05, + "loss": 161.8063, + "step": 14030 + }, + { + "epoch": 0.056723376576154366, + "grad_norm": 1076.265869140625, + "learning_rate": 2.2464e-05, + "loss": 170.4555, + "step": 14040 + }, + { + "epoch": 0.056763777841522, + "grad_norm": 2569.2158203125, + "learning_rate": 2.2480000000000005e-05, + "loss": 167.7969, + "step": 14050 + }, + { + "epoch": 0.056804179106889625, + "grad_norm": 1953.2723388671875, + "learning_rate": 2.2496000000000003e-05, + "loss": 179.4131, + "step": 14060 + }, + { + "epoch": 0.05684458037225726, + "grad_norm": 1441.91552734375, + "learning_rate": 2.2512e-05, + "loss": 217.0521, + "step": 14070 + }, + { + "epoch": 0.05688498163762489, + "grad_norm": 813.6728515625, + "learning_rate": 2.2528000000000002e-05, + "loss": 150.9464, + "step": 14080 + }, + { + "epoch": 0.056925382902992525, + "grad_norm": 1349.012451171875, + "learning_rate": 2.2544000000000003e-05, + "loss": 135.9318, + "step": 14090 + }, + { + "epoch": 0.05696578416836015, + "grad_norm": 3673.123779296875, + "learning_rate": 2.256e-05, + "loss": 226.9448, + "step": 14100 + }, + { + "epoch": 0.057006185433727784, + "grad_norm": 3659.591552734375, + "learning_rate": 2.2576000000000002e-05, + "loss": 136.1684, + "step": 14110 + }, + { + "epoch": 0.05704658669909542, + "grad_norm": 722.1444702148438, + "learning_rate": 2.2592e-05, + "loss": 154.254, + "step": 14120 + }, + { + "epoch": 0.05708698796446305, + "grad_norm": 1161.3231201171875, + "learning_rate": 2.2608000000000004e-05, + "loss": 174.1355, + "step": 14130 + }, + { + "epoch": 0.057127389229830676, + "grad_norm": 1642.2625732421875, + "learning_rate": 2.2624000000000002e-05, + "loss": 233.051, + "step": 14140 + }, + { + "epoch": 0.05716779049519831, + "grad_norm": 852.204345703125, + "learning_rate": 2.264e-05, + "loss": 157.8276, + "step": 14150 + }, + { + "epoch": 0.05720819176056594, + "grad_norm": 815.2117309570312, + "learning_rate": 2.2656e-05, + "loss": 138.3866, + "step": 14160 + }, + { + "epoch": 0.057248593025933575, + "grad_norm": 1296.7962646484375, + "learning_rate": 2.2672e-05, + "loss": 155.6856, + "step": 14170 + }, + { + "epoch": 0.0572889942913012, + "grad_norm": 871.2222290039062, + "learning_rate": 2.2688000000000004e-05, + "loss": 134.7186, + "step": 14180 + }, + { + "epoch": 0.057329395556668834, + "grad_norm": 882.4166870117188, + "learning_rate": 2.2704e-05, + "loss": 163.1565, + "step": 14190 + }, + { + "epoch": 0.05736979682203647, + "grad_norm": 998.6654052734375, + "learning_rate": 2.272e-05, + "loss": 148.3887, + "step": 14200 + }, + { + "epoch": 0.0574101980874041, + "grad_norm": 2970.6875, + "learning_rate": 2.2736000000000004e-05, + "loss": 135.9019, + "step": 14210 + }, + { + "epoch": 0.057450599352771727, + "grad_norm": 631.6978149414062, + "learning_rate": 2.2752000000000002e-05, + "loss": 131.613, + "step": 14220 + }, + { + "epoch": 0.05749100061813936, + "grad_norm": 2225.521728515625, + "learning_rate": 2.2768000000000003e-05, + "loss": 168.8019, + "step": 14230 + }, + { + "epoch": 0.05753140188350699, + "grad_norm": 1400.9620361328125, + "learning_rate": 2.2784e-05, + "loss": 217.0065, + "step": 14240 + }, + { + "epoch": 0.057571803148874626, + "grad_norm": 1512.5889892578125, + "learning_rate": 2.28e-05, + "loss": 209.6466, + "step": 14250 + 
}, + { + "epoch": 0.05761220441424225, + "grad_norm": 1179.3431396484375, + "learning_rate": 2.2816000000000003e-05, + "loss": 163.7062, + "step": 14260 + }, + { + "epoch": 0.057652605679609885, + "grad_norm": 532.9493408203125, + "learning_rate": 2.2832e-05, + "loss": 135.8228, + "step": 14270 + }, + { + "epoch": 0.05769300694497752, + "grad_norm": 581.2265625, + "learning_rate": 2.2848000000000002e-05, + "loss": 213.1013, + "step": 14280 + }, + { + "epoch": 0.05773340821034515, + "grad_norm": 691.206298828125, + "learning_rate": 2.2864000000000003e-05, + "loss": 233.0184, + "step": 14290 + }, + { + "epoch": 0.05777380947571278, + "grad_norm": 3031.611572265625, + "learning_rate": 2.288e-05, + "loss": 201.7538, + "step": 14300 + }, + { + "epoch": 0.05781421074108041, + "grad_norm": 1775.6068115234375, + "learning_rate": 2.2896000000000002e-05, + "loss": 177.8716, + "step": 14310 + }, + { + "epoch": 0.05785461200644804, + "grad_norm": 1103.681396484375, + "learning_rate": 2.2912e-05, + "loss": 175.4909, + "step": 14320 + }, + { + "epoch": 0.057895013271815676, + "grad_norm": 796.7500610351562, + "learning_rate": 2.2928000000000005e-05, + "loss": 117.4096, + "step": 14330 + }, + { + "epoch": 0.0579354145371833, + "grad_norm": 610.9031982421875, + "learning_rate": 2.2944000000000003e-05, + "loss": 200.1034, + "step": 14340 + }, + { + "epoch": 0.057975815802550935, + "grad_norm": 614.3745727539062, + "learning_rate": 2.296e-05, + "loss": 200.4539, + "step": 14350 + }, + { + "epoch": 0.05801621706791857, + "grad_norm": 1712.98291015625, + "learning_rate": 2.2976e-05, + "loss": 206.194, + "step": 14360 + }, + { + "epoch": 0.0580566183332862, + "grad_norm": 3228.22314453125, + "learning_rate": 2.2992e-05, + "loss": 153.2533, + "step": 14370 + }, + { + "epoch": 0.05809701959865383, + "grad_norm": 978.1925048828125, + "learning_rate": 2.3008000000000004e-05, + "loss": 195.0918, + "step": 14380 + }, + { + "epoch": 0.05813742086402146, + "grad_norm": 1873.992919921875, + "learning_rate": 2.3024000000000002e-05, + "loss": 202.0891, + "step": 14390 + }, + { + "epoch": 0.058177822129389094, + "grad_norm": 848.6489868164062, + "learning_rate": 2.304e-05, + "loss": 121.6125, + "step": 14400 + }, + { + "epoch": 0.05821822339475673, + "grad_norm": 1428.3878173828125, + "learning_rate": 2.3056000000000004e-05, + "loss": 237.805, + "step": 14410 + }, + { + "epoch": 0.05825862466012435, + "grad_norm": 853.1028442382812, + "learning_rate": 2.3072000000000002e-05, + "loss": 164.4051, + "step": 14420 + }, + { + "epoch": 0.058299025925491986, + "grad_norm": 949.3483276367188, + "learning_rate": 2.3088000000000003e-05, + "loss": 136.4663, + "step": 14430 + }, + { + "epoch": 0.05833942719085962, + "grad_norm": 881.7730712890625, + "learning_rate": 2.3104e-05, + "loss": 155.6771, + "step": 14440 + }, + { + "epoch": 0.05837982845622725, + "grad_norm": 1134.35498046875, + "learning_rate": 2.312e-05, + "loss": 170.5195, + "step": 14450 + }, + { + "epoch": 0.05842022972159488, + "grad_norm": 1189.0462646484375, + "learning_rate": 2.3136000000000003e-05, + "loss": 159.4148, + "step": 14460 + }, + { + "epoch": 0.05846063098696251, + "grad_norm": 1090.5531005859375, + "learning_rate": 2.3152e-05, + "loss": 130.2826, + "step": 14470 + }, + { + "epoch": 0.058501032252330144, + "grad_norm": 1131.265380859375, + "learning_rate": 2.3168000000000002e-05, + "loss": 201.0883, + "step": 14480 + }, + { + "epoch": 0.05854143351769778, + "grad_norm": 1238.070556640625, + "learning_rate": 2.3184000000000004e-05, + "loss": 167.0402, + 
"step": 14490 + }, + { + "epoch": 0.058581834783065403, + "grad_norm": 574.5208740234375, + "learning_rate": 2.32e-05, + "loss": 145.9335, + "step": 14500 + }, + { + "epoch": 0.05862223604843304, + "grad_norm": 882.7974853515625, + "learning_rate": 2.3216000000000003e-05, + "loss": 108.9534, + "step": 14510 + }, + { + "epoch": 0.05866263731380067, + "grad_norm": 1392.5626220703125, + "learning_rate": 2.3232e-05, + "loss": 226.2098, + "step": 14520 + }, + { + "epoch": 0.0587030385791683, + "grad_norm": 563.4628295898438, + "learning_rate": 2.3248000000000005e-05, + "loss": 176.6275, + "step": 14530 + }, + { + "epoch": 0.05874343984453593, + "grad_norm": 934.815673828125, + "learning_rate": 2.3264000000000003e-05, + "loss": 125.4536, + "step": 14540 + }, + { + "epoch": 0.05878384110990356, + "grad_norm": 925.1941528320312, + "learning_rate": 2.328e-05, + "loss": 125.0822, + "step": 14550 + }, + { + "epoch": 0.058824242375271195, + "grad_norm": 2865.814453125, + "learning_rate": 2.3296000000000002e-05, + "loss": 175.1995, + "step": 14560 + }, + { + "epoch": 0.05886464364063883, + "grad_norm": 683.4678344726562, + "learning_rate": 2.3312e-05, + "loss": 127.679, + "step": 14570 + }, + { + "epoch": 0.058905044906006454, + "grad_norm": 1186.405517578125, + "learning_rate": 2.3328000000000004e-05, + "loss": 140.4038, + "step": 14580 + }, + { + "epoch": 0.05894544617137409, + "grad_norm": 2086.564453125, + "learning_rate": 2.3344000000000002e-05, + "loss": 147.0434, + "step": 14590 + }, + { + "epoch": 0.05898584743674172, + "grad_norm": 1319.7452392578125, + "learning_rate": 2.336e-05, + "loss": 203.6223, + "step": 14600 + }, + { + "epoch": 0.05902624870210935, + "grad_norm": 936.6981201171875, + "learning_rate": 2.3376000000000005e-05, + "loss": 135.9501, + "step": 14610 + }, + { + "epoch": 0.05906664996747698, + "grad_norm": 753.2698364257812, + "learning_rate": 2.3392000000000002e-05, + "loss": 159.6841, + "step": 14620 + }, + { + "epoch": 0.05910705123284461, + "grad_norm": 1491.01953125, + "learning_rate": 2.3408000000000004e-05, + "loss": 106.1771, + "step": 14630 + }, + { + "epoch": 0.059147452498212245, + "grad_norm": 1340.7762451171875, + "learning_rate": 2.3424e-05, + "loss": 174.7433, + "step": 14640 + }, + { + "epoch": 0.05918785376357988, + "grad_norm": 1028.75, + "learning_rate": 2.344e-05, + "loss": 124.6454, + "step": 14650 + }, + { + "epoch": 0.059228255028947505, + "grad_norm": 617.19970703125, + "learning_rate": 2.3456000000000004e-05, + "loss": 172.6548, + "step": 14660 + }, + { + "epoch": 0.05926865629431514, + "grad_norm": 1720.14111328125, + "learning_rate": 2.3472e-05, + "loss": 141.3971, + "step": 14670 + }, + { + "epoch": 0.05930905755968277, + "grad_norm": 1395.69287109375, + "learning_rate": 2.3488000000000003e-05, + "loss": 172.2831, + "step": 14680 + }, + { + "epoch": 0.059349458825050404, + "grad_norm": 1455.822265625, + "learning_rate": 2.3504000000000004e-05, + "loss": 153.6807, + "step": 14690 + }, + { + "epoch": 0.05938986009041803, + "grad_norm": 537.3853149414062, + "learning_rate": 2.3520000000000002e-05, + "loss": 176.454, + "step": 14700 + }, + { + "epoch": 0.05943026135578566, + "grad_norm": 853.8223266601562, + "learning_rate": 2.3536000000000003e-05, + "loss": 146.2107, + "step": 14710 + }, + { + "epoch": 0.059470662621153296, + "grad_norm": 1736.2396240234375, + "learning_rate": 2.3552e-05, + "loss": 188.5528, + "step": 14720 + }, + { + "epoch": 0.05951106388652093, + "grad_norm": 1702.821044921875, + "learning_rate": 2.3568e-05, + "loss": 201.6108, + 
"step": 14730 + }, + { + "epoch": 0.059551465151888555, + "grad_norm": 1466.798583984375, + "learning_rate": 2.3584000000000003e-05, + "loss": 164.1399, + "step": 14740 + }, + { + "epoch": 0.05959186641725619, + "grad_norm": 556.3617553710938, + "learning_rate": 2.36e-05, + "loss": 165.3902, + "step": 14750 + }, + { + "epoch": 0.05963226768262382, + "grad_norm": 1122.3865966796875, + "learning_rate": 2.3616000000000002e-05, + "loss": 175.2408, + "step": 14760 + }, + { + "epoch": 0.059672668947991454, + "grad_norm": 866.0825805664062, + "learning_rate": 2.3632e-05, + "loss": 164.229, + "step": 14770 + }, + { + "epoch": 0.05971307021335908, + "grad_norm": 837.1140747070312, + "learning_rate": 2.3648e-05, + "loss": 134.0628, + "step": 14780 + }, + { + "epoch": 0.059753471478726713, + "grad_norm": 1831.6705322265625, + "learning_rate": 2.3664000000000002e-05, + "loss": 174.2087, + "step": 14790 + }, + { + "epoch": 0.05979387274409435, + "grad_norm": 1126.463623046875, + "learning_rate": 2.368e-05, + "loss": 190.7878, + "step": 14800 + }, + { + "epoch": 0.05983427400946198, + "grad_norm": 740.3558959960938, + "learning_rate": 2.3696000000000005e-05, + "loss": 171.0372, + "step": 14810 + }, + { + "epoch": 0.059874675274829606, + "grad_norm": 771.722412109375, + "learning_rate": 2.3712000000000003e-05, + "loss": 211.4884, + "step": 14820 + }, + { + "epoch": 0.05991507654019724, + "grad_norm": 1478.09375, + "learning_rate": 2.3728e-05, + "loss": 130.7289, + "step": 14830 + }, + { + "epoch": 0.05995547780556487, + "grad_norm": 882.0974731445312, + "learning_rate": 2.3744000000000002e-05, + "loss": 157.9321, + "step": 14840 + }, + { + "epoch": 0.059995879070932505, + "grad_norm": 547.9676513671875, + "learning_rate": 2.376e-05, + "loss": 176.8159, + "step": 14850 + }, + { + "epoch": 0.06003628033630013, + "grad_norm": 1962.8756103515625, + "learning_rate": 2.3776000000000004e-05, + "loss": 201.2807, + "step": 14860 + }, + { + "epoch": 0.060076681601667764, + "grad_norm": 755.8889770507812, + "learning_rate": 2.3792000000000002e-05, + "loss": 216.7161, + "step": 14870 + }, + { + "epoch": 0.0601170828670354, + "grad_norm": 852.0759887695312, + "learning_rate": 2.3808e-05, + "loss": 135.1358, + "step": 14880 + }, + { + "epoch": 0.06015748413240303, + "grad_norm": 1270.6697998046875, + "learning_rate": 2.3824000000000004e-05, + "loss": 175.1427, + "step": 14890 + }, + { + "epoch": 0.060197885397770656, + "grad_norm": 641.8917846679688, + "learning_rate": 2.3840000000000002e-05, + "loss": 174.6843, + "step": 14900 + }, + { + "epoch": 0.06023828666313829, + "grad_norm": 2870.410888671875, + "learning_rate": 2.3856000000000003e-05, + "loss": 176.0589, + "step": 14910 + }, + { + "epoch": 0.06027868792850592, + "grad_norm": 1056.04296875, + "learning_rate": 2.3872e-05, + "loss": 213.278, + "step": 14920 + }, + { + "epoch": 0.060319089193873555, + "grad_norm": 2173.892578125, + "learning_rate": 2.3888e-05, + "loss": 200.7836, + "step": 14930 + }, + { + "epoch": 0.06035949045924118, + "grad_norm": 1585.4259033203125, + "learning_rate": 2.3904000000000004e-05, + "loss": 175.6348, + "step": 14940 + }, + { + "epoch": 0.060399891724608815, + "grad_norm": 1542.4459228515625, + "learning_rate": 2.392e-05, + "loss": 162.7921, + "step": 14950 + }, + { + "epoch": 0.06044029298997645, + "grad_norm": 694.0040283203125, + "learning_rate": 2.3936000000000003e-05, + "loss": 97.7176, + "step": 14960 + }, + { + "epoch": 0.06048069425534408, + "grad_norm": 2402.3837890625, + "learning_rate": 2.3952e-05, + "loss": 224.7023, + 
"step": 14970 + }, + { + "epoch": 0.06052109552071171, + "grad_norm": 471.6507568359375, + "learning_rate": 2.3968e-05, + "loss": 187.4359, + "step": 14980 + }, + { + "epoch": 0.06056149678607934, + "grad_norm": 673.432373046875, + "learning_rate": 2.3984000000000003e-05, + "loss": 124.6808, + "step": 14990 + }, + { + "epoch": 0.06060189805144697, + "grad_norm": 1417.2762451171875, + "learning_rate": 2.4e-05, + "loss": 151.5447, + "step": 15000 + }, + { + "epoch": 0.060642299316814606, + "grad_norm": 1166.0643310546875, + "learning_rate": 2.4016000000000005e-05, + "loss": 150.9397, + "step": 15010 + }, + { + "epoch": 0.06068270058218223, + "grad_norm": 1263.1080322265625, + "learning_rate": 2.4032000000000003e-05, + "loss": 147.3255, + "step": 15020 + }, + { + "epoch": 0.060723101847549865, + "grad_norm": 656.23876953125, + "learning_rate": 2.4048e-05, + "loss": 158.397, + "step": 15030 + }, + { + "epoch": 0.0607635031129175, + "grad_norm": 1322.720947265625, + "learning_rate": 2.4064000000000002e-05, + "loss": 134.0966, + "step": 15040 + }, + { + "epoch": 0.06080390437828513, + "grad_norm": 788.550537109375, + "learning_rate": 2.408e-05, + "loss": 174.1096, + "step": 15050 + }, + { + "epoch": 0.06084430564365276, + "grad_norm": 1120.981689453125, + "learning_rate": 2.4096000000000004e-05, + "loss": 130.663, + "step": 15060 + }, + { + "epoch": 0.06088470690902039, + "grad_norm": 930.284912109375, + "learning_rate": 2.4112000000000002e-05, + "loss": 172.1123, + "step": 15070 + }, + { + "epoch": 0.060925108174388024, + "grad_norm": 2965.147216796875, + "learning_rate": 2.4128e-05, + "loss": 217.1598, + "step": 15080 + }, + { + "epoch": 0.06096550943975566, + "grad_norm": 2679.78515625, + "learning_rate": 2.4144000000000005e-05, + "loss": 176.2819, + "step": 15090 + }, + { + "epoch": 0.06100591070512328, + "grad_norm": 1492.6536865234375, + "learning_rate": 2.4160000000000002e-05, + "loss": 192.8286, + "step": 15100 + }, + { + "epoch": 0.061046311970490916, + "grad_norm": 1208.202880859375, + "learning_rate": 2.4176000000000004e-05, + "loss": 145.9959, + "step": 15110 + }, + { + "epoch": 0.06108671323585855, + "grad_norm": 1640.659423828125, + "learning_rate": 2.4192e-05, + "loss": 184.3186, + "step": 15120 + }, + { + "epoch": 0.06112711450122618, + "grad_norm": 847.74951171875, + "learning_rate": 2.4208e-05, + "loss": 94.8055, + "step": 15130 + }, + { + "epoch": 0.06116751576659381, + "grad_norm": 1882.6995849609375, + "learning_rate": 2.4224000000000004e-05, + "loss": 179.5817, + "step": 15140 + }, + { + "epoch": 0.06120791703196144, + "grad_norm": 910.8331909179688, + "learning_rate": 2.4240000000000002e-05, + "loss": 294.0912, + "step": 15150 + }, + { + "epoch": 0.061248318297329074, + "grad_norm": 1168.9476318359375, + "learning_rate": 2.4256000000000003e-05, + "loss": 273.9684, + "step": 15160 + }, + { + "epoch": 0.06128871956269671, + "grad_norm": 1800.0809326171875, + "learning_rate": 2.4272e-05, + "loss": 168.6481, + "step": 15170 + }, + { + "epoch": 0.06132912082806433, + "grad_norm": 2968.29150390625, + "learning_rate": 2.4288000000000002e-05, + "loss": 219.3987, + "step": 15180 + }, + { + "epoch": 0.061369522093431966, + "grad_norm": 3257.859375, + "learning_rate": 2.4304000000000003e-05, + "loss": 153.3993, + "step": 15190 + }, + { + "epoch": 0.0614099233587996, + "grad_norm": 719.5667114257812, + "learning_rate": 2.432e-05, + "loss": 179.2891, + "step": 15200 + }, + { + "epoch": 0.061450324624167225, + "grad_norm": 963.6610107421875, + "learning_rate": 2.4336000000000006e-05, + 
"loss": 173.9703, + "step": 15210 + }, + { + "epoch": 0.06149072588953486, + "grad_norm": 1687.5927734375, + "learning_rate": 2.4352000000000003e-05, + "loss": 200.9904, + "step": 15220 + }, + { + "epoch": 0.06153112715490249, + "grad_norm": 1103.3114013671875, + "learning_rate": 2.4368e-05, + "loss": 124.7939, + "step": 15230 + }, + { + "epoch": 0.061571528420270125, + "grad_norm": 1520.4241943359375, + "learning_rate": 2.4384000000000002e-05, + "loss": 118.1003, + "step": 15240 + }, + { + "epoch": 0.06161192968563775, + "grad_norm": 870.4832763671875, + "learning_rate": 2.44e-05, + "loss": 194.3774, + "step": 15250 + }, + { + "epoch": 0.061652330951005384, + "grad_norm": 408.4352722167969, + "learning_rate": 2.4416000000000005e-05, + "loss": 162.3016, + "step": 15260 + }, + { + "epoch": 0.06169273221637302, + "grad_norm": 1937.4906005859375, + "learning_rate": 2.4432000000000003e-05, + "loss": 283.8593, + "step": 15270 + }, + { + "epoch": 0.06173313348174065, + "grad_norm": 1454.977783203125, + "learning_rate": 2.4448e-05, + "loss": 130.2672, + "step": 15280 + }, + { + "epoch": 0.061773534747108276, + "grad_norm": 715.8368530273438, + "learning_rate": 2.4464000000000005e-05, + "loss": 104.7402, + "step": 15290 + }, + { + "epoch": 0.06181393601247591, + "grad_norm": 1866.5322265625, + "learning_rate": 2.4480000000000003e-05, + "loss": 169.5109, + "step": 15300 + }, + { + "epoch": 0.06185433727784354, + "grad_norm": 1251.082763671875, + "learning_rate": 2.4496000000000004e-05, + "loss": 160.4111, + "step": 15310 + }, + { + "epoch": 0.061894738543211175, + "grad_norm": 1537.179443359375, + "learning_rate": 2.4512000000000002e-05, + "loss": 153.087, + "step": 15320 + }, + { + "epoch": 0.0619351398085788, + "grad_norm": 1074.112060546875, + "learning_rate": 2.4528e-05, + "loss": 123.8266, + "step": 15330 + }, + { + "epoch": 0.061975541073946434, + "grad_norm": 1790.22998046875, + "learning_rate": 2.4544000000000004e-05, + "loss": 190.8621, + "step": 15340 + }, + { + "epoch": 0.06201594233931407, + "grad_norm": 614.25341796875, + "learning_rate": 2.4560000000000002e-05, + "loss": 160.5166, + "step": 15350 + }, + { + "epoch": 0.0620563436046817, + "grad_norm": 787.8048706054688, + "learning_rate": 2.4576e-05, + "loss": 119.5887, + "step": 15360 + }, + { + "epoch": 0.06209674487004933, + "grad_norm": 1496.0997314453125, + "learning_rate": 2.4592e-05, + "loss": 142.8696, + "step": 15370 + }, + { + "epoch": 0.06213714613541696, + "grad_norm": 429.21929931640625, + "learning_rate": 2.4608000000000002e-05, + "loss": 141.8167, + "step": 15380 + }, + { + "epoch": 0.06217754740078459, + "grad_norm": 4175.5009765625, + "learning_rate": 2.4624000000000003e-05, + "loss": 163.1653, + "step": 15390 + }, + { + "epoch": 0.062217948666152226, + "grad_norm": 1558.4129638671875, + "learning_rate": 2.464e-05, + "loss": 179.4715, + "step": 15400 + }, + { + "epoch": 0.06225834993151985, + "grad_norm": 868.0443115234375, + "learning_rate": 2.4656e-05, + "loss": 173.5915, + "step": 15410 + }, + { + "epoch": 0.062298751196887485, + "grad_norm": 1692.47802734375, + "learning_rate": 2.4672000000000004e-05, + "loss": 163.5249, + "step": 15420 + }, + { + "epoch": 0.06233915246225512, + "grad_norm": 823.198486328125, + "learning_rate": 2.4688e-05, + "loss": 142.6176, + "step": 15430 + }, + { + "epoch": 0.06237955372762275, + "grad_norm": 1579.255615234375, + "learning_rate": 2.4704000000000003e-05, + "loss": 245.4491, + "step": 15440 + }, + { + "epoch": 0.06241995499299038, + "grad_norm": 1782.1748046875, + 
"learning_rate": 2.472e-05, + "loss": 158.9307, + "step": 15450 + }, + { + "epoch": 0.06246035625835801, + "grad_norm": 885.89013671875, + "learning_rate": 2.4736e-05, + "loss": 123.9583, + "step": 15460 + }, + { + "epoch": 0.06250075752372564, + "grad_norm": 1921.191162109375, + "learning_rate": 2.4752000000000003e-05, + "loss": 182.9187, + "step": 15470 + }, + { + "epoch": 0.06254115878909328, + "grad_norm": 1945.1468505859375, + "learning_rate": 2.4768e-05, + "loss": 125.0932, + "step": 15480 + }, + { + "epoch": 0.0625815600544609, + "grad_norm": 1464.1646728515625, + "learning_rate": 2.4784000000000005e-05, + "loss": 178.6396, + "step": 15490 + }, + { + "epoch": 0.06262196131982854, + "grad_norm": 1363.4080810546875, + "learning_rate": 2.4800000000000003e-05, + "loss": 199.8617, + "step": 15500 + }, + { + "epoch": 0.06266236258519617, + "grad_norm": 1780.32666015625, + "learning_rate": 2.4816e-05, + "loss": 146.7752, + "step": 15510 + }, + { + "epoch": 0.0627027638505638, + "grad_norm": 1167.7327880859375, + "learning_rate": 2.4832000000000002e-05, + "loss": 146.4875, + "step": 15520 + }, + { + "epoch": 0.06274316511593143, + "grad_norm": 1051.5430908203125, + "learning_rate": 2.4848e-05, + "loss": 149.0116, + "step": 15530 + }, + { + "epoch": 0.06278356638129906, + "grad_norm": 800.5545654296875, + "learning_rate": 2.4864000000000005e-05, + "loss": 129.605, + "step": 15540 + }, + { + "epoch": 0.06282396764666669, + "grad_norm": 1484.2762451171875, + "learning_rate": 2.4880000000000002e-05, + "loss": 153.8512, + "step": 15550 + }, + { + "epoch": 0.06286436891203433, + "grad_norm": 1116.9107666015625, + "learning_rate": 2.4896e-05, + "loss": 181.5804, + "step": 15560 + }, + { + "epoch": 0.06290477017740195, + "grad_norm": 1099.352783203125, + "learning_rate": 2.4912e-05, + "loss": 176.6984, + "step": 15570 + }, + { + "epoch": 0.06294517144276959, + "grad_norm": 969.045166015625, + "learning_rate": 2.4928000000000003e-05, + "loss": 167.2933, + "step": 15580 + }, + { + "epoch": 0.06298557270813722, + "grad_norm": 3436.98486328125, + "learning_rate": 2.4944000000000004e-05, + "loss": 221.8033, + "step": 15590 + }, + { + "epoch": 0.06302597397350485, + "grad_norm": 1358.7958984375, + "learning_rate": 2.496e-05, + "loss": 212.1142, + "step": 15600 + }, + { + "epoch": 0.06306637523887249, + "grad_norm": 991.5151977539062, + "learning_rate": 2.4976e-05, + "loss": 155.7989, + "step": 15610 + }, + { + "epoch": 0.06310677650424011, + "grad_norm": 1949.333740234375, + "learning_rate": 2.4992000000000004e-05, + "loss": 167.794, + "step": 15620 + }, + { + "epoch": 0.06314717776960774, + "grad_norm": 1397.4315185546875, + "learning_rate": 2.5008000000000002e-05, + "loss": 170.0522, + "step": 15630 + }, + { + "epoch": 0.06318757903497538, + "grad_norm": 859.2783813476562, + "learning_rate": 2.5024000000000003e-05, + "loss": 197.5159, + "step": 15640 + }, + { + "epoch": 0.063227980300343, + "grad_norm": 5312.9580078125, + "learning_rate": 2.504e-05, + "loss": 218.9037, + "step": 15650 + }, + { + "epoch": 0.06326838156571064, + "grad_norm": 888.1822509765625, + "learning_rate": 2.5056e-05, + "loss": 176.3435, + "step": 15660 + }, + { + "epoch": 0.06330878283107827, + "grad_norm": 1954.6416015625, + "learning_rate": 2.5072000000000003e-05, + "loss": 123.3197, + "step": 15670 + }, + { + "epoch": 0.0633491840964459, + "grad_norm": 1060.547119140625, + "learning_rate": 2.5088e-05, + "loss": 202.8483, + "step": 15680 + }, + { + "epoch": 0.06338958536181354, + "grad_norm": 1446.5245361328125, + 
"learning_rate": 2.5104000000000002e-05, + "loss": 196.876, + "step": 15690 + }, + { + "epoch": 0.06342998662718116, + "grad_norm": 1314.761474609375, + "learning_rate": 2.5120000000000003e-05, + "loss": 154.1555, + "step": 15700 + }, + { + "epoch": 0.06347038789254879, + "grad_norm": 565.6538696289062, + "learning_rate": 2.5136e-05, + "loss": 126.4601, + "step": 15710 + }, + { + "epoch": 0.06351078915791643, + "grad_norm": 834.8342895507812, + "learning_rate": 2.5152000000000002e-05, + "loss": 184.4603, + "step": 15720 + }, + { + "epoch": 0.06355119042328405, + "grad_norm": 4520.55810546875, + "learning_rate": 2.5168e-05, + "loss": 137.8728, + "step": 15730 + }, + { + "epoch": 0.0635915916886517, + "grad_norm": 1665.9130859375, + "learning_rate": 2.5184000000000005e-05, + "loss": 178.4856, + "step": 15740 + }, + { + "epoch": 0.06363199295401932, + "grad_norm": 1707.5582275390625, + "learning_rate": 2.5200000000000003e-05, + "loss": 174.7732, + "step": 15750 + }, + { + "epoch": 0.06367239421938695, + "grad_norm": 862.8652954101562, + "learning_rate": 2.5216e-05, + "loss": 148.2816, + "step": 15760 + }, + { + "epoch": 0.06371279548475459, + "grad_norm": 887.178955078125, + "learning_rate": 2.5232e-05, + "loss": 171.1535, + "step": 15770 + }, + { + "epoch": 0.06375319675012221, + "grad_norm": 1456.6793212890625, + "learning_rate": 2.5248000000000003e-05, + "loss": 168.7933, + "step": 15780 + }, + { + "epoch": 0.06379359801548984, + "grad_norm": 1489.5843505859375, + "learning_rate": 2.5264000000000004e-05, + "loss": 169.4203, + "step": 15790 + }, + { + "epoch": 0.06383399928085748, + "grad_norm": 816.1390991210938, + "learning_rate": 2.5280000000000002e-05, + "loss": 236.9377, + "step": 15800 + }, + { + "epoch": 0.0638744005462251, + "grad_norm": 1159.039794921875, + "learning_rate": 2.5296e-05, + "loss": 131.3749, + "step": 15810 + }, + { + "epoch": 0.06391480181159274, + "grad_norm": 960.1936645507812, + "learning_rate": 2.5312000000000004e-05, + "loss": 141.835, + "step": 15820 + }, + { + "epoch": 0.06395520307696037, + "grad_norm": 3455.357177734375, + "learning_rate": 2.5328000000000002e-05, + "loss": 152.1159, + "step": 15830 + }, + { + "epoch": 0.063995604342328, + "grad_norm": 1454.54052734375, + "learning_rate": 2.5344000000000003e-05, + "loss": 100.5891, + "step": 15840 + }, + { + "epoch": 0.06403600560769564, + "grad_norm": 1210.58642578125, + "learning_rate": 2.536e-05, + "loss": 149.9548, + "step": 15850 + }, + { + "epoch": 0.06407640687306326, + "grad_norm": 1093.5389404296875, + "learning_rate": 2.5376e-05, + "loss": 166.8361, + "step": 15860 + }, + { + "epoch": 0.06411680813843089, + "grad_norm": 970.94970703125, + "learning_rate": 2.5392000000000004e-05, + "loss": 153.5655, + "step": 15870 + }, + { + "epoch": 0.06415720940379853, + "grad_norm": 838.3987426757812, + "learning_rate": 2.5408e-05, + "loss": 107.348, + "step": 15880 + }, + { + "epoch": 0.06419761066916616, + "grad_norm": 1320.478515625, + "learning_rate": 2.5424000000000003e-05, + "loss": 258.1474, + "step": 15890 + }, + { + "epoch": 0.0642380119345338, + "grad_norm": 828.8936767578125, + "learning_rate": 2.5440000000000004e-05, + "loss": 135.7182, + "step": 15900 + }, + { + "epoch": 0.06427841319990142, + "grad_norm": 2003.343505859375, + "learning_rate": 2.5456e-05, + "loss": 133.851, + "step": 15910 + }, + { + "epoch": 0.06431881446526905, + "grad_norm": 868.5172729492188, + "learning_rate": 2.5472000000000003e-05, + "loss": 162.7141, + "step": 15920 + }, + { + "epoch": 0.06435921573063669, + "grad_norm": 
1344.43798828125, + "learning_rate": 2.5488e-05, + "loss": 222.709, + "step": 15930 + }, + { + "epoch": 0.06439961699600431, + "grad_norm": 599.7905883789062, + "learning_rate": 2.5504e-05, + "loss": 143.4741, + "step": 15940 + }, + { + "epoch": 0.06444001826137194, + "grad_norm": 721.102294921875, + "learning_rate": 2.5520000000000003e-05, + "loss": 128.2398, + "step": 15950 + }, + { + "epoch": 0.06448041952673958, + "grad_norm": 816.4972534179688, + "learning_rate": 2.5536e-05, + "loss": 152.2355, + "step": 15960 + }, + { + "epoch": 0.0645208207921072, + "grad_norm": 1271.5546875, + "learning_rate": 2.5552000000000002e-05, + "loss": 182.0818, + "step": 15970 + }, + { + "epoch": 0.06456122205747485, + "grad_norm": 1532.1783447265625, + "learning_rate": 2.5568000000000003e-05, + "loss": 197.6927, + "step": 15980 + }, + { + "epoch": 0.06460162332284247, + "grad_norm": 1093.992919921875, + "learning_rate": 2.5584e-05, + "loss": 184.1583, + "step": 15990 + }, + { + "epoch": 0.0646420245882101, + "grad_norm": 1920.5660400390625, + "learning_rate": 2.5600000000000002e-05, + "loss": 203.8719, + "step": 16000 + }, + { + "epoch": 0.06468242585357774, + "grad_norm": 11341.0087890625, + "learning_rate": 2.5616e-05, + "loss": 245.7065, + "step": 16010 + }, + { + "epoch": 0.06472282711894536, + "grad_norm": 2457.092529296875, + "learning_rate": 2.5632000000000005e-05, + "loss": 177.3619, + "step": 16020 + }, + { + "epoch": 0.06476322838431299, + "grad_norm": 1455.2933349609375, + "learning_rate": 2.5648000000000002e-05, + "loss": 194.29, + "step": 16030 + }, + { + "epoch": 0.06480362964968063, + "grad_norm": 946.679931640625, + "learning_rate": 2.5664e-05, + "loss": 199.3132, + "step": 16040 + }, + { + "epoch": 0.06484403091504826, + "grad_norm": 1318.2392578125, + "learning_rate": 2.568e-05, + "loss": 141.0971, + "step": 16050 + }, + { + "epoch": 0.0648844321804159, + "grad_norm": 1914.954345703125, + "learning_rate": 2.5696e-05, + "loss": 123.1144, + "step": 16060 + }, + { + "epoch": 0.06492483344578352, + "grad_norm": 1321.2059326171875, + "learning_rate": 2.5712000000000004e-05, + "loss": 172.8179, + "step": 16070 + }, + { + "epoch": 0.06496523471115115, + "grad_norm": 3343.878662109375, + "learning_rate": 2.5728e-05, + "loss": 160.9229, + "step": 16080 + }, + { + "epoch": 0.06500563597651879, + "grad_norm": 1544.3414306640625, + "learning_rate": 2.5744e-05, + "loss": 166.7863, + "step": 16090 + }, + { + "epoch": 0.06504603724188641, + "grad_norm": 6488.65771484375, + "learning_rate": 2.5760000000000004e-05, + "loss": 157.5711, + "step": 16100 + }, + { + "epoch": 0.06508643850725404, + "grad_norm": 1307.9971923828125, + "learning_rate": 2.5776000000000002e-05, + "loss": 130.4945, + "step": 16110 + }, + { + "epoch": 0.06512683977262168, + "grad_norm": 1637.953125, + "learning_rate": 2.5792000000000003e-05, + "loss": 124.7523, + "step": 16120 + }, + { + "epoch": 0.0651672410379893, + "grad_norm": 4761.41259765625, + "learning_rate": 2.5808e-05, + "loss": 198.0917, + "step": 16130 + }, + { + "epoch": 0.06520764230335695, + "grad_norm": 2059.017333984375, + "learning_rate": 2.5824e-05, + "loss": 224.5677, + "step": 16140 + }, + { + "epoch": 0.06524804356872457, + "grad_norm": 948.0253295898438, + "learning_rate": 2.5840000000000003e-05, + "loss": 189.1101, + "step": 16150 + }, + { + "epoch": 0.0652884448340922, + "grad_norm": 1249.06982421875, + "learning_rate": 2.5856e-05, + "loss": 138.2282, + "step": 16160 + }, + { + "epoch": 0.06532884609945984, + "grad_norm": 1457.1297607421875, + 
"learning_rate": 2.5872000000000002e-05, + "loss": 193.1476, + "step": 16170 + }, + { + "epoch": 0.06536924736482747, + "grad_norm": 1416.79345703125, + "learning_rate": 2.5888000000000004e-05, + "loss": 134.1458, + "step": 16180 + }, + { + "epoch": 0.06540964863019509, + "grad_norm": 1181.331298828125, + "learning_rate": 2.5904e-05, + "loss": 192.2691, + "step": 16190 + }, + { + "epoch": 0.06545004989556273, + "grad_norm": 1155.0457763671875, + "learning_rate": 2.5920000000000003e-05, + "loss": 161.9441, + "step": 16200 + }, + { + "epoch": 0.06549045116093036, + "grad_norm": 959.2298583984375, + "learning_rate": 2.5936e-05, + "loss": 197.6703, + "step": 16210 + }, + { + "epoch": 0.065530852426298, + "grad_norm": 726.1742553710938, + "learning_rate": 2.5952000000000005e-05, + "loss": 144.9934, + "step": 16220 + }, + { + "epoch": 0.06557125369166562, + "grad_norm": 1051.063720703125, + "learning_rate": 2.5968000000000003e-05, + "loss": 192.3852, + "step": 16230 + }, + { + "epoch": 0.06561165495703325, + "grad_norm": 1421.943359375, + "learning_rate": 2.5984e-05, + "loss": 146.0789, + "step": 16240 + }, + { + "epoch": 0.06565205622240089, + "grad_norm": 733.9120483398438, + "learning_rate": 2.6000000000000002e-05, + "loss": 118.5099, + "step": 16250 + }, + { + "epoch": 0.06569245748776852, + "grad_norm": 490.5064697265625, + "learning_rate": 2.6016e-05, + "loss": 215.8195, + "step": 16260 + }, + { + "epoch": 0.06573285875313614, + "grad_norm": 913.6560668945312, + "learning_rate": 2.6032000000000004e-05, + "loss": 170.7989, + "step": 16270 + }, + { + "epoch": 0.06577326001850378, + "grad_norm": 2034.986083984375, + "learning_rate": 2.6048000000000002e-05, + "loss": 236.7844, + "step": 16280 + }, + { + "epoch": 0.06581366128387141, + "grad_norm": 562.3770751953125, + "learning_rate": 2.6064e-05, + "loss": 147.8844, + "step": 16290 + }, + { + "epoch": 0.06585406254923905, + "grad_norm": 1596.8494873046875, + "learning_rate": 2.6080000000000004e-05, + "loss": 213.8854, + "step": 16300 + }, + { + "epoch": 0.06589446381460667, + "grad_norm": 1114.106201171875, + "learning_rate": 2.6096000000000002e-05, + "loss": 138.1682, + "step": 16310 + }, + { + "epoch": 0.0659348650799743, + "grad_norm": 973.8516845703125, + "learning_rate": 2.6112000000000003e-05, + "loss": 136.866, + "step": 16320 + }, + { + "epoch": 0.06597526634534194, + "grad_norm": 740.34521484375, + "learning_rate": 2.6128e-05, + "loss": 200.7124, + "step": 16330 + }, + { + "epoch": 0.06601566761070957, + "grad_norm": 1407.721923828125, + "learning_rate": 2.6144e-05, + "loss": 160.1144, + "step": 16340 + }, + { + "epoch": 0.06605606887607719, + "grad_norm": 2428.232421875, + "learning_rate": 2.6160000000000004e-05, + "loss": 239.0257, + "step": 16350 + }, + { + "epoch": 0.06609647014144483, + "grad_norm": 783.7161254882812, + "learning_rate": 2.6176e-05, + "loss": 131.6811, + "step": 16360 + }, + { + "epoch": 0.06613687140681246, + "grad_norm": 570.0099487304688, + "learning_rate": 2.6192000000000003e-05, + "loss": 132.0175, + "step": 16370 + }, + { + "epoch": 0.0661772726721801, + "grad_norm": 1306.68310546875, + "learning_rate": 2.6208000000000004e-05, + "loss": 154.7275, + "step": 16380 + }, + { + "epoch": 0.06621767393754772, + "grad_norm": 718.3677978515625, + "learning_rate": 2.6224e-05, + "loss": 160.8331, + "step": 16390 + }, + { + "epoch": 0.06625807520291535, + "grad_norm": 1221.4266357421875, + "learning_rate": 2.6240000000000003e-05, + "loss": 125.6958, + "step": 16400 + }, + { + "epoch": 0.06629847646828299, + "grad_norm": 
2354.071044921875, + "learning_rate": 2.6256e-05, + "loss": 153.9134, + "step": 16410 + }, + { + "epoch": 0.06633887773365062, + "grad_norm": 976.1270751953125, + "learning_rate": 2.6272000000000005e-05, + "loss": 130.7828, + "step": 16420 + }, + { + "epoch": 0.06637927899901824, + "grad_norm": 2494.61865234375, + "learning_rate": 2.6288000000000003e-05, + "loss": 113.0146, + "step": 16430 + }, + { + "epoch": 0.06641968026438588, + "grad_norm": 830.2864990234375, + "learning_rate": 2.6304e-05, + "loss": 120.6537, + "step": 16440 + }, + { + "epoch": 0.06646008152975351, + "grad_norm": 481.46295166015625, + "learning_rate": 2.6320000000000002e-05, + "loss": 135.0639, + "step": 16450 + }, + { + "epoch": 0.06650048279512115, + "grad_norm": 545.0795288085938, + "learning_rate": 2.6336e-05, + "loss": 141.4272, + "step": 16460 + }, + { + "epoch": 0.06654088406048878, + "grad_norm": 1272.570556640625, + "learning_rate": 2.6352000000000005e-05, + "loss": 140.6075, + "step": 16470 + }, + { + "epoch": 0.0665812853258564, + "grad_norm": 1238.59326171875, + "learning_rate": 2.6368000000000002e-05, + "loss": 126.1327, + "step": 16480 + }, + { + "epoch": 0.06662168659122404, + "grad_norm": 1715.858154296875, + "learning_rate": 2.6384e-05, + "loss": 120.6355, + "step": 16490 + }, + { + "epoch": 0.06666208785659167, + "grad_norm": 1026.4405517578125, + "learning_rate": 2.6400000000000005e-05, + "loss": 202.4467, + "step": 16500 + }, + { + "epoch": 0.0667024891219593, + "grad_norm": 760.9880981445312, + "learning_rate": 2.6416000000000003e-05, + "loss": 158.297, + "step": 16510 + }, + { + "epoch": 0.06674289038732693, + "grad_norm": 850.3055419921875, + "learning_rate": 2.6432000000000004e-05, + "loss": 156.7254, + "step": 16520 + }, + { + "epoch": 0.06678329165269456, + "grad_norm": 632.5795288085938, + "learning_rate": 2.6448e-05, + "loss": 140.229, + "step": 16530 + }, + { + "epoch": 0.0668236929180622, + "grad_norm": 2129.2236328125, + "learning_rate": 2.6464e-05, + "loss": 186.6124, + "step": 16540 + }, + { + "epoch": 0.06686409418342983, + "grad_norm": 1143.651123046875, + "learning_rate": 2.6480000000000004e-05, + "loss": 175.8427, + "step": 16550 + }, + { + "epoch": 0.06690449544879745, + "grad_norm": 1214.3197021484375, + "learning_rate": 2.6496000000000002e-05, + "loss": 145.5008, + "step": 16560 + }, + { + "epoch": 0.06694489671416509, + "grad_norm": 586.2470703125, + "learning_rate": 2.6512e-05, + "loss": 144.2627, + "step": 16570 + }, + { + "epoch": 0.06698529797953272, + "grad_norm": 1379.0516357421875, + "learning_rate": 2.6528000000000004e-05, + "loss": 148.8099, + "step": 16580 + }, + { + "epoch": 0.06702569924490034, + "grad_norm": 1403.1612548828125, + "learning_rate": 2.6544000000000002e-05, + "loss": 219.5404, + "step": 16590 + }, + { + "epoch": 0.06706610051026798, + "grad_norm": 1394.89306640625, + "learning_rate": 2.6560000000000003e-05, + "loss": 184.0305, + "step": 16600 + }, + { + "epoch": 0.06710650177563561, + "grad_norm": 551.115478515625, + "learning_rate": 2.6576e-05, + "loss": 152.7451, + "step": 16610 + }, + { + "epoch": 0.06714690304100325, + "grad_norm": 802.965576171875, + "learning_rate": 2.6592e-05, + "loss": 191.4807, + "step": 16620 + }, + { + "epoch": 0.06718730430637088, + "grad_norm": 1042.13427734375, + "learning_rate": 2.6608000000000003e-05, + "loss": 266.9655, + "step": 16630 + }, + { + "epoch": 0.0672277055717385, + "grad_norm": 1988.9599609375, + "learning_rate": 2.6624e-05, + "loss": 221.7701, + "step": 16640 + }, + { + "epoch": 0.06726810683710614, + 
"grad_norm": 1135.5465087890625, + "learning_rate": 2.6640000000000002e-05, + "loss": 141.6292, + "step": 16650 + }, + { + "epoch": 0.06730850810247377, + "grad_norm": 1168.9017333984375, + "learning_rate": 2.6656e-05, + "loss": 142.4856, + "step": 16660 + }, + { + "epoch": 0.0673489093678414, + "grad_norm": 821.0917358398438, + "learning_rate": 2.6672e-05, + "loss": 147.9627, + "step": 16670 + }, + { + "epoch": 0.06738931063320903, + "grad_norm": 1472.503173828125, + "learning_rate": 2.6688000000000003e-05, + "loss": 154.0404, + "step": 16680 + }, + { + "epoch": 0.06742971189857666, + "grad_norm": 451.1228942871094, + "learning_rate": 2.6704e-05, + "loss": 135.3651, + "step": 16690 + }, + { + "epoch": 0.0674701131639443, + "grad_norm": 707.6647338867188, + "learning_rate": 2.6720000000000005e-05, + "loss": 152.0818, + "step": 16700 + }, + { + "epoch": 0.06751051442931193, + "grad_norm": 789.9301147460938, + "learning_rate": 2.6736000000000003e-05, + "loss": 149.7495, + "step": 16710 + }, + { + "epoch": 0.06755091569467955, + "grad_norm": 772.65283203125, + "learning_rate": 2.6752e-05, + "loss": 148.9099, + "step": 16720 + }, + { + "epoch": 0.06759131696004719, + "grad_norm": 1156.409423828125, + "learning_rate": 2.6768000000000002e-05, + "loss": 165.1214, + "step": 16730 + }, + { + "epoch": 0.06763171822541482, + "grad_norm": 767.896240234375, + "learning_rate": 2.6784e-05, + "loss": 126.301, + "step": 16740 + }, + { + "epoch": 0.06767211949078245, + "grad_norm": 909.0670776367188, + "learning_rate": 2.6800000000000004e-05, + "loss": 129.9579, + "step": 16750 + }, + { + "epoch": 0.06771252075615009, + "grad_norm": 1180.4197998046875, + "learning_rate": 2.6816000000000002e-05, + "loss": 130.0526, + "step": 16760 + }, + { + "epoch": 0.06775292202151771, + "grad_norm": 885.28125, + "learning_rate": 2.6832e-05, + "loss": 158.2902, + "step": 16770 + }, + { + "epoch": 0.06779332328688535, + "grad_norm": 1079.5313720703125, + "learning_rate": 2.6848000000000005e-05, + "loss": 179.5551, + "step": 16780 + }, + { + "epoch": 0.06783372455225298, + "grad_norm": 1556.186279296875, + "learning_rate": 2.6864000000000002e-05, + "loss": 178.4147, + "step": 16790 + }, + { + "epoch": 0.0678741258176206, + "grad_norm": 1662.3153076171875, + "learning_rate": 2.6880000000000004e-05, + "loss": 162.2894, + "step": 16800 + }, + { + "epoch": 0.06791452708298824, + "grad_norm": 1424.922607421875, + "learning_rate": 2.6896e-05, + "loss": 230.0277, + "step": 16810 + }, + { + "epoch": 0.06795492834835587, + "grad_norm": 1668.8187255859375, + "learning_rate": 2.6912e-05, + "loss": 167.8363, + "step": 16820 + }, + { + "epoch": 0.0679953296137235, + "grad_norm": 1573.07861328125, + "learning_rate": 2.6928000000000004e-05, + "loss": 148.6213, + "step": 16830 + }, + { + "epoch": 0.06803573087909114, + "grad_norm": 1306.1380615234375, + "learning_rate": 2.6944e-05, + "loss": 226.4797, + "step": 16840 + }, + { + "epoch": 0.06807613214445876, + "grad_norm": 2957.28466796875, + "learning_rate": 2.6960000000000003e-05, + "loss": 190.6916, + "step": 16850 + }, + { + "epoch": 0.0681165334098264, + "grad_norm": 3546.421630859375, + "learning_rate": 2.6976e-05, + "loss": 165.3576, + "step": 16860 + }, + { + "epoch": 0.06815693467519403, + "grad_norm": 987.8593139648438, + "learning_rate": 2.6992000000000002e-05, + "loss": 178.4886, + "step": 16870 + }, + { + "epoch": 0.06819733594056165, + "grad_norm": 2393.57861328125, + "learning_rate": 2.7008000000000003e-05, + "loss": 159.4878, + "step": 16880 + }, + { + "epoch": 
0.0682377372059293, + "grad_norm": 1266.3116455078125, + "learning_rate": 2.7024e-05, + "loss": 139.0926, + "step": 16890 + }, + { + "epoch": 0.06827813847129692, + "grad_norm": 982.8475341796875, + "learning_rate": 2.7040000000000005e-05, + "loss": 200.8793, + "step": 16900 + }, + { + "epoch": 0.06831853973666455, + "grad_norm": 1040.5333251953125, + "learning_rate": 2.7056000000000003e-05, + "loss": 128.8209, + "step": 16910 + }, + { + "epoch": 0.06835894100203219, + "grad_norm": 2438.3291015625, + "learning_rate": 2.7072e-05, + "loss": 230.3979, + "step": 16920 + }, + { + "epoch": 0.06839934226739981, + "grad_norm": 1081.27783203125, + "learning_rate": 2.7088000000000002e-05, + "loss": 156.8892, + "step": 16930 + }, + { + "epoch": 0.06843974353276745, + "grad_norm": 514.3671264648438, + "learning_rate": 2.7104e-05, + "loss": 156.4488, + "step": 16940 + }, + { + "epoch": 0.06848014479813508, + "grad_norm": 772.23046875, + "learning_rate": 2.7120000000000005e-05, + "loss": 116.2345, + "step": 16950 + }, + { + "epoch": 0.0685205460635027, + "grad_norm": 1381.51025390625, + "learning_rate": 2.7136000000000002e-05, + "loss": 202.5787, + "step": 16960 + }, + { + "epoch": 0.06856094732887034, + "grad_norm": 2335.098876953125, + "learning_rate": 2.7152e-05, + "loss": 197.7046, + "step": 16970 + }, + { + "epoch": 0.06860134859423797, + "grad_norm": 777.8046264648438, + "learning_rate": 2.7168000000000005e-05, + "loss": 213.4342, + "step": 16980 + }, + { + "epoch": 0.0686417498596056, + "grad_norm": 2403.986572265625, + "learning_rate": 2.7184000000000003e-05, + "loss": 185.1913, + "step": 16990 + }, + { + "epoch": 0.06868215112497324, + "grad_norm": 909.466064453125, + "learning_rate": 2.7200000000000004e-05, + "loss": 255.2025, + "step": 17000 + }, + { + "epoch": 0.06872255239034086, + "grad_norm": 1579.209228515625, + "learning_rate": 2.7216e-05, + "loss": 143.354, + "step": 17010 + }, + { + "epoch": 0.0687629536557085, + "grad_norm": 2410.959716796875, + "learning_rate": 2.7232e-05, + "loss": 174.4973, + "step": 17020 + }, + { + "epoch": 0.06880335492107613, + "grad_norm": 873.26318359375, + "learning_rate": 2.7248000000000004e-05, + "loss": 200.3045, + "step": 17030 + }, + { + "epoch": 0.06884375618644376, + "grad_norm": 2788.401123046875, + "learning_rate": 2.7264000000000002e-05, + "loss": 182.6357, + "step": 17040 + }, + { + "epoch": 0.0688841574518114, + "grad_norm": 706.1448364257812, + "learning_rate": 2.7280000000000003e-05, + "loss": 174.6851, + "step": 17050 + }, + { + "epoch": 0.06892455871717902, + "grad_norm": 977.5054321289062, + "learning_rate": 2.7296e-05, + "loss": 143.1549, + "step": 17060 + }, + { + "epoch": 0.06896495998254665, + "grad_norm": 1838.663330078125, + "learning_rate": 2.7312000000000002e-05, + "loss": 128.8516, + "step": 17070 + }, + { + "epoch": 0.06900536124791429, + "grad_norm": 1287.214111328125, + "learning_rate": 2.7328000000000003e-05, + "loss": 116.2414, + "step": 17080 + }, + { + "epoch": 0.06904576251328191, + "grad_norm": 732.8470458984375, + "learning_rate": 2.7344e-05, + "loss": 102.9589, + "step": 17090 + }, + { + "epoch": 0.06908616377864955, + "grad_norm": 1503.9390869140625, + "learning_rate": 2.7360000000000006e-05, + "loss": 148.5086, + "step": 17100 + }, + { + "epoch": 0.06912656504401718, + "grad_norm": 828.2022705078125, + "learning_rate": 2.7376000000000004e-05, + "loss": 167.7288, + "step": 17110 + }, + { + "epoch": 0.0691669663093848, + "grad_norm": 1106.20751953125, + "learning_rate": 2.7392e-05, + "loss": 176.8142, + "step": 17120 + 
}, + { + "epoch": 0.06920736757475245, + "grad_norm": 1061.0313720703125, + "learning_rate": 2.7408000000000003e-05, + "loss": 149.9585, + "step": 17130 + }, + { + "epoch": 0.06924776884012007, + "grad_norm": 1465.9508056640625, + "learning_rate": 2.7424e-05, + "loss": 152.9072, + "step": 17140 + }, + { + "epoch": 0.0692881701054877, + "grad_norm": 7130.65673828125, + "learning_rate": 2.7440000000000005e-05, + "loss": 195.1458, + "step": 17150 + }, + { + "epoch": 0.06932857137085534, + "grad_norm": 7064.46923828125, + "learning_rate": 2.7456000000000003e-05, + "loss": 145.7483, + "step": 17160 + }, + { + "epoch": 0.06936897263622296, + "grad_norm": 704.233154296875, + "learning_rate": 2.7472e-05, + "loss": 175.6077, + "step": 17170 + }, + { + "epoch": 0.0694093739015906, + "grad_norm": 4458.50634765625, + "learning_rate": 2.7488000000000005e-05, + "loss": 166.6151, + "step": 17180 + }, + { + "epoch": 0.06944977516695823, + "grad_norm": 967.6094360351562, + "learning_rate": 2.7504000000000003e-05, + "loss": 271.6618, + "step": 17190 + }, + { + "epoch": 0.06949017643232586, + "grad_norm": 801.8226318359375, + "learning_rate": 2.752e-05, + "loss": 160.7607, + "step": 17200 + }, + { + "epoch": 0.0695305776976935, + "grad_norm": 839.94384765625, + "learning_rate": 2.7536000000000002e-05, + "loss": 131.9188, + "step": 17210 + }, + { + "epoch": 0.06957097896306112, + "grad_norm": 1940.8067626953125, + "learning_rate": 2.7552e-05, + "loss": 185.3835, + "step": 17220 + }, + { + "epoch": 0.06961138022842875, + "grad_norm": 1523.3514404296875, + "learning_rate": 2.7568000000000004e-05, + "loss": 166.4924, + "step": 17230 + }, + { + "epoch": 0.06965178149379639, + "grad_norm": 922.7266845703125, + "learning_rate": 2.7584000000000002e-05, + "loss": 115.7828, + "step": 17240 + }, + { + "epoch": 0.06969218275916401, + "grad_norm": 298.5150451660156, + "learning_rate": 2.76e-05, + "loss": 142.0611, + "step": 17250 + }, + { + "epoch": 0.06973258402453165, + "grad_norm": 512.4647827148438, + "learning_rate": 2.7616e-05, + "loss": 86.2635, + "step": 17260 + }, + { + "epoch": 0.06977298528989928, + "grad_norm": 817.6835327148438, + "learning_rate": 2.7632000000000002e-05, + "loss": 136.5038, + "step": 17270 + }, + { + "epoch": 0.06981338655526691, + "grad_norm": 6070.59326171875, + "learning_rate": 2.7648000000000004e-05, + "loss": 313.964, + "step": 17280 + }, + { + "epoch": 0.06985378782063455, + "grad_norm": 1589.6690673828125, + "learning_rate": 2.7664e-05, + "loss": 196.3971, + "step": 17290 + }, + { + "epoch": 0.06989418908600217, + "grad_norm": 2827.360107421875, + "learning_rate": 2.768e-05, + "loss": 187.2893, + "step": 17300 + }, + { + "epoch": 0.0699345903513698, + "grad_norm": 1332.740234375, + "learning_rate": 2.7696000000000004e-05, + "loss": 164.2328, + "step": 17310 + }, + { + "epoch": 0.06997499161673744, + "grad_norm": 934.0603637695312, + "learning_rate": 2.7712e-05, + "loss": 164.4768, + "step": 17320 + }, + { + "epoch": 0.07001539288210507, + "grad_norm": 566.90966796875, + "learning_rate": 2.7728000000000003e-05, + "loss": 150.5375, + "step": 17330 + }, + { + "epoch": 0.0700557941474727, + "grad_norm": 737.4328002929688, + "learning_rate": 2.7744e-05, + "loss": 161.6458, + "step": 17340 + }, + { + "epoch": 0.07009619541284033, + "grad_norm": 642.9918823242188, + "learning_rate": 2.7760000000000002e-05, + "loss": 174.9653, + "step": 17350 + }, + { + "epoch": 0.07013659667820796, + "grad_norm": 1694.597412109375, + "learning_rate": 2.7776000000000003e-05, + "loss": 169.8124, + "step": 
17360 + }, + { + "epoch": 0.0701769979435756, + "grad_norm": 3195.741455078125, + "learning_rate": 2.7792e-05, + "loss": 223.3085, + "step": 17370 + }, + { + "epoch": 0.07021739920894322, + "grad_norm": 1232.7979736328125, + "learning_rate": 2.7808000000000005e-05, + "loss": 190.8486, + "step": 17380 + }, + { + "epoch": 0.07025780047431085, + "grad_norm": 914.6426391601562, + "learning_rate": 2.7824000000000003e-05, + "loss": 136.7934, + "step": 17390 + }, + { + "epoch": 0.07029820173967849, + "grad_norm": 872.2842407226562, + "learning_rate": 2.784e-05, + "loss": 86.614, + "step": 17400 + }, + { + "epoch": 0.07033860300504612, + "grad_norm": 698.7125244140625, + "learning_rate": 2.7856000000000002e-05, + "loss": 233.3906, + "step": 17410 + }, + { + "epoch": 0.07037900427041376, + "grad_norm": 1155.395751953125, + "learning_rate": 2.7872e-05, + "loss": 226.2394, + "step": 17420 + }, + { + "epoch": 0.07041940553578138, + "grad_norm": 858.9786376953125, + "learning_rate": 2.7888000000000005e-05, + "loss": 134.8895, + "step": 17430 + }, + { + "epoch": 0.07045980680114901, + "grad_norm": 1098.1910400390625, + "learning_rate": 2.7904000000000003e-05, + "loss": 131.8406, + "step": 17440 + }, + { + "epoch": 0.07050020806651665, + "grad_norm": 830.5571899414062, + "learning_rate": 2.792e-05, + "loss": 110.891, + "step": 17450 + }, + { + "epoch": 0.07054060933188427, + "grad_norm": 1311.156005859375, + "learning_rate": 2.7936e-05, + "loss": 160.1189, + "step": 17460 + }, + { + "epoch": 0.0705810105972519, + "grad_norm": 2313.90478515625, + "learning_rate": 2.7952000000000003e-05, + "loss": 163.2786, + "step": 17470 + }, + { + "epoch": 0.07062141186261954, + "grad_norm": 1204.2625732421875, + "learning_rate": 2.7968000000000004e-05, + "loss": 137.831, + "step": 17480 + }, + { + "epoch": 0.07066181312798717, + "grad_norm": 573.0549926757812, + "learning_rate": 2.7984000000000002e-05, + "loss": 131.9372, + "step": 17490 + }, + { + "epoch": 0.0707022143933548, + "grad_norm": 841.6285400390625, + "learning_rate": 2.8e-05, + "loss": 216.74, + "step": 17500 + }, + { + "epoch": 0.07074261565872243, + "grad_norm": 12401.87890625, + "learning_rate": 2.8016000000000004e-05, + "loss": 192.0267, + "step": 17510 + }, + { + "epoch": 0.07078301692409006, + "grad_norm": 1060.1409912109375, + "learning_rate": 2.8032000000000002e-05, + "loss": 118.6904, + "step": 17520 + }, + { + "epoch": 0.0708234181894577, + "grad_norm": 1797.1884765625, + "learning_rate": 2.8048000000000003e-05, + "loss": 151.0897, + "step": 17530 + }, + { + "epoch": 0.07086381945482532, + "grad_norm": 543.3251342773438, + "learning_rate": 2.8064e-05, + "loss": 128.8838, + "step": 17540 + }, + { + "epoch": 0.07090422072019295, + "grad_norm": 1241.7158203125, + "learning_rate": 2.8080000000000002e-05, + "loss": 225.5486, + "step": 17550 + }, + { + "epoch": 0.07094462198556059, + "grad_norm": 776.8890991210938, + "learning_rate": 2.8096000000000003e-05, + "loss": 192.7116, + "step": 17560 + }, + { + "epoch": 0.07098502325092822, + "grad_norm": 467.0813293457031, + "learning_rate": 2.8112e-05, + "loss": 107.9122, + "step": 17570 + }, + { + "epoch": 0.07102542451629586, + "grad_norm": 689.6055908203125, + "learning_rate": 2.8128000000000006e-05, + "loss": 153.7161, + "step": 17580 + }, + { + "epoch": 0.07106582578166348, + "grad_norm": 1045.6322021484375, + "learning_rate": 2.8144000000000004e-05, + "loss": 145.0255, + "step": 17590 + }, + { + "epoch": 0.07110622704703111, + "grad_norm": 800.5152587890625, + "learning_rate": 2.816e-05, + "loss": 
226.8169, + "step": 17600 + }, + { + "epoch": 0.07114662831239875, + "grad_norm": 1582.339599609375, + "learning_rate": 2.8176000000000003e-05, + "loss": 156.5265, + "step": 17610 + }, + { + "epoch": 0.07118702957776638, + "grad_norm": 1183.8358154296875, + "learning_rate": 2.8192e-05, + "loss": 193.3922, + "step": 17620 + }, + { + "epoch": 0.071227430843134, + "grad_norm": 676.0487670898438, + "learning_rate": 2.8208000000000005e-05, + "loss": 143.6772, + "step": 17630 + }, + { + "epoch": 0.07126783210850164, + "grad_norm": 804.8961181640625, + "learning_rate": 2.8224000000000003e-05, + "loss": 111.7669, + "step": 17640 + }, + { + "epoch": 0.07130823337386927, + "grad_norm": 1682.43310546875, + "learning_rate": 2.824e-05, + "loss": 189.0991, + "step": 17650 + }, + { + "epoch": 0.07134863463923691, + "grad_norm": 759.1065673828125, + "learning_rate": 2.8256000000000002e-05, + "loss": 139.1096, + "step": 17660 + }, + { + "epoch": 0.07138903590460453, + "grad_norm": 2441.015869140625, + "learning_rate": 2.8272000000000003e-05, + "loss": 235.0398, + "step": 17670 + }, + { + "epoch": 0.07142943716997216, + "grad_norm": 1266.8140869140625, + "learning_rate": 2.8288000000000004e-05, + "loss": 215.3387, + "step": 17680 + }, + { + "epoch": 0.0714698384353398, + "grad_norm": 454.6260681152344, + "learning_rate": 2.8304000000000002e-05, + "loss": 167.471, + "step": 17690 + }, + { + "epoch": 0.07151023970070743, + "grad_norm": 4604.65185546875, + "learning_rate": 2.832e-05, + "loss": 153.389, + "step": 17700 + }, + { + "epoch": 0.07155064096607505, + "grad_norm": 1276.393798828125, + "learning_rate": 2.8336000000000004e-05, + "loss": 150.8094, + "step": 17710 + }, + { + "epoch": 0.07159104223144269, + "grad_norm": 984.7479248046875, + "learning_rate": 2.8352000000000002e-05, + "loss": 120.8513, + "step": 17720 + }, + { + "epoch": 0.07163144349681032, + "grad_norm": 952.474365234375, + "learning_rate": 2.8368000000000004e-05, + "loss": 175.2693, + "step": 17730 + }, + { + "epoch": 0.07167184476217796, + "grad_norm": 913.1341552734375, + "learning_rate": 2.8384e-05, + "loss": 136.3084, + "step": 17740 + }, + { + "epoch": 0.07171224602754558, + "grad_norm": 1072.4630126953125, + "learning_rate": 2.8400000000000003e-05, + "loss": 176.3062, + "step": 17750 + }, + { + "epoch": 0.07175264729291321, + "grad_norm": 952.9400634765625, + "learning_rate": 2.8416000000000004e-05, + "loss": 107.3849, + "step": 17760 + }, + { + "epoch": 0.07179304855828085, + "grad_norm": 1470.4320068359375, + "learning_rate": 2.8432e-05, + "loss": 205.5968, + "step": 17770 + }, + { + "epoch": 0.07183344982364848, + "grad_norm": 867.2557373046875, + "learning_rate": 2.8448000000000006e-05, + "loss": 188.5464, + "step": 17780 + }, + { + "epoch": 0.0718738510890161, + "grad_norm": 726.8024291992188, + "learning_rate": 2.8464000000000004e-05, + "loss": 176.3354, + "step": 17790 + }, + { + "epoch": 0.07191425235438374, + "grad_norm": 1209.099365234375, + "learning_rate": 2.8480000000000002e-05, + "loss": 124.015, + "step": 17800 + }, + { + "epoch": 0.07195465361975137, + "grad_norm": 1257.0675048828125, + "learning_rate": 2.8496000000000003e-05, + "loss": 139.4148, + "step": 17810 + }, + { + "epoch": 0.07199505488511901, + "grad_norm": 476.1322937011719, + "learning_rate": 2.8512e-05, + "loss": 143.7301, + "step": 17820 + }, + { + "epoch": 0.07203545615048663, + "grad_norm": 1515.902587890625, + "learning_rate": 2.8528e-05, + "loss": 134.5676, + "step": 17830 + }, + { + "epoch": 0.07207585741585426, + "grad_norm": 1408.635986328125, + 
"learning_rate": 2.8544000000000003e-05, + "loss": 146.5741, + "step": 17840 + }, + { + "epoch": 0.0721162586812219, + "grad_norm": 1577.3648681640625, + "learning_rate": 2.856e-05, + "loss": 188.9528, + "step": 17850 + }, + { + "epoch": 0.07215665994658953, + "grad_norm": 856.917236328125, + "learning_rate": 2.8576000000000002e-05, + "loss": 101.3158, + "step": 17860 + }, + { + "epoch": 0.07219706121195715, + "grad_norm": 558.7588500976562, + "learning_rate": 2.8592000000000003e-05, + "loss": 153.0461, + "step": 17870 + }, + { + "epoch": 0.0722374624773248, + "grad_norm": 2916.3544921875, + "learning_rate": 2.8608e-05, + "loss": 185.8032, + "step": 17880 + }, + { + "epoch": 0.07227786374269242, + "grad_norm": 1237.33984375, + "learning_rate": 2.8624000000000002e-05, + "loss": 192.9786, + "step": 17890 + }, + { + "epoch": 0.07231826500806006, + "grad_norm": 1126.301025390625, + "learning_rate": 2.864e-05, + "loss": 143.1352, + "step": 17900 + }, + { + "epoch": 0.07235866627342769, + "grad_norm": 1192.56591796875, + "learning_rate": 2.8656000000000005e-05, + "loss": 148.4612, + "step": 17910 + }, + { + "epoch": 0.07239906753879531, + "grad_norm": 1251.504638671875, + "learning_rate": 2.8672000000000003e-05, + "loss": 103.6479, + "step": 17920 + }, + { + "epoch": 0.07243946880416295, + "grad_norm": 675.5980834960938, + "learning_rate": 2.8688e-05, + "loss": 155.472, + "step": 17930 + }, + { + "epoch": 0.07247987006953058, + "grad_norm": 1813.1705322265625, + "learning_rate": 2.8704e-05, + "loss": 246.3732, + "step": 17940 + }, + { + "epoch": 0.0725202713348982, + "grad_norm": 1508.1116943359375, + "learning_rate": 2.8720000000000003e-05, + "loss": 135.4645, + "step": 17950 + }, + { + "epoch": 0.07256067260026584, + "grad_norm": 1017.2825317382812, + "learning_rate": 2.8736000000000004e-05, + "loss": 202.4383, + "step": 17960 + }, + { + "epoch": 0.07260107386563347, + "grad_norm": 794.519775390625, + "learning_rate": 2.8752000000000002e-05, + "loss": 143.7167, + "step": 17970 + }, + { + "epoch": 0.07264147513100111, + "grad_norm": 1667.383544921875, + "learning_rate": 2.8768e-05, + "loss": 124.9272, + "step": 17980 + }, + { + "epoch": 0.07268187639636874, + "grad_norm": 1473.98095703125, + "learning_rate": 2.8784000000000004e-05, + "loss": 145.3454, + "step": 17990 + }, + { + "epoch": 0.07272227766173636, + "grad_norm": 2079.574462890625, + "learning_rate": 2.8800000000000002e-05, + "loss": 144.7181, + "step": 18000 + }, + { + "epoch": 0.072762678927104, + "grad_norm": 719.4954223632812, + "learning_rate": 2.8816000000000003e-05, + "loss": 167.1594, + "step": 18010 + }, + { + "epoch": 0.07280308019247163, + "grad_norm": 788.9729614257812, + "learning_rate": 2.8832e-05, + "loss": 117.7792, + "step": 18020 + }, + { + "epoch": 0.07284348145783925, + "grad_norm": 683.7277221679688, + "learning_rate": 2.8848e-05, + "loss": 97.208, + "step": 18030 + }, + { + "epoch": 0.0728838827232069, + "grad_norm": 982.801025390625, + "learning_rate": 2.8864000000000004e-05, + "loss": 94.338, + "step": 18040 + }, + { + "epoch": 0.07292428398857452, + "grad_norm": 3670.46435546875, + "learning_rate": 2.888e-05, + "loss": 161.824, + "step": 18050 + }, + { + "epoch": 0.07296468525394216, + "grad_norm": 1476.6680908203125, + "learning_rate": 2.8896000000000003e-05, + "loss": 153.1308, + "step": 18060 + }, + { + "epoch": 0.07300508651930979, + "grad_norm": 3073.16455078125, + "learning_rate": 2.8912000000000004e-05, + "loss": 134.4426, + "step": 18070 + }, + { + "epoch": 0.07304548778467741, + "grad_norm": 
2170.526611328125, + "learning_rate": 2.8928e-05, + "loss": 127.2971, + "step": 18080 + }, + { + "epoch": 0.07308588905004505, + "grad_norm": 663.6891479492188, + "learning_rate": 2.8944000000000003e-05, + "loss": 130.322, + "step": 18090 + }, + { + "epoch": 0.07312629031541268, + "grad_norm": 458.315185546875, + "learning_rate": 2.896e-05, + "loss": 92.6931, + "step": 18100 + }, + { + "epoch": 0.0731666915807803, + "grad_norm": 858.5078125, + "learning_rate": 2.8976000000000005e-05, + "loss": 139.1855, + "step": 18110 + }, + { + "epoch": 0.07320709284614794, + "grad_norm": 845.654052734375, + "learning_rate": 2.8992000000000003e-05, + "loss": 150.1132, + "step": 18120 + }, + { + "epoch": 0.07324749411151557, + "grad_norm": 1308.1099853515625, + "learning_rate": 2.9008e-05, + "loss": 184.3887, + "step": 18130 + }, + { + "epoch": 0.07328789537688321, + "grad_norm": 1765.68896484375, + "learning_rate": 2.9024000000000002e-05, + "loss": 175.224, + "step": 18140 + }, + { + "epoch": 0.07332829664225084, + "grad_norm": 1561.1123046875, + "learning_rate": 2.9040000000000003e-05, + "loss": 161.0827, + "step": 18150 + }, + { + "epoch": 0.07336869790761846, + "grad_norm": 623.8340454101562, + "learning_rate": 2.9056000000000004e-05, + "loss": 123.9703, + "step": 18160 + }, + { + "epoch": 0.0734090991729861, + "grad_norm": 2045.145263671875, + "learning_rate": 2.9072000000000002e-05, + "loss": 162.2184, + "step": 18170 + }, + { + "epoch": 0.07344950043835373, + "grad_norm": 766.1104736328125, + "learning_rate": 2.9088e-05, + "loss": 164.8092, + "step": 18180 + }, + { + "epoch": 0.07348990170372136, + "grad_norm": 1180.1484375, + "learning_rate": 2.9104000000000005e-05, + "loss": 155.154, + "step": 18190 + }, + { + "epoch": 0.073530302969089, + "grad_norm": 884.0999755859375, + "learning_rate": 2.9120000000000002e-05, + "loss": 151.8814, + "step": 18200 + }, + { + "epoch": 0.07357070423445662, + "grad_norm": 1025.338623046875, + "learning_rate": 2.9136000000000004e-05, + "loss": 170.269, + "step": 18210 + }, + { + "epoch": 0.07361110549982426, + "grad_norm": 972.7962646484375, + "learning_rate": 2.9152e-05, + "loss": 158.7671, + "step": 18220 + }, + { + "epoch": 0.07365150676519189, + "grad_norm": 1966.0303955078125, + "learning_rate": 2.9168e-05, + "loss": 150.0479, + "step": 18230 + }, + { + "epoch": 0.07369190803055951, + "grad_norm": 1667.3870849609375, + "learning_rate": 2.9184000000000004e-05, + "loss": 187.1067, + "step": 18240 + }, + { + "epoch": 0.07373230929592715, + "grad_norm": 939.9055786132812, + "learning_rate": 2.92e-05, + "loss": 164.8281, + "step": 18250 + }, + { + "epoch": 0.07377271056129478, + "grad_norm": 710.55859375, + "learning_rate": 2.9216000000000003e-05, + "loss": 114.1709, + "step": 18260 + }, + { + "epoch": 0.0738131118266624, + "grad_norm": 1066.427734375, + "learning_rate": 2.9232000000000004e-05, + "loss": 156.624, + "step": 18270 + }, + { + "epoch": 0.07385351309203005, + "grad_norm": 1388.24267578125, + "learning_rate": 2.9248000000000002e-05, + "loss": 190.9957, + "step": 18280 + }, + { + "epoch": 0.07389391435739767, + "grad_norm": 933.8331298828125, + "learning_rate": 2.9264000000000003e-05, + "loss": 166.0619, + "step": 18290 + }, + { + "epoch": 0.0739343156227653, + "grad_norm": 2535.637451171875, + "learning_rate": 2.928e-05, + "loss": 223.8416, + "step": 18300 + }, + { + "epoch": 0.07397471688813294, + "grad_norm": 2473.804443359375, + "learning_rate": 2.9296000000000005e-05, + "loss": 152.6108, + "step": 18310 + }, + { + "epoch": 0.07401511815350056, + 
"grad_norm": 638.9561157226562, + "learning_rate": 2.9312000000000003e-05, + "loss": 168.9979, + "step": 18320 + }, + { + "epoch": 0.0740555194188682, + "grad_norm": 1030.912841796875, + "learning_rate": 2.9328e-05, + "loss": 186.1908, + "step": 18330 + }, + { + "epoch": 0.07409592068423583, + "grad_norm": 1772.89013671875, + "learning_rate": 2.9344000000000002e-05, + "loss": 151.4674, + "step": 18340 + }, + { + "epoch": 0.07413632194960346, + "grad_norm": 786.4491577148438, + "learning_rate": 2.9360000000000003e-05, + "loss": 137.3822, + "step": 18350 + }, + { + "epoch": 0.0741767232149711, + "grad_norm": 625.10205078125, + "learning_rate": 2.9376000000000005e-05, + "loss": 114.0711, + "step": 18360 + }, + { + "epoch": 0.07421712448033872, + "grad_norm": 864.49560546875, + "learning_rate": 2.9392000000000003e-05, + "loss": 232.9865, + "step": 18370 + }, + { + "epoch": 0.07425752574570635, + "grad_norm": 1343.116455078125, + "learning_rate": 2.9408e-05, + "loss": 132.7003, + "step": 18380 + }, + { + "epoch": 0.07429792701107399, + "grad_norm": 1040.84619140625, + "learning_rate": 2.9424000000000005e-05, + "loss": 197.457, + "step": 18390 + }, + { + "epoch": 0.07433832827644161, + "grad_norm": 1671.703857421875, + "learning_rate": 2.9440000000000003e-05, + "loss": 163.817, + "step": 18400 + }, + { + "epoch": 0.07437872954180925, + "grad_norm": 2325.96435546875, + "learning_rate": 2.9456000000000004e-05, + "loss": 211.1564, + "step": 18410 + }, + { + "epoch": 0.07441913080717688, + "grad_norm": 738.5628051757812, + "learning_rate": 2.9472000000000002e-05, + "loss": 117.4639, + "step": 18420 + }, + { + "epoch": 0.07445953207254451, + "grad_norm": 1374.7052001953125, + "learning_rate": 2.9488e-05, + "loss": 153.7602, + "step": 18430 + }, + { + "epoch": 0.07449993333791215, + "grad_norm": 927.5657958984375, + "learning_rate": 2.9504000000000004e-05, + "loss": 81.2049, + "step": 18440 + }, + { + "epoch": 0.07454033460327977, + "grad_norm": 967.732666015625, + "learning_rate": 2.9520000000000002e-05, + "loss": 113.4281, + "step": 18450 + }, + { + "epoch": 0.0745807358686474, + "grad_norm": 851.0897827148438, + "learning_rate": 2.9536e-05, + "loss": 161.3327, + "step": 18460 + }, + { + "epoch": 0.07462113713401504, + "grad_norm": 1526.0782470703125, + "learning_rate": 2.9552000000000004e-05, + "loss": 139.2771, + "step": 18470 + }, + { + "epoch": 0.07466153839938267, + "grad_norm": 692.1572265625, + "learning_rate": 2.9568000000000002e-05, + "loss": 135.6548, + "step": 18480 + }, + { + "epoch": 0.0747019396647503, + "grad_norm": 974.0632934570312, + "learning_rate": 2.9584000000000003e-05, + "loss": 142.4245, + "step": 18490 + }, + { + "epoch": 0.07474234093011793, + "grad_norm": 1040.435302734375, + "learning_rate": 2.96e-05, + "loss": 135.2899, + "step": 18500 + }, + { + "epoch": 0.07478274219548556, + "grad_norm": 548.8395385742188, + "learning_rate": 2.9616e-05, + "loss": 168.3911, + "step": 18510 + }, + { + "epoch": 0.0748231434608532, + "grad_norm": 1707.4127197265625, + "learning_rate": 2.9632000000000004e-05, + "loss": 121.9398, + "step": 18520 + }, + { + "epoch": 0.07486354472622082, + "grad_norm": 826.6209716796875, + "learning_rate": 2.9648e-05, + "loss": 139.4082, + "step": 18530 + }, + { + "epoch": 0.07490394599158845, + "grad_norm": 1672.7015380859375, + "learning_rate": 2.9664000000000003e-05, + "loss": 229.4466, + "step": 18540 + }, + { + "epoch": 0.07494434725695609, + "grad_norm": 446.5745544433594, + "learning_rate": 2.9680000000000004e-05, + "loss": 163.4797, + "step": 18550 + 
}, + { + "epoch": 0.07498474852232372, + "grad_norm": 1023.4652099609375, + "learning_rate": 2.9696e-05, + "loss": 107.8883, + "step": 18560 + }, + { + "epoch": 0.07502514978769136, + "grad_norm": 1041.41650390625, + "learning_rate": 2.9712000000000003e-05, + "loss": 199.6158, + "step": 18570 + }, + { + "epoch": 0.07506555105305898, + "grad_norm": 1023.8604736328125, + "learning_rate": 2.9728e-05, + "loss": 146.2948, + "step": 18580 + }, + { + "epoch": 0.07510595231842661, + "grad_norm": 518.9553833007812, + "learning_rate": 2.9744000000000005e-05, + "loss": 84.2765, + "step": 18590 + }, + { + "epoch": 0.07514635358379425, + "grad_norm": 946.0505981445312, + "learning_rate": 2.9760000000000003e-05, + "loss": 240.7824, + "step": 18600 + }, + { + "epoch": 0.07518675484916187, + "grad_norm": 569.4635620117188, + "learning_rate": 2.9776e-05, + "loss": 206.5026, + "step": 18610 + }, + { + "epoch": 0.0752271561145295, + "grad_norm": 741.1220092773438, + "learning_rate": 2.9792000000000002e-05, + "loss": 178.427, + "step": 18620 + }, + { + "epoch": 0.07526755737989714, + "grad_norm": 611.7376098632812, + "learning_rate": 2.9808e-05, + "loss": 156.7509, + "step": 18630 + }, + { + "epoch": 0.07530795864526477, + "grad_norm": 787.3582763671875, + "learning_rate": 2.9824000000000004e-05, + "loss": 187.9609, + "step": 18640 + }, + { + "epoch": 0.0753483599106324, + "grad_norm": 696.8880615234375, + "learning_rate": 2.9840000000000002e-05, + "loss": 83.3569, + "step": 18650 + }, + { + "epoch": 0.07538876117600003, + "grad_norm": 1950.1044921875, + "learning_rate": 2.9856e-05, + "loss": 162.3512, + "step": 18660 + }, + { + "epoch": 0.07542916244136766, + "grad_norm": 1288.1805419921875, + "learning_rate": 2.9872000000000005e-05, + "loss": 146.2886, + "step": 18670 + }, + { + "epoch": 0.0754695637067353, + "grad_norm": 935.785888671875, + "learning_rate": 2.9888000000000003e-05, + "loss": 132.7545, + "step": 18680 + }, + { + "epoch": 0.07550996497210292, + "grad_norm": 4552.470703125, + "learning_rate": 2.9904000000000004e-05, + "loss": 113.0376, + "step": 18690 + }, + { + "epoch": 0.07555036623747055, + "grad_norm": 1135.8033447265625, + "learning_rate": 2.992e-05, + "loss": 161.4849, + "step": 18700 + }, + { + "epoch": 0.07559076750283819, + "grad_norm": 1265.2376708984375, + "learning_rate": 2.9936e-05, + "loss": 235.9524, + "step": 18710 + }, + { + "epoch": 0.07563116876820582, + "grad_norm": 1477.458740234375, + "learning_rate": 2.9952000000000004e-05, + "loss": 156.0702, + "step": 18720 + }, + { + "epoch": 0.07567157003357346, + "grad_norm": 550.357421875, + "learning_rate": 2.9968000000000002e-05, + "loss": 144.9627, + "step": 18730 + }, + { + "epoch": 0.07571197129894108, + "grad_norm": 817.1676025390625, + "learning_rate": 2.9984000000000003e-05, + "loss": 163.8753, + "step": 18740 + }, + { + "epoch": 0.07575237256430871, + "grad_norm": 875.1635131835938, + "learning_rate": 3.0000000000000004e-05, + "loss": 154.4611, + "step": 18750 + }, + { + "epoch": 0.07579277382967635, + "grad_norm": 1046.1038818359375, + "learning_rate": 3.0016000000000002e-05, + "loss": 114.9563, + "step": 18760 + }, + { + "epoch": 0.07583317509504398, + "grad_norm": 539.35791015625, + "learning_rate": 3.0032000000000003e-05, + "loss": 148.6764, + "step": 18770 + }, + { + "epoch": 0.0758735763604116, + "grad_norm": 1803.2303466796875, + "learning_rate": 3.0048e-05, + "loss": 127.8625, + "step": 18780 + }, + { + "epoch": 0.07591397762577924, + "grad_norm": 3386.962158203125, + "learning_rate": 3.0064000000000006e-05, + 
"loss": 131.0421, + "step": 18790 + }, + { + "epoch": 0.07595437889114687, + "grad_norm": 1955.7564697265625, + "learning_rate": 3.0080000000000003e-05, + "loss": 147.7422, + "step": 18800 + }, + { + "epoch": 0.07599478015651451, + "grad_norm": 1716.2154541015625, + "learning_rate": 3.0096e-05, + "loss": 219.0831, + "step": 18810 + }, + { + "epoch": 0.07603518142188213, + "grad_norm": 940.4362182617188, + "learning_rate": 3.0112000000000002e-05, + "loss": 172.9132, + "step": 18820 + }, + { + "epoch": 0.07607558268724976, + "grad_norm": 1097.08642578125, + "learning_rate": 3.0128e-05, + "loss": 161.6279, + "step": 18830 + }, + { + "epoch": 0.0761159839526174, + "grad_norm": 1104.9554443359375, + "learning_rate": 3.0144000000000005e-05, + "loss": 100.9529, + "step": 18840 + }, + { + "epoch": 0.07615638521798503, + "grad_norm": 508.1643981933594, + "learning_rate": 3.0160000000000003e-05, + "loss": 165.3267, + "step": 18850 + }, + { + "epoch": 0.07619678648335265, + "grad_norm": 1504.6287841796875, + "learning_rate": 3.0176e-05, + "loss": 199.1547, + "step": 18860 + }, + { + "epoch": 0.07623718774872029, + "grad_norm": 1034.776123046875, + "learning_rate": 3.0192000000000005e-05, + "loss": 136.6313, + "step": 18870 + }, + { + "epoch": 0.07627758901408792, + "grad_norm": 865.9027709960938, + "learning_rate": 3.0208000000000003e-05, + "loss": 143.8346, + "step": 18880 + }, + { + "epoch": 0.07631799027945556, + "grad_norm": 1424.35693359375, + "learning_rate": 3.0224000000000004e-05, + "loss": 141.1759, + "step": 18890 + }, + { + "epoch": 0.07635839154482318, + "grad_norm": 600.4012451171875, + "learning_rate": 3.0240000000000002e-05, + "loss": 174.2728, + "step": 18900 + }, + { + "epoch": 0.07639879281019081, + "grad_norm": 725.7918701171875, + "learning_rate": 3.0256e-05, + "loss": 160.6155, + "step": 18910 + }, + { + "epoch": 0.07643919407555845, + "grad_norm": 900.5263671875, + "learning_rate": 3.0272000000000004e-05, + "loss": 259.7815, + "step": 18920 + }, + { + "epoch": 0.07647959534092608, + "grad_norm": 924.3510131835938, + "learning_rate": 3.0288000000000002e-05, + "loss": 132.0552, + "step": 18930 + }, + { + "epoch": 0.0765199966062937, + "grad_norm": 856.6452026367188, + "learning_rate": 3.0304000000000003e-05, + "loss": 135.5814, + "step": 18940 + }, + { + "epoch": 0.07656039787166134, + "grad_norm": 762.0154418945312, + "learning_rate": 3.032e-05, + "loss": 118.221, + "step": 18950 + }, + { + "epoch": 0.07660079913702897, + "grad_norm": 856.0108642578125, + "learning_rate": 3.0336000000000002e-05, + "loss": 152.2224, + "step": 18960 + }, + { + "epoch": 0.07664120040239661, + "grad_norm": 455.441650390625, + "learning_rate": 3.0352000000000003e-05, + "loss": 109.3129, + "step": 18970 + }, + { + "epoch": 0.07668160166776423, + "grad_norm": 1193.544189453125, + "learning_rate": 3.0368e-05, + "loss": 164.0314, + "step": 18980 + }, + { + "epoch": 0.07672200293313186, + "grad_norm": 1364.1358642578125, + "learning_rate": 3.0384000000000006e-05, + "loss": 211.994, + "step": 18990 + }, + { + "epoch": 0.0767624041984995, + "grad_norm": 2086.20068359375, + "learning_rate": 3.0400000000000004e-05, + "loss": 226.2427, + "step": 19000 + }, + { + "epoch": 0.07680280546386713, + "grad_norm": 1273.3094482421875, + "learning_rate": 3.0416e-05, + "loss": 93.3028, + "step": 19010 + }, + { + "epoch": 0.07684320672923475, + "grad_norm": 1034.8399658203125, + "learning_rate": 3.0432000000000003e-05, + "loss": 118.1521, + "step": 19020 + }, + { + "epoch": 0.0768836079946024, + "grad_norm": 
1666.349853515625, + "learning_rate": 3.0448e-05, + "loss": 118.8775, + "step": 19030 + }, + { + "epoch": 0.07692400925997002, + "grad_norm": 1439.1546630859375, + "learning_rate": 3.0464000000000005e-05, + "loss": 152.8132, + "step": 19040 + }, + { + "epoch": 0.07696441052533766, + "grad_norm": 1334.192138671875, + "learning_rate": 3.0480000000000003e-05, + "loss": 189.825, + "step": 19050 + }, + { + "epoch": 0.07700481179070529, + "grad_norm": 1274.4586181640625, + "learning_rate": 3.0496e-05, + "loss": 212.3798, + "step": 19060 + }, + { + "epoch": 0.07704521305607291, + "grad_norm": 1858.0794677734375, + "learning_rate": 3.0512000000000005e-05, + "loss": 137.3435, + "step": 19070 + }, + { + "epoch": 0.07708561432144055, + "grad_norm": 1114.0675048828125, + "learning_rate": 3.0528e-05, + "loss": 132.0629, + "step": 19080 + }, + { + "epoch": 0.07712601558680818, + "grad_norm": 777.7725219726562, + "learning_rate": 3.0544e-05, + "loss": 141.4264, + "step": 19090 + }, + { + "epoch": 0.0771664168521758, + "grad_norm": 1279.466064453125, + "learning_rate": 3.0560000000000006e-05, + "loss": 180.509, + "step": 19100 + }, + { + "epoch": 0.07720681811754344, + "grad_norm": 1011.5797729492188, + "learning_rate": 3.0576e-05, + "loss": 152.8724, + "step": 19110 + }, + { + "epoch": 0.07724721938291107, + "grad_norm": 2025.70458984375, + "learning_rate": 3.0592e-05, + "loss": 102.4337, + "step": 19120 + }, + { + "epoch": 0.07728762064827871, + "grad_norm": 598.9620361328125, + "learning_rate": 3.0608e-05, + "loss": 176.5491, + "step": 19130 + }, + { + "epoch": 0.07732802191364634, + "grad_norm": 789.054931640625, + "learning_rate": 3.0624000000000004e-05, + "loss": 121.4139, + "step": 19140 + }, + { + "epoch": 0.07736842317901396, + "grad_norm": 719.32861328125, + "learning_rate": 3.0640000000000005e-05, + "loss": 99.5375, + "step": 19150 + }, + { + "epoch": 0.0774088244443816, + "grad_norm": 789.1879272460938, + "learning_rate": 3.0656e-05, + "loss": 94.6395, + "step": 19160 + }, + { + "epoch": 0.07744922570974923, + "grad_norm": 1388.8897705078125, + "learning_rate": 3.067200000000001e-05, + "loss": 92.3671, + "step": 19170 + }, + { + "epoch": 0.07748962697511685, + "grad_norm": 7907.0166015625, + "learning_rate": 3.0688e-05, + "loss": 143.0179, + "step": 19180 + }, + { + "epoch": 0.0775300282404845, + "grad_norm": 1009.6155395507812, + "learning_rate": 3.0704e-05, + "loss": 205.8122, + "step": 19190 + }, + { + "epoch": 0.07757042950585212, + "grad_norm": 1285.3741455078125, + "learning_rate": 3.0720000000000004e-05, + "loss": 166.3354, + "step": 19200 + }, + { + "epoch": 0.07761083077121976, + "grad_norm": 1445.5545654296875, + "learning_rate": 3.0736e-05, + "loss": 178.6532, + "step": 19210 + }, + { + "epoch": 0.07765123203658739, + "grad_norm": 582.2118530273438, + "learning_rate": 3.0752000000000006e-05, + "loss": 199.8334, + "step": 19220 + }, + { + "epoch": 0.07769163330195501, + "grad_norm": 806.663818359375, + "learning_rate": 3.0768e-05, + "loss": 135.1258, + "step": 19230 + }, + { + "epoch": 0.07773203456732265, + "grad_norm": 1411.8658447265625, + "learning_rate": 3.0784e-05, + "loss": 198.7457, + "step": 19240 + }, + { + "epoch": 0.07777243583269028, + "grad_norm": 1003.43505859375, + "learning_rate": 3.08e-05, + "loss": 122.1954, + "step": 19250 + }, + { + "epoch": 0.0778128370980579, + "grad_norm": 1204.8990478515625, + "learning_rate": 3.0816000000000004e-05, + "loss": 182.6991, + "step": 19260 + }, + { + "epoch": 0.07785323836342554, + "grad_norm": 1859.2806396484375, + 
"learning_rate": 3.0832000000000006e-05, + "loss": 140.195, + "step": 19270 + }, + { + "epoch": 0.07789363962879317, + "grad_norm": 3072.67626953125, + "learning_rate": 3.0848e-05, + "loss": 106.2253, + "step": 19280 + }, + { + "epoch": 0.07793404089416081, + "grad_norm": 517.3604736328125, + "learning_rate": 3.0864e-05, + "loss": 85.8877, + "step": 19290 + }, + { + "epoch": 0.07797444215952844, + "grad_norm": 1192.221923828125, + "learning_rate": 3.088e-05, + "loss": 157.3652, + "step": 19300 + }, + { + "epoch": 0.07801484342489606, + "grad_norm": 1515.2806396484375, + "learning_rate": 3.0896000000000004e-05, + "loss": 112.9237, + "step": 19310 + }, + { + "epoch": 0.0780552446902637, + "grad_norm": 3517.332275390625, + "learning_rate": 3.0912000000000005e-05, + "loss": 237.282, + "step": 19320 + }, + { + "epoch": 0.07809564595563133, + "grad_norm": 1342.5167236328125, + "learning_rate": 3.0928e-05, + "loss": 194.1672, + "step": 19330 + }, + { + "epoch": 0.07813604722099896, + "grad_norm": 2524.6083984375, + "learning_rate": 3.0944e-05, + "loss": 149.2424, + "step": 19340 + }, + { + "epoch": 0.0781764484863666, + "grad_norm": 1059.5882568359375, + "learning_rate": 3.096e-05, + "loss": 132.9667, + "step": 19350 + }, + { + "epoch": 0.07821684975173422, + "grad_norm": 922.0531005859375, + "learning_rate": 3.0976e-05, + "loss": 182.0881, + "step": 19360 + }, + { + "epoch": 0.07825725101710186, + "grad_norm": 2116.48828125, + "learning_rate": 3.0992000000000004e-05, + "loss": 136.2758, + "step": 19370 + }, + { + "epoch": 0.07829765228246949, + "grad_norm": 2007.3494873046875, + "learning_rate": 3.1008000000000005e-05, + "loss": 229.1184, + "step": 19380 + }, + { + "epoch": 0.07833805354783711, + "grad_norm": 1462.4576416015625, + "learning_rate": 3.1024e-05, + "loss": 196.5691, + "step": 19390 + }, + { + "epoch": 0.07837845481320475, + "grad_norm": 1289.5394287109375, + "learning_rate": 3.104e-05, + "loss": 194.9763, + "step": 19400 + }, + { + "epoch": 0.07841885607857238, + "grad_norm": 1543.3292236328125, + "learning_rate": 3.1056e-05, + "loss": 171.4035, + "step": 19410 + }, + { + "epoch": 0.07845925734394, + "grad_norm": 3381.907958984375, + "learning_rate": 3.1072e-05, + "loss": 152.3068, + "step": 19420 + }, + { + "epoch": 0.07849965860930765, + "grad_norm": 1130.15283203125, + "learning_rate": 3.1088000000000005e-05, + "loss": 147.3309, + "step": 19430 + }, + { + "epoch": 0.07854005987467527, + "grad_norm": 826.4940185546875, + "learning_rate": 3.1104e-05, + "loss": 125.9425, + "step": 19440 + }, + { + "epoch": 0.07858046114004291, + "grad_norm": 1645.444580078125, + "learning_rate": 3.112e-05, + "loss": 175.4056, + "step": 19450 + }, + { + "epoch": 0.07862086240541054, + "grad_norm": 373.0332946777344, + "learning_rate": 3.1136e-05, + "loss": 153.51, + "step": 19460 + }, + { + "epoch": 0.07866126367077816, + "grad_norm": 1007.900146484375, + "learning_rate": 3.1152e-05, + "loss": 163.355, + "step": 19470 + }, + { + "epoch": 0.0787016649361458, + "grad_norm": 1573.274658203125, + "learning_rate": 3.1168000000000004e-05, + "loss": 120.6345, + "step": 19480 + }, + { + "epoch": 0.07874206620151343, + "grad_norm": 1653.9564208984375, + "learning_rate": 3.1184e-05, + "loss": 226.9921, + "step": 19490 + }, + { + "epoch": 0.07878246746688106, + "grad_norm": 748.5175170898438, + "learning_rate": 3.1200000000000006e-05, + "loss": 160.3489, + "step": 19500 + }, + { + "epoch": 0.0788228687322487, + "grad_norm": 641.2455444335938, + "learning_rate": 3.1216e-05, + "loss": 132.7127, + "step": 19510 + 
}, + { + "epoch": 0.07886326999761632, + "grad_norm": 1444.9599609375, + "learning_rate": 3.1232e-05, + "loss": 122.3432, + "step": 19520 + }, + { + "epoch": 0.07890367126298396, + "grad_norm": 1217.377197265625, + "learning_rate": 3.1248e-05, + "loss": 147.5508, + "step": 19530 + }, + { + "epoch": 0.07894407252835159, + "grad_norm": 2127.09228515625, + "learning_rate": 3.1264000000000004e-05, + "loss": 146.9545, + "step": 19540 + }, + { + "epoch": 0.07898447379371921, + "grad_norm": 1130.946533203125, + "learning_rate": 3.1280000000000005e-05, + "loss": 142.446, + "step": 19550 + }, + { + "epoch": 0.07902487505908685, + "grad_norm": 881.1486206054688, + "learning_rate": 3.1296e-05, + "loss": 148.7066, + "step": 19560 + }, + { + "epoch": 0.07906527632445448, + "grad_norm": 901.329345703125, + "learning_rate": 3.131200000000001e-05, + "loss": 158.543, + "step": 19570 + }, + { + "epoch": 0.07910567758982211, + "grad_norm": 970.90576171875, + "learning_rate": 3.1328e-05, + "loss": 105.8365, + "step": 19580 + }, + { + "epoch": 0.07914607885518975, + "grad_norm": 1250.1529541015625, + "learning_rate": 3.1344000000000003e-05, + "loss": 101.1629, + "step": 19590 + }, + { + "epoch": 0.07918648012055737, + "grad_norm": 1022.8147583007812, + "learning_rate": 3.1360000000000005e-05, + "loss": 197.5734, + "step": 19600 + }, + { + "epoch": 0.07922688138592501, + "grad_norm": 655.7382202148438, + "learning_rate": 3.1376e-05, + "loss": 139.5682, + "step": 19610 + }, + { + "epoch": 0.07926728265129264, + "grad_norm": 1020.8795776367188, + "learning_rate": 3.139200000000001e-05, + "loss": 122.5804, + "step": 19620 + }, + { + "epoch": 0.07930768391666027, + "grad_norm": 1037.203369140625, + "learning_rate": 3.1408e-05, + "loss": 150.0474, + "step": 19630 + }, + { + "epoch": 0.0793480851820279, + "grad_norm": 1057.42236328125, + "learning_rate": 3.1424e-05, + "loss": 145.177, + "step": 19640 + }, + { + "epoch": 0.07938848644739553, + "grad_norm": 970.1075439453125, + "learning_rate": 3.1440000000000004e-05, + "loss": 136.6255, + "step": 19650 + }, + { + "epoch": 0.07942888771276316, + "grad_norm": 504.5531921386719, + "learning_rate": 3.1456000000000005e-05, + "loss": 148.972, + "step": 19660 + }, + { + "epoch": 0.0794692889781308, + "grad_norm": 854.7567749023438, + "learning_rate": 3.1472000000000006e-05, + "loss": 175.1092, + "step": 19670 + }, + { + "epoch": 0.07950969024349842, + "grad_norm": 1027.3280029296875, + "learning_rate": 3.1488e-05, + "loss": 119.6056, + "step": 19680 + }, + { + "epoch": 0.07955009150886606, + "grad_norm": 961.7713623046875, + "learning_rate": 3.1504e-05, + "loss": 212.2655, + "step": 19690 + }, + { + "epoch": 0.07959049277423369, + "grad_norm": 358.7893371582031, + "learning_rate": 3.152e-05, + "loss": 119.7107, + "step": 19700 + }, + { + "epoch": 0.07963089403960132, + "grad_norm": 1187.442138671875, + "learning_rate": 3.1536000000000004e-05, + "loss": 128.6715, + "step": 19710 + }, + { + "epoch": 0.07967129530496896, + "grad_norm": 1112.4154052734375, + "learning_rate": 3.1552e-05, + "loss": 124.3715, + "step": 19720 + }, + { + "epoch": 0.07971169657033658, + "grad_norm": 1475.3905029296875, + "learning_rate": 3.1568e-05, + "loss": 177.9702, + "step": 19730 + }, + { + "epoch": 0.07975209783570421, + "grad_norm": 1211.7606201171875, + "learning_rate": 3.1584e-05, + "loss": 136.1508, + "step": 19740 + }, + { + "epoch": 0.07979249910107185, + "grad_norm": 1250.31591796875, + "learning_rate": 3.16e-05, + "loss": 112.0893, + "step": 19750 + }, + { + "epoch": 0.07983290036643947, 
+ "grad_norm": 2854.193603515625, + "learning_rate": 3.1616000000000004e-05, + "loss": 155.2664, + "step": 19760 + }, + { + "epoch": 0.07987330163180711, + "grad_norm": 1156.216552734375, + "learning_rate": 3.1632e-05, + "loss": 179.2385, + "step": 19770 + }, + { + "epoch": 0.07991370289717474, + "grad_norm": 1361.3341064453125, + "learning_rate": 3.1648000000000006e-05, + "loss": 108.4875, + "step": 19780 + }, + { + "epoch": 0.07995410416254237, + "grad_norm": 1548.9888916015625, + "learning_rate": 3.1664e-05, + "loss": 134.9432, + "step": 19790 + }, + { + "epoch": 0.07999450542791, + "grad_norm": 2229.53662109375, + "learning_rate": 3.168e-05, + "loss": 132.5375, + "step": 19800 + }, + { + "epoch": 0.08003490669327763, + "grad_norm": 753.1016845703125, + "learning_rate": 3.1696e-05, + "loss": 180.9278, + "step": 19810 + }, + { + "epoch": 0.08007530795864526, + "grad_norm": 566.2924194335938, + "learning_rate": 3.1712e-05, + "loss": 171.7009, + "step": 19820 + }, + { + "epoch": 0.0801157092240129, + "grad_norm": 5497.46240234375, + "learning_rate": 3.1728000000000005e-05, + "loss": 227.1652, + "step": 19830 + }, + { + "epoch": 0.08015611048938052, + "grad_norm": 1319.4591064453125, + "learning_rate": 3.1744e-05, + "loss": 215.2752, + "step": 19840 + }, + { + "epoch": 0.08019651175474816, + "grad_norm": 961.1768798828125, + "learning_rate": 3.176e-05, + "loss": 163.4913, + "step": 19850 + }, + { + "epoch": 0.08023691302011579, + "grad_norm": 1046.6246337890625, + "learning_rate": 3.1776e-05, + "loss": 90.9139, + "step": 19860 + }, + { + "epoch": 0.08027731428548342, + "grad_norm": 1033.51953125, + "learning_rate": 3.1792e-05, + "loss": 114.1837, + "step": 19870 + }, + { + "epoch": 0.08031771555085106, + "grad_norm": 836.09765625, + "learning_rate": 3.1808000000000004e-05, + "loss": 168.3431, + "step": 19880 + }, + { + "epoch": 0.08035811681621868, + "grad_norm": 428.75982666015625, + "learning_rate": 3.1824e-05, + "loss": 160.8278, + "step": 19890 + }, + { + "epoch": 0.08039851808158631, + "grad_norm": 2490.32373046875, + "learning_rate": 3.184000000000001e-05, + "loss": 153.6409, + "step": 19900 + }, + { + "epoch": 0.08043891934695395, + "grad_norm": 1610.21826171875, + "learning_rate": 3.1856e-05, + "loss": 242.9949, + "step": 19910 + }, + { + "epoch": 0.08047932061232158, + "grad_norm": 1406.444580078125, + "learning_rate": 3.1872e-05, + "loss": 100.1264, + "step": 19920 + }, + { + "epoch": 0.08051972187768922, + "grad_norm": 1357.9105224609375, + "learning_rate": 3.1888000000000004e-05, + "loss": 104.4464, + "step": 19930 + }, + { + "epoch": 0.08056012314305684, + "grad_norm": 704.1654052734375, + "learning_rate": 3.1904000000000005e-05, + "loss": 96.2778, + "step": 19940 + }, + { + "epoch": 0.08060052440842447, + "grad_norm": 787.5489501953125, + "learning_rate": 3.1920000000000006e-05, + "loss": 126.4141, + "step": 19950 + }, + { + "epoch": 0.08064092567379211, + "grad_norm": 982.6527709960938, + "learning_rate": 3.1936e-05, + "loss": 144.8411, + "step": 19960 + }, + { + "epoch": 0.08068132693915973, + "grad_norm": 2443.51806640625, + "learning_rate": 3.1952e-05, + "loss": 204.7473, + "step": 19970 + }, + { + "epoch": 0.08072172820452736, + "grad_norm": 1531.197509765625, + "learning_rate": 3.1968e-05, + "loss": 178.3601, + "step": 19980 + }, + { + "epoch": 0.080762129469895, + "grad_norm": 862.2193603515625, + "learning_rate": 3.1984000000000004e-05, + "loss": 109.4884, + "step": 19990 + }, + { + "epoch": 0.08080253073526263, + "grad_norm": 878.788818359375, + "learning_rate": 
3.2000000000000005e-05, + "loss": 131.0473, + "step": 20000 + }, + { + "epoch": 0.08084293200063027, + "grad_norm": 1754.6673583984375, + "learning_rate": 3.2016e-05, + "loss": 224.1403, + "step": 20010 + }, + { + "epoch": 0.08088333326599789, + "grad_norm": 566.07421875, + "learning_rate": 3.2032e-05, + "loss": 129.1175, + "step": 20020 + }, + { + "epoch": 0.08092373453136552, + "grad_norm": 439.57843017578125, + "learning_rate": 3.2048e-05, + "loss": 138.3162, + "step": 20030 + }, + { + "epoch": 0.08096413579673316, + "grad_norm": 1223.7293701171875, + "learning_rate": 3.2064e-05, + "loss": 165.8624, + "step": 20040 + }, + { + "epoch": 0.08100453706210078, + "grad_norm": 798.21826171875, + "learning_rate": 3.2080000000000005e-05, + "loss": 139.7389, + "step": 20050 + }, + { + "epoch": 0.08104493832746841, + "grad_norm": 784.7877807617188, + "learning_rate": 3.2096000000000006e-05, + "loss": 142.9121, + "step": 20060 + }, + { + "epoch": 0.08108533959283605, + "grad_norm": 2548.88671875, + "learning_rate": 3.2112e-05, + "loss": 218.7094, + "step": 20070 + }, + { + "epoch": 0.08112574085820368, + "grad_norm": 1143.803466796875, + "learning_rate": 3.2128e-05, + "loss": 202.9833, + "step": 20080 + }, + { + "epoch": 0.08116614212357132, + "grad_norm": 1598.7059326171875, + "learning_rate": 3.2144e-05, + "loss": 143.634, + "step": 20090 + }, + { + "epoch": 0.08120654338893894, + "grad_norm": 1178.9173583984375, + "learning_rate": 3.2160000000000004e-05, + "loss": 136.7162, + "step": 20100 + }, + { + "epoch": 0.08124694465430657, + "grad_norm": 1176.172119140625, + "learning_rate": 3.2176000000000005e-05, + "loss": 161.4775, + "step": 20110 + }, + { + "epoch": 0.08128734591967421, + "grad_norm": 826.2978515625, + "learning_rate": 3.2192e-05, + "loss": 173.0015, + "step": 20120 + }, + { + "epoch": 0.08132774718504183, + "grad_norm": 453.69903564453125, + "learning_rate": 3.2208e-05, + "loss": 170.0593, + "step": 20130 + }, + { + "epoch": 0.08136814845040946, + "grad_norm": 4325.46533203125, + "learning_rate": 3.2224e-05, + "loss": 173.8872, + "step": 20140 + }, + { + "epoch": 0.0814085497157771, + "grad_norm": 415.2043151855469, + "learning_rate": 3.224e-05, + "loss": 145.3643, + "step": 20150 + }, + { + "epoch": 0.08144895098114473, + "grad_norm": 576.4541625976562, + "learning_rate": 3.2256000000000004e-05, + "loss": 130.435, + "step": 20160 + }, + { + "epoch": 0.08148935224651237, + "grad_norm": 1745.3973388671875, + "learning_rate": 3.2272e-05, + "loss": 134.0694, + "step": 20170 + }, + { + "epoch": 0.08152975351188, + "grad_norm": 1008.50634765625, + "learning_rate": 3.228800000000001e-05, + "loss": 175.9761, + "step": 20180 + }, + { + "epoch": 0.08157015477724762, + "grad_norm": 1314.888671875, + "learning_rate": 3.2304e-05, + "loss": 131.5329, + "step": 20190 + }, + { + "epoch": 0.08161055604261526, + "grad_norm": 829.5794067382812, + "learning_rate": 3.232e-05, + "loss": 136.1802, + "step": 20200 + }, + { + "epoch": 0.08165095730798289, + "grad_norm": 761.9506225585938, + "learning_rate": 3.2336000000000003e-05, + "loss": 134.261, + "step": 20210 + }, + { + "epoch": 0.08169135857335051, + "grad_norm": 1873.1478271484375, + "learning_rate": 3.2352e-05, + "loss": 139.3486, + "step": 20220 + }, + { + "epoch": 0.08173175983871815, + "grad_norm": 2748.007568359375, + "learning_rate": 3.2368000000000006e-05, + "loss": 204.3437, + "step": 20230 + }, + { + "epoch": 0.08177216110408578, + "grad_norm": 2420.284912109375, + "learning_rate": 3.2384e-05, + "loss": 192.7785, + "step": 20240 + }, + { + 
"epoch": 0.08181256236945342, + "grad_norm": 978.112060546875, + "learning_rate": 3.24e-05, + "loss": 130.3142, + "step": 20250 + }, + { + "epoch": 0.08185296363482104, + "grad_norm": 1153.291748046875, + "learning_rate": 3.2416e-05, + "loss": 130.7711, + "step": 20260 + }, + { + "epoch": 0.08189336490018867, + "grad_norm": 895.7388305664062, + "learning_rate": 3.2432000000000004e-05, + "loss": 135.9407, + "step": 20270 + }, + { + "epoch": 0.08193376616555631, + "grad_norm": 682.3944091796875, + "learning_rate": 3.2448000000000005e-05, + "loss": 168.7378, + "step": 20280 + }, + { + "epoch": 0.08197416743092394, + "grad_norm": 642.4920043945312, + "learning_rate": 3.2464e-05, + "loss": 117.1005, + "step": 20290 + }, + { + "epoch": 0.08201456869629156, + "grad_norm": 1556.114990234375, + "learning_rate": 3.248000000000001e-05, + "loss": 134.2864, + "step": 20300 + }, + { + "epoch": 0.0820549699616592, + "grad_norm": 1354.38671875, + "learning_rate": 3.2496e-05, + "loss": 200.7367, + "step": 20310 + }, + { + "epoch": 0.08209537122702683, + "grad_norm": 918.3173217773438, + "learning_rate": 3.2512e-05, + "loss": 111.7906, + "step": 20320 + }, + { + "epoch": 0.08213577249239447, + "grad_norm": 1387.911376953125, + "learning_rate": 3.2528000000000004e-05, + "loss": 157.9821, + "step": 20330 + }, + { + "epoch": 0.0821761737577621, + "grad_norm": 1123.1510009765625, + "learning_rate": 3.2544000000000006e-05, + "loss": 146.9615, + "step": 20340 + }, + { + "epoch": 0.08221657502312972, + "grad_norm": 2294.60400390625, + "learning_rate": 3.256e-05, + "loss": 135.0096, + "step": 20350 + }, + { + "epoch": 0.08225697628849736, + "grad_norm": 1003.4678344726562, + "learning_rate": 3.2576e-05, + "loss": 142.3162, + "step": 20360 + }, + { + "epoch": 0.08229737755386499, + "grad_norm": 1704.735595703125, + "learning_rate": 3.2592e-05, + "loss": 175.9096, + "step": 20370 + }, + { + "epoch": 0.08233777881923261, + "grad_norm": 5514.064453125, + "learning_rate": 3.2608000000000004e-05, + "loss": 145.4828, + "step": 20380 + }, + { + "epoch": 0.08237818008460025, + "grad_norm": 1671.9061279296875, + "learning_rate": 3.2624000000000005e-05, + "loss": 139.3675, + "step": 20390 + }, + { + "epoch": 0.08241858134996788, + "grad_norm": 650.0180053710938, + "learning_rate": 3.264e-05, + "loss": 145.3681, + "step": 20400 + }, + { + "epoch": 0.08245898261533552, + "grad_norm": 4346.93017578125, + "learning_rate": 3.2656e-05, + "loss": 121.7884, + "step": 20410 + }, + { + "epoch": 0.08249938388070314, + "grad_norm": 1530.2352294921875, + "learning_rate": 3.2672e-05, + "loss": 201.2308, + "step": 20420 + }, + { + "epoch": 0.08253978514607077, + "grad_norm": 794.2041015625, + "learning_rate": 3.2688e-05, + "loss": 129.883, + "step": 20430 + }, + { + "epoch": 0.08258018641143841, + "grad_norm": 3478.363037109375, + "learning_rate": 3.2704000000000004e-05, + "loss": 154.0663, + "step": 20440 + }, + { + "epoch": 0.08262058767680604, + "grad_norm": 3458.045166015625, + "learning_rate": 3.272e-05, + "loss": 149.6867, + "step": 20450 + }, + { + "epoch": 0.08266098894217366, + "grad_norm": 405.7808837890625, + "learning_rate": 3.2736000000000006e-05, + "loss": 143.6204, + "step": 20460 + }, + { + "epoch": 0.0827013902075413, + "grad_norm": 1114.430419921875, + "learning_rate": 3.2752e-05, + "loss": 199.4287, + "step": 20470 + }, + { + "epoch": 0.08274179147290893, + "grad_norm": 767.6410522460938, + "learning_rate": 3.2768e-05, + "loss": 127.3182, + "step": 20480 + }, + { + "epoch": 0.08278219273827657, + "grad_norm": 
1250.516845703125, + "learning_rate": 3.2784e-05, + "loss": 200.1363, + "step": 20490 + }, + { + "epoch": 0.0828225940036442, + "grad_norm": 3073.381591796875, + "learning_rate": 3.28e-05, + "loss": 189.8158, + "step": 20500 + }, + { + "epoch": 0.08286299526901182, + "grad_norm": 1014.962890625, + "learning_rate": 3.2816000000000006e-05, + "loss": 207.3552, + "step": 20510 + }, + { + "epoch": 0.08290339653437946, + "grad_norm": 1103.14990234375, + "learning_rate": 3.2832e-05, + "loss": 108.8261, + "step": 20520 + }, + { + "epoch": 0.08294379779974709, + "grad_norm": 640.3897705078125, + "learning_rate": 3.2848e-05, + "loss": 166.723, + "step": 20530 + }, + { + "epoch": 0.08298419906511471, + "grad_norm": 597.391845703125, + "learning_rate": 3.2864e-05, + "loss": 112.421, + "step": 20540 + }, + { + "epoch": 0.08302460033048235, + "grad_norm": 2361.25341796875, + "learning_rate": 3.2880000000000004e-05, + "loss": 152.6865, + "step": 20550 + }, + { + "epoch": 0.08306500159584998, + "grad_norm": 1410.344970703125, + "learning_rate": 3.2896000000000005e-05, + "loss": 134.7542, + "step": 20560 + }, + { + "epoch": 0.08310540286121762, + "grad_norm": 865.1497802734375, + "learning_rate": 3.2912e-05, + "loss": 80.9885, + "step": 20570 + }, + { + "epoch": 0.08314580412658525, + "grad_norm": 1357.2235107421875, + "learning_rate": 3.292800000000001e-05, + "loss": 176.9338, + "step": 20580 + }, + { + "epoch": 0.08318620539195287, + "grad_norm": 632.3554077148438, + "learning_rate": 3.2944e-05, + "loss": 156.9602, + "step": 20590 + }, + { + "epoch": 0.08322660665732051, + "grad_norm": 723.4881591796875, + "learning_rate": 3.296e-05, + "loss": 207.8113, + "step": 20600 + }, + { + "epoch": 0.08326700792268814, + "grad_norm": 737.8985595703125, + "learning_rate": 3.2976000000000004e-05, + "loss": 191.0957, + "step": 20610 + }, + { + "epoch": 0.08330740918805576, + "grad_norm": 1410.85546875, + "learning_rate": 3.2992e-05, + "loss": 166.4973, + "step": 20620 + }, + { + "epoch": 0.0833478104534234, + "grad_norm": 1520.5904541015625, + "learning_rate": 3.3008000000000007e-05, + "loss": 194.798, + "step": 20630 + }, + { + "epoch": 0.08338821171879103, + "grad_norm": 830.2517700195312, + "learning_rate": 3.3024e-05, + "loss": 161.4706, + "step": 20640 + }, + { + "epoch": 0.08342861298415867, + "grad_norm": 1816.5230712890625, + "learning_rate": 3.304e-05, + "loss": 121.4072, + "step": 20650 + }, + { + "epoch": 0.0834690142495263, + "grad_norm": 2890.85107421875, + "learning_rate": 3.3056e-05, + "loss": 210.0617, + "step": 20660 + }, + { + "epoch": 0.08350941551489392, + "grad_norm": 1434.088134765625, + "learning_rate": 3.3072000000000005e-05, + "loss": 199.6831, + "step": 20670 + }, + { + "epoch": 0.08354981678026156, + "grad_norm": 908.7508544921875, + "learning_rate": 3.3088000000000006e-05, + "loss": 157.5875, + "step": 20680 + }, + { + "epoch": 0.08359021804562919, + "grad_norm": 1723.634521484375, + "learning_rate": 3.3104e-05, + "loss": 165.9936, + "step": 20690 + }, + { + "epoch": 0.08363061931099681, + "grad_norm": 991.2564697265625, + "learning_rate": 3.312e-05, + "loss": 188.8215, + "step": 20700 + }, + { + "epoch": 0.08367102057636445, + "grad_norm": 921.714599609375, + "learning_rate": 3.3136e-05, + "loss": 180.7932, + "step": 20710 + }, + { + "epoch": 0.08371142184173208, + "grad_norm": 757.0946044921875, + "learning_rate": 3.3152000000000004e-05, + "loss": 146.0066, + "step": 20720 + }, + { + "epoch": 0.08375182310709972, + "grad_norm": 728.5523681640625, + "learning_rate": 
3.3168000000000005e-05, + "loss": 166.663, + "step": 20730 + }, + { + "epoch": 0.08379222437246735, + "grad_norm": 7663.13916015625, + "learning_rate": 3.3184000000000006e-05, + "loss": 245.4232, + "step": 20740 + }, + { + "epoch": 0.08383262563783497, + "grad_norm": 1275.4249267578125, + "learning_rate": 3.32e-05, + "loss": 96.0934, + "step": 20750 + }, + { + "epoch": 0.08387302690320261, + "grad_norm": 1094.66748046875, + "learning_rate": 3.3216e-05, + "loss": 143.1588, + "step": 20760 + }, + { + "epoch": 0.08391342816857024, + "grad_norm": 2520.77099609375, + "learning_rate": 3.3232e-05, + "loss": 133.0138, + "step": 20770 + }, + { + "epoch": 0.08395382943393787, + "grad_norm": 702.4775390625, + "learning_rate": 3.3248000000000004e-05, + "loss": 189.948, + "step": 20780 + }, + { + "epoch": 0.0839942306993055, + "grad_norm": 954.2200317382812, + "learning_rate": 3.3264000000000005e-05, + "loss": 144.7623, + "step": 20790 + }, + { + "epoch": 0.08403463196467313, + "grad_norm": 3302.3115234375, + "learning_rate": 3.328e-05, + "loss": 190.5672, + "step": 20800 + }, + { + "epoch": 0.08407503323004077, + "grad_norm": 1727.0504150390625, + "learning_rate": 3.3296e-05, + "loss": 171.9515, + "step": 20810 + }, + { + "epoch": 0.0841154344954084, + "grad_norm": 1287.97412109375, + "learning_rate": 3.3312e-05, + "loss": 160.4744, + "step": 20820 + }, + { + "epoch": 0.08415583576077602, + "grad_norm": 830.2168579101562, + "learning_rate": 3.3328000000000003e-05, + "loss": 156.1915, + "step": 20830 + }, + { + "epoch": 0.08419623702614366, + "grad_norm": 2019.2760009765625, + "learning_rate": 3.3344000000000005e-05, + "loss": 156.8544, + "step": 20840 + }, + { + "epoch": 0.08423663829151129, + "grad_norm": 705.07861328125, + "learning_rate": 3.336e-05, + "loss": 91.8192, + "step": 20850 + }, + { + "epoch": 0.08427703955687892, + "grad_norm": 746.8809204101562, + "learning_rate": 3.337600000000001e-05, + "loss": 109.4907, + "step": 20860 + }, + { + "epoch": 0.08431744082224656, + "grad_norm": 687.0313720703125, + "learning_rate": 3.3392e-05, + "loss": 106.7415, + "step": 20870 + }, + { + "epoch": 0.08435784208761418, + "grad_norm": 1740.00830078125, + "learning_rate": 3.3408e-05, + "loss": 183.5423, + "step": 20880 + }, + { + "epoch": 0.08439824335298182, + "grad_norm": 984.9962768554688, + "learning_rate": 3.3424000000000004e-05, + "loss": 127.0554, + "step": 20890 + }, + { + "epoch": 0.08443864461834945, + "grad_norm": 2045.16943359375, + "learning_rate": 3.344e-05, + "loss": 153.6352, + "step": 20900 + }, + { + "epoch": 0.08447904588371707, + "grad_norm": 1318.299072265625, + "learning_rate": 3.3456000000000006e-05, + "loss": 249.7303, + "step": 20910 + }, + { + "epoch": 0.08451944714908471, + "grad_norm": 1542.0604248046875, + "learning_rate": 3.3472e-05, + "loss": 146.596, + "step": 20920 + }, + { + "epoch": 0.08455984841445234, + "grad_norm": 834.7710571289062, + "learning_rate": 3.3488e-05, + "loss": 134.1098, + "step": 20930 + }, + { + "epoch": 0.08460024967981997, + "grad_norm": 813.9830322265625, + "learning_rate": 3.3504e-05, + "loss": 141.5175, + "step": 20940 + }, + { + "epoch": 0.0846406509451876, + "grad_norm": 745.8136596679688, + "learning_rate": 3.3520000000000004e-05, + "loss": 144.3198, + "step": 20950 + }, + { + "epoch": 0.08468105221055523, + "grad_norm": 558.539794921875, + "learning_rate": 3.3536000000000006e-05, + "loss": 121.216, + "step": 20960 + }, + { + "epoch": 0.08472145347592287, + "grad_norm": 1051.08984375, + "learning_rate": 3.3552e-05, + "loss": 156.3983, + "step": 
20970 + }, + { + "epoch": 0.0847618547412905, + "grad_norm": 1455.3486328125, + "learning_rate": 3.3568e-05, + "loss": 99.2875, + "step": 20980 + }, + { + "epoch": 0.08480225600665812, + "grad_norm": 906.9086303710938, + "learning_rate": 3.3584e-05, + "loss": 157.9554, + "step": 20990 + }, + { + "epoch": 0.08484265727202576, + "grad_norm": 1177.9666748046875, + "learning_rate": 3.3600000000000004e-05, + "loss": 111.7463, + "step": 21000 + }, + { + "epoch": 0.08488305853739339, + "grad_norm": 2995.0673828125, + "learning_rate": 3.3616000000000005e-05, + "loss": 211.9187, + "step": 21010 + }, + { + "epoch": 0.08492345980276102, + "grad_norm": 5193.7265625, + "learning_rate": 3.3632e-05, + "loss": 221.2816, + "step": 21020 + }, + { + "epoch": 0.08496386106812866, + "grad_norm": 345.26715087890625, + "learning_rate": 3.3648e-05, + "loss": 120.4475, + "step": 21030 + }, + { + "epoch": 0.08500426233349628, + "grad_norm": 775.6820678710938, + "learning_rate": 3.3664e-05, + "loss": 102.8668, + "step": 21040 + }, + { + "epoch": 0.08504466359886392, + "grad_norm": 1124.5084228515625, + "learning_rate": 3.368e-05, + "loss": 132.4066, + "step": 21050 + }, + { + "epoch": 0.08508506486423155, + "grad_norm": 747.197021484375, + "learning_rate": 3.3696000000000004e-05, + "loss": 96.9779, + "step": 21060 + }, + { + "epoch": 0.08512546612959918, + "grad_norm": 687.505615234375, + "learning_rate": 3.3712000000000005e-05, + "loss": 191.3859, + "step": 21070 + }, + { + "epoch": 0.08516586739496682, + "grad_norm": 692.177734375, + "learning_rate": 3.3728e-05, + "loss": 152.729, + "step": 21080 + }, + { + "epoch": 0.08520626866033444, + "grad_norm": 2417.6376953125, + "learning_rate": 3.3744e-05, + "loss": 147.1884, + "step": 21090 + }, + { + "epoch": 0.08524666992570207, + "grad_norm": 997.3673706054688, + "learning_rate": 3.376e-05, + "loss": 131.0797, + "step": 21100 + }, + { + "epoch": 0.08528707119106971, + "grad_norm": 1230.6165771484375, + "learning_rate": 3.3776e-05, + "loss": 175.462, + "step": 21110 + }, + { + "epoch": 0.08532747245643733, + "grad_norm": 1275.230712890625, + "learning_rate": 3.3792000000000004e-05, + "loss": 155.2959, + "step": 21120 + }, + { + "epoch": 0.08536787372180497, + "grad_norm": 797.14892578125, + "learning_rate": 3.3808e-05, + "loss": 166.8581, + "step": 21130 + }, + { + "epoch": 0.0854082749871726, + "grad_norm": 1977.776123046875, + "learning_rate": 3.382400000000001e-05, + "loss": 214.3944, + "step": 21140 + }, + { + "epoch": 0.08544867625254023, + "grad_norm": 1204.1937255859375, + "learning_rate": 3.384e-05, + "loss": 131.4441, + "step": 21150 + }, + { + "epoch": 0.08548907751790787, + "grad_norm": 611.2650146484375, + "learning_rate": 3.3856e-05, + "loss": 150.1408, + "step": 21160 + }, + { + "epoch": 0.08552947878327549, + "grad_norm": 941.5194702148438, + "learning_rate": 3.3872000000000004e-05, + "loss": 139.6102, + "step": 21170 + }, + { + "epoch": 0.08556988004864312, + "grad_norm": 454.7662048339844, + "learning_rate": 3.3888e-05, + "loss": 122.3746, + "step": 21180 + }, + { + "epoch": 0.08561028131401076, + "grad_norm": 933.9237060546875, + "learning_rate": 3.3904000000000006e-05, + "loss": 164.4714, + "step": 21190 + }, + { + "epoch": 0.08565068257937838, + "grad_norm": 1266.6494140625, + "learning_rate": 3.392e-05, + "loss": 160.6412, + "step": 21200 + }, + { + "epoch": 0.08569108384474602, + "grad_norm": 2845.56103515625, + "learning_rate": 3.3936e-05, + "loss": 213.5401, + "step": 21210 + }, + { + "epoch": 0.08573148511011365, + "grad_norm": 5497.9970703125, 
+ "learning_rate": 3.3952e-05, + "loss": 190.3089, + "step": 21220 + }, + { + "epoch": 0.08577188637548128, + "grad_norm": 1630.35205078125, + "learning_rate": 3.3968000000000004e-05, + "loss": 153.3488, + "step": 21230 + }, + { + "epoch": 0.08581228764084892, + "grad_norm": 462.4332275390625, + "learning_rate": 3.3984000000000005e-05, + "loss": 118.9506, + "step": 21240 + }, + { + "epoch": 0.08585268890621654, + "grad_norm": 1223.260009765625, + "learning_rate": 3.4e-05, + "loss": 120.7511, + "step": 21250 + }, + { + "epoch": 0.08589309017158417, + "grad_norm": 1368.648193359375, + "learning_rate": 3.401600000000001e-05, + "loss": 162.4402, + "step": 21260 + }, + { + "epoch": 0.08593349143695181, + "grad_norm": 1293.765869140625, + "learning_rate": 3.4032e-05, + "loss": 133.6338, + "step": 21270 + }, + { + "epoch": 0.08597389270231943, + "grad_norm": 2944.755615234375, + "learning_rate": 3.4048e-05, + "loss": 193.0378, + "step": 21280 + }, + { + "epoch": 0.08601429396768706, + "grad_norm": 3621.5576171875, + "learning_rate": 3.4064000000000005e-05, + "loss": 202.0364, + "step": 21290 + }, + { + "epoch": 0.0860546952330547, + "grad_norm": 915.840576171875, + "learning_rate": 3.408e-05, + "loss": 153.052, + "step": 21300 + }, + { + "epoch": 0.08609509649842233, + "grad_norm": 1874.4476318359375, + "learning_rate": 3.409600000000001e-05, + "loss": 154.6367, + "step": 21310 + }, + { + "epoch": 0.08613549776378997, + "grad_norm": 992.4075927734375, + "learning_rate": 3.4112e-05, + "loss": 126.7134, + "step": 21320 + }, + { + "epoch": 0.0861758990291576, + "grad_norm": 760.5980834960938, + "learning_rate": 3.4128e-05, + "loss": 125.7095, + "step": 21330 + }, + { + "epoch": 0.08621630029452522, + "grad_norm": 1635.6253662109375, + "learning_rate": 3.4144000000000004e-05, + "loss": 157.3116, + "step": 21340 + }, + { + "epoch": 0.08625670155989286, + "grad_norm": 871.8284912109375, + "learning_rate": 3.4160000000000005e-05, + "loss": 130.87, + "step": 21350 + }, + { + "epoch": 0.08629710282526049, + "grad_norm": 1175.6163330078125, + "learning_rate": 3.4176000000000006e-05, + "loss": 132.0413, + "step": 21360 + }, + { + "epoch": 0.08633750409062811, + "grad_norm": 1538.3843994140625, + "learning_rate": 3.4192e-05, + "loss": 135.4157, + "step": 21370 + }, + { + "epoch": 0.08637790535599575, + "grad_norm": 1098.7410888671875, + "learning_rate": 3.4208e-05, + "loss": 112.8145, + "step": 21380 + }, + { + "epoch": 0.08641830662136338, + "grad_norm": 1552.542236328125, + "learning_rate": 3.4224e-05, + "loss": 142.8729, + "step": 21390 + }, + { + "epoch": 0.08645870788673102, + "grad_norm": 1617.868408203125, + "learning_rate": 3.4240000000000004e-05, + "loss": 196.1722, + "step": 21400 + }, + { + "epoch": 0.08649910915209864, + "grad_norm": 688.4221801757812, + "learning_rate": 3.4256000000000005e-05, + "loss": 171.6963, + "step": 21410 + }, + { + "epoch": 0.08653951041746627, + "grad_norm": 967.9605712890625, + "learning_rate": 3.4272e-05, + "loss": 134.7255, + "step": 21420 + }, + { + "epoch": 0.08657991168283391, + "grad_norm": 880.7876586914062, + "learning_rate": 3.4288e-05, + "loss": 118.4826, + "step": 21430 + }, + { + "epoch": 0.08662031294820154, + "grad_norm": 1676.497314453125, + "learning_rate": 3.4304e-05, + "loss": 170.033, + "step": 21440 + }, + { + "epoch": 0.08666071421356916, + "grad_norm": 770.4820556640625, + "learning_rate": 3.4320000000000003e-05, + "loss": 137.369, + "step": 21450 + }, + { + "epoch": 0.0867011154789368, + "grad_norm": 2236.962890625, + "learning_rate": 
3.4336000000000005e-05, + "loss": 136.3011, + "step": 21460 + }, + { + "epoch": 0.08674151674430443, + "grad_norm": 4461.37158203125, + "learning_rate": 3.4352000000000006e-05, + "loss": 203.7692, + "step": 21470 + }, + { + "epoch": 0.08678191800967207, + "grad_norm": 816.0757446289062, + "learning_rate": 3.4368e-05, + "loss": 89.3527, + "step": 21480 + }, + { + "epoch": 0.0868223192750397, + "grad_norm": 1883.265625, + "learning_rate": 3.4384e-05, + "loss": 200.5738, + "step": 21490 + }, + { + "epoch": 0.08686272054040732, + "grad_norm": 950.2667236328125, + "learning_rate": 3.44e-05, + "loss": 111.6436, + "step": 21500 + }, + { + "epoch": 0.08690312180577496, + "grad_norm": 556.4793090820312, + "learning_rate": 3.4416000000000004e-05, + "loss": 168.93, + "step": 21510 + }, + { + "epoch": 0.08694352307114259, + "grad_norm": 1274.6942138671875, + "learning_rate": 3.4432000000000005e-05, + "loss": 148.2716, + "step": 21520 + }, + { + "epoch": 0.08698392433651021, + "grad_norm": 1185.3028564453125, + "learning_rate": 3.4448e-05, + "loss": 145.5438, + "step": 21530 + }, + { + "epoch": 0.08702432560187785, + "grad_norm": 1652.6260986328125, + "learning_rate": 3.446400000000001e-05, + "loss": 104.8234, + "step": 21540 + }, + { + "epoch": 0.08706472686724548, + "grad_norm": 750.2820434570312, + "learning_rate": 3.448e-05, + "loss": 159.4831, + "step": 21550 + }, + { + "epoch": 0.08710512813261312, + "grad_norm": 1949.8719482421875, + "learning_rate": 3.4496e-05, + "loss": 159.4576, + "step": 21560 + }, + { + "epoch": 0.08714552939798074, + "grad_norm": 759.288818359375, + "learning_rate": 3.4512000000000004e-05, + "loss": 97.0836, + "step": 21570 + }, + { + "epoch": 0.08718593066334837, + "grad_norm": 564.93212890625, + "learning_rate": 3.4528e-05, + "loss": 148.2846, + "step": 21580 + }, + { + "epoch": 0.08722633192871601, + "grad_norm": 794.4310913085938, + "learning_rate": 3.454400000000001e-05, + "loss": 180.6302, + "step": 21590 + }, + { + "epoch": 0.08726673319408364, + "grad_norm": 1238.356201171875, + "learning_rate": 3.456e-05, + "loss": 167.728, + "step": 21600 + }, + { + "epoch": 0.08730713445945126, + "grad_norm": 516.9571533203125, + "learning_rate": 3.4576e-05, + "loss": 115.0428, + "step": 21610 + }, + { + "epoch": 0.0873475357248189, + "grad_norm": 1512.1278076171875, + "learning_rate": 3.4592000000000004e-05, + "loss": 158.977, + "step": 21620 + }, + { + "epoch": 0.08738793699018653, + "grad_norm": 511.3782653808594, + "learning_rate": 3.4608000000000005e-05, + "loss": 216.9958, + "step": 21630 + }, + { + "epoch": 0.08742833825555417, + "grad_norm": 1088.4840087890625, + "learning_rate": 3.4624000000000006e-05, + "loss": 114.9843, + "step": 21640 + }, + { + "epoch": 0.0874687395209218, + "grad_norm": 840.4856567382812, + "learning_rate": 3.464e-05, + "loss": 152.9572, + "step": 21650 + }, + { + "epoch": 0.08750914078628942, + "grad_norm": 1230.9224853515625, + "learning_rate": 3.4656e-05, + "loss": 129.8008, + "step": 21660 + }, + { + "epoch": 0.08754954205165706, + "grad_norm": 547.154541015625, + "learning_rate": 3.4672e-05, + "loss": 140.6826, + "step": 21670 + }, + { + "epoch": 0.08758994331702469, + "grad_norm": 1044.8892822265625, + "learning_rate": 3.4688000000000004e-05, + "loss": 113.0675, + "step": 21680 + }, + { + "epoch": 0.08763034458239231, + "grad_norm": 1026.08056640625, + "learning_rate": 3.4704000000000005e-05, + "loss": 154.0886, + "step": 21690 + }, + { + "epoch": 0.08767074584775995, + "grad_norm": 459.9974060058594, + "learning_rate": 3.472e-05, + "loss": 
170.3707, + "step": 21700 + }, + { + "epoch": 0.08771114711312758, + "grad_norm": 1600.667724609375, + "learning_rate": 3.4736e-05, + "loss": 151.5475, + "step": 21710 + }, + { + "epoch": 0.08775154837849522, + "grad_norm": 871.069580078125, + "learning_rate": 3.4752e-05, + "loss": 144.102, + "step": 21720 + }, + { + "epoch": 0.08779194964386285, + "grad_norm": 662.0873413085938, + "learning_rate": 3.4768e-05, + "loss": 155.1722, + "step": 21730 + }, + { + "epoch": 0.08783235090923047, + "grad_norm": 1372.0865478515625, + "learning_rate": 3.4784000000000004e-05, + "loss": 106.985, + "step": 21740 + }, + { + "epoch": 0.08787275217459811, + "grad_norm": 655.1107788085938, + "learning_rate": 3.4800000000000006e-05, + "loss": 133.3239, + "step": 21750 + }, + { + "epoch": 0.08791315343996574, + "grad_norm": 927.88671875, + "learning_rate": 3.4816e-05, + "loss": 193.0352, + "step": 21760 + }, + { + "epoch": 0.08795355470533336, + "grad_norm": 673.9087524414062, + "learning_rate": 3.4832e-05, + "loss": 132.2774, + "step": 21770 + }, + { + "epoch": 0.087993955970701, + "grad_norm": 1195.658447265625, + "learning_rate": 3.4848e-05, + "loss": 201.5674, + "step": 21780 + }, + { + "epoch": 0.08803435723606863, + "grad_norm": 588.1712036132812, + "learning_rate": 3.4864000000000004e-05, + "loss": 150.3514, + "step": 21790 + }, + { + "epoch": 0.08807475850143627, + "grad_norm": 1374.11279296875, + "learning_rate": 3.4880000000000005e-05, + "loss": 206.019, + "step": 21800 + }, + { + "epoch": 0.0881151597668039, + "grad_norm": 1040.1878662109375, + "learning_rate": 3.4896e-05, + "loss": 171.2179, + "step": 21810 + }, + { + "epoch": 0.08815556103217152, + "grad_norm": 1333.456298828125, + "learning_rate": 3.4912e-05, + "loss": 109.9464, + "step": 21820 + }, + { + "epoch": 0.08819596229753916, + "grad_norm": 3173.137451171875, + "learning_rate": 3.4928e-05, + "loss": 149.7898, + "step": 21830 + }, + { + "epoch": 0.08823636356290679, + "grad_norm": 1633.1236572265625, + "learning_rate": 3.4944e-05, + "loss": 138.7288, + "step": 21840 + }, + { + "epoch": 0.08827676482827441, + "grad_norm": 551.3711547851562, + "learning_rate": 3.4960000000000004e-05, + "loss": 108.8455, + "step": 21850 + }, + { + "epoch": 0.08831716609364205, + "grad_norm": 1240.1185302734375, + "learning_rate": 3.4976e-05, + "loss": 131.9122, + "step": 21860 + }, + { + "epoch": 0.08835756735900968, + "grad_norm": 937.1015014648438, + "learning_rate": 3.4992000000000006e-05, + "loss": 145.2808, + "step": 21870 + }, + { + "epoch": 0.08839796862437732, + "grad_norm": 4325.3662109375, + "learning_rate": 3.5008e-05, + "loss": 142.6543, + "step": 21880 + }, + { + "epoch": 0.08843836988974495, + "grad_norm": 1809.6087646484375, + "learning_rate": 3.5024e-05, + "loss": 147.8612, + "step": 21890 + }, + { + "epoch": 0.08847877115511257, + "grad_norm": 1056.62158203125, + "learning_rate": 3.504e-05, + "loss": 159.7672, + "step": 21900 + }, + { + "epoch": 0.08851917242048021, + "grad_norm": 2963.2265625, + "learning_rate": 3.5056e-05, + "loss": 170.2018, + "step": 21910 + }, + { + "epoch": 0.08855957368584784, + "grad_norm": 3147.7216796875, + "learning_rate": 3.5072000000000006e-05, + "loss": 120.0709, + "step": 21920 + }, + { + "epoch": 0.08859997495121547, + "grad_norm": 1023.0355834960938, + "learning_rate": 3.5088e-05, + "loss": 120.3967, + "step": 21930 + }, + { + "epoch": 0.0886403762165831, + "grad_norm": 810.3030395507812, + "learning_rate": 3.5104e-05, + "loss": 194.2192, + "step": 21940 + }, + { + "epoch": 0.08868077748195073, + "grad_norm": 
4196.16748046875, + "learning_rate": 3.512e-05, + "loss": 128.0004, + "step": 21950 + }, + { + "epoch": 0.08872117874731837, + "grad_norm": 1353.0517578125, + "learning_rate": 3.5136000000000004e-05, + "loss": 191.791, + "step": 21960 + }, + { + "epoch": 0.088761580012686, + "grad_norm": 659.225830078125, + "learning_rate": 3.5152000000000005e-05, + "loss": 183.0125, + "step": 21970 + }, + { + "epoch": 0.08880198127805362, + "grad_norm": 858.825439453125, + "learning_rate": 3.5168e-05, + "loss": 131.65, + "step": 21980 + }, + { + "epoch": 0.08884238254342126, + "grad_norm": 3072.340576171875, + "learning_rate": 3.518400000000001e-05, + "loss": 153.7905, + "step": 21990 + }, + { + "epoch": 0.08888278380878889, + "grad_norm": 601.5523071289062, + "learning_rate": 3.52e-05, + "loss": 144.7446, + "step": 22000 + }, + { + "epoch": 0.08892318507415652, + "grad_norm": 764.5150756835938, + "learning_rate": 3.5216e-05, + "loss": 132.0805, + "step": 22010 + }, + { + "epoch": 0.08896358633952416, + "grad_norm": 1314.170654296875, + "learning_rate": 3.5232000000000004e-05, + "loss": 185.2374, + "step": 22020 + }, + { + "epoch": 0.08900398760489178, + "grad_norm": 3032.62451171875, + "learning_rate": 3.5248000000000005e-05, + "loss": 180.5076, + "step": 22030 + }, + { + "epoch": 0.08904438887025942, + "grad_norm": 1360.1429443359375, + "learning_rate": 3.5264000000000007e-05, + "loss": 123.2203, + "step": 22040 + }, + { + "epoch": 0.08908479013562705, + "grad_norm": 1035.43505859375, + "learning_rate": 3.528e-05, + "loss": 99.4968, + "step": 22050 + }, + { + "epoch": 0.08912519140099467, + "grad_norm": 915.3469848632812, + "learning_rate": 3.5296e-05, + "loss": 155.0251, + "step": 22060 + }, + { + "epoch": 0.08916559266636231, + "grad_norm": 852.5868530273438, + "learning_rate": 3.5312000000000003e-05, + "loss": 118.8955, + "step": 22070 + }, + { + "epoch": 0.08920599393172994, + "grad_norm": 4727.56982421875, + "learning_rate": 3.5328000000000005e-05, + "loss": 173.0974, + "step": 22080 + }, + { + "epoch": 0.08924639519709757, + "grad_norm": 2289.63916015625, + "learning_rate": 3.5344000000000006e-05, + "loss": 193.1989, + "step": 22090 + }, + { + "epoch": 0.0892867964624652, + "grad_norm": 1030.8114013671875, + "learning_rate": 3.536e-05, + "loss": 159.3725, + "step": 22100 + }, + { + "epoch": 0.08932719772783283, + "grad_norm": 3064.20556640625, + "learning_rate": 3.5376e-05, + "loss": 155.0531, + "step": 22110 + }, + { + "epoch": 0.08936759899320047, + "grad_norm": 1814.234619140625, + "learning_rate": 3.5392e-05, + "loss": 121.5926, + "step": 22120 + }, + { + "epoch": 0.0894080002585681, + "grad_norm": 1664.8179931640625, + "learning_rate": 3.5408000000000004e-05, + "loss": 154.3786, + "step": 22130 + }, + { + "epoch": 0.08944840152393572, + "grad_norm": 1362.40869140625, + "learning_rate": 3.5424000000000005e-05, + "loss": 135.184, + "step": 22140 + }, + { + "epoch": 0.08948880278930336, + "grad_norm": 1009.8009033203125, + "learning_rate": 3.5440000000000006e-05, + "loss": 141.2752, + "step": 22150 + }, + { + "epoch": 0.08952920405467099, + "grad_norm": 1110.58203125, + "learning_rate": 3.5456e-05, + "loss": 95.6904, + "step": 22160 + }, + { + "epoch": 0.08956960532003862, + "grad_norm": 969.4694213867188, + "learning_rate": 3.5472e-05, + "loss": 144.6799, + "step": 22170 + }, + { + "epoch": 0.08961000658540626, + "grad_norm": 685.1071166992188, + "learning_rate": 3.5488e-05, + "loss": 114.7927, + "step": 22180 + }, + { + "epoch": 0.08965040785077388, + "grad_norm": 1380.3277587890625, + 
"learning_rate": 3.5504e-05, + "loss": 185.8695, + "step": 22190 + }, + { + "epoch": 0.08969080911614152, + "grad_norm": 1552.51806640625, + "learning_rate": 3.5520000000000006e-05, + "loss": 193.4836, + "step": 22200 + }, + { + "epoch": 0.08973121038150915, + "grad_norm": 819.05517578125, + "learning_rate": 3.5536e-05, + "loss": 134.4086, + "step": 22210 + }, + { + "epoch": 0.08977161164687678, + "grad_norm": 1532.5880126953125, + "learning_rate": 3.5552e-05, + "loss": 193.636, + "step": 22220 + }, + { + "epoch": 0.08981201291224442, + "grad_norm": 404.35418701171875, + "learning_rate": 3.5568e-05, + "loss": 101.8523, + "step": 22230 + }, + { + "epoch": 0.08985241417761204, + "grad_norm": 643.7118530273438, + "learning_rate": 3.5584000000000004e-05, + "loss": 180.8746, + "step": 22240 + }, + { + "epoch": 0.08989281544297967, + "grad_norm": 1128.2568359375, + "learning_rate": 3.5600000000000005e-05, + "loss": 238.6185, + "step": 22250 + }, + { + "epoch": 0.08993321670834731, + "grad_norm": 992.0011596679688, + "learning_rate": 3.5616e-05, + "loss": 127.9398, + "step": 22260 + }, + { + "epoch": 0.08997361797371493, + "grad_norm": 771.1392211914062, + "learning_rate": 3.563200000000001e-05, + "loss": 140.5484, + "step": 22270 + }, + { + "epoch": 0.09001401923908257, + "grad_norm": 565.6467895507812, + "learning_rate": 3.5648e-05, + "loss": 106.3423, + "step": 22280 + }, + { + "epoch": 0.0900544205044502, + "grad_norm": 510.07208251953125, + "learning_rate": 3.5664e-05, + "loss": 168.2302, + "step": 22290 + }, + { + "epoch": 0.09009482176981783, + "grad_norm": 1707.440673828125, + "learning_rate": 3.5680000000000004e-05, + "loss": 186.937, + "step": 22300 + }, + { + "epoch": 0.09013522303518547, + "grad_norm": 834.1055908203125, + "learning_rate": 3.5696e-05, + "loss": 116.7266, + "step": 22310 + }, + { + "epoch": 0.09017562430055309, + "grad_norm": 548.1199951171875, + "learning_rate": 3.5712000000000006e-05, + "loss": 182.4943, + "step": 22320 + }, + { + "epoch": 0.09021602556592072, + "grad_norm": 641.1887817382812, + "learning_rate": 3.5728e-05, + "loss": 169.0187, + "step": 22330 + }, + { + "epoch": 0.09025642683128836, + "grad_norm": 1315.64599609375, + "learning_rate": 3.5744e-05, + "loss": 141.7523, + "step": 22340 + }, + { + "epoch": 0.09029682809665598, + "grad_norm": 654.6367797851562, + "learning_rate": 3.576e-05, + "loss": 129.719, + "step": 22350 + }, + { + "epoch": 0.09033722936202362, + "grad_norm": 893.9439086914062, + "learning_rate": 3.5776000000000004e-05, + "loss": 124.2225, + "step": 22360 + }, + { + "epoch": 0.09037763062739125, + "grad_norm": 932.633544921875, + "learning_rate": 3.5792000000000006e-05, + "loss": 171.4354, + "step": 22370 + }, + { + "epoch": 0.09041803189275888, + "grad_norm": 680.4566650390625, + "learning_rate": 3.5808e-05, + "loss": 125.191, + "step": 22380 + }, + { + "epoch": 0.09045843315812652, + "grad_norm": 789.4610595703125, + "learning_rate": 3.5824e-05, + "loss": 163.3039, + "step": 22390 + }, + { + "epoch": 0.09049883442349414, + "grad_norm": 907.9837646484375, + "learning_rate": 3.584e-05, + "loss": 124.2404, + "step": 22400 + }, + { + "epoch": 0.09053923568886177, + "grad_norm": 702.7406616210938, + "learning_rate": 3.5856000000000004e-05, + "loss": 127.3184, + "step": 22410 + }, + { + "epoch": 0.09057963695422941, + "grad_norm": 660.792236328125, + "learning_rate": 3.5872000000000005e-05, + "loss": 140.2423, + "step": 22420 + }, + { + "epoch": 0.09062003821959703, + "grad_norm": 795.4486083984375, + "learning_rate": 
3.5888000000000006e-05, + "loss": 129.5732, + "step": 22430 + }, + { + "epoch": 0.09066043948496467, + "grad_norm": 3354.665283203125, + "learning_rate": 3.5904e-05, + "loss": 131.7471, + "step": 22440 + }, + { + "epoch": 0.0907008407503323, + "grad_norm": 1027.1820068359375, + "learning_rate": 3.592e-05, + "loss": 161.8347, + "step": 22450 + }, + { + "epoch": 0.09074124201569993, + "grad_norm": 2271.216552734375, + "learning_rate": 3.5936e-05, + "loss": 193.8755, + "step": 22460 + }, + { + "epoch": 0.09078164328106757, + "grad_norm": 785.5519409179688, + "learning_rate": 3.5952000000000004e-05, + "loss": 160.9258, + "step": 22470 + }, + { + "epoch": 0.0908220445464352, + "grad_norm": 494.49603271484375, + "learning_rate": 3.5968000000000005e-05, + "loss": 152.8222, + "step": 22480 + }, + { + "epoch": 0.09086244581180282, + "grad_norm": 979.6446533203125, + "learning_rate": 3.5984e-05, + "loss": 152.389, + "step": 22490 + }, + { + "epoch": 0.09090284707717046, + "grad_norm": 1044.8822021484375, + "learning_rate": 3.6e-05, + "loss": 98.6145, + "step": 22500 + }, + { + "epoch": 0.09094324834253809, + "grad_norm": 1157.83544921875, + "learning_rate": 3.6016e-05, + "loss": 119.6571, + "step": 22510 + }, + { + "epoch": 0.09098364960790573, + "grad_norm": 1011.1734619140625, + "learning_rate": 3.6032e-05, + "loss": 111.348, + "step": 22520 + }, + { + "epoch": 0.09102405087327335, + "grad_norm": 1148.867919921875, + "learning_rate": 3.6048000000000005e-05, + "loss": 109.3501, + "step": 22530 + }, + { + "epoch": 0.09106445213864098, + "grad_norm": 1761.509033203125, + "learning_rate": 3.6064e-05, + "loss": 187.0426, + "step": 22540 + }, + { + "epoch": 0.09110485340400862, + "grad_norm": 1198.439208984375, + "learning_rate": 3.608000000000001e-05, + "loss": 102.2602, + "step": 22550 + }, + { + "epoch": 0.09114525466937624, + "grad_norm": 849.18359375, + "learning_rate": 3.6096e-05, + "loss": 161.1873, + "step": 22560 + }, + { + "epoch": 0.09118565593474387, + "grad_norm": 744.1982421875, + "learning_rate": 3.6112e-05, + "loss": 182.1997, + "step": 22570 + }, + { + "epoch": 0.09122605720011151, + "grad_norm": 1033.509765625, + "learning_rate": 3.6128000000000004e-05, + "loss": 166.5488, + "step": 22580 + }, + { + "epoch": 0.09126645846547914, + "grad_norm": 619.4390258789062, + "learning_rate": 3.6144e-05, + "loss": 93.459, + "step": 22590 + }, + { + "epoch": 0.09130685973084678, + "grad_norm": 726.9421997070312, + "learning_rate": 3.6160000000000006e-05, + "loss": 173.5223, + "step": 22600 + }, + { + "epoch": 0.0913472609962144, + "grad_norm": 1154.6739501953125, + "learning_rate": 3.6176e-05, + "loss": 126.8433, + "step": 22610 + }, + { + "epoch": 0.09138766226158203, + "grad_norm": 1169.060791015625, + "learning_rate": 3.6192e-05, + "loss": 150.0519, + "step": 22620 + }, + { + "epoch": 0.09142806352694967, + "grad_norm": 968.4627075195312, + "learning_rate": 3.6208e-05, + "loss": 162.5484, + "step": 22630 + }, + { + "epoch": 0.0914684647923173, + "grad_norm": 382.4349670410156, + "learning_rate": 3.6224000000000004e-05, + "loss": 129.1311, + "step": 22640 + }, + { + "epoch": 0.09150886605768492, + "grad_norm": 512.2999267578125, + "learning_rate": 3.6240000000000005e-05, + "loss": 152.405, + "step": 22650 + }, + { + "epoch": 0.09154926732305256, + "grad_norm": 3064.69189453125, + "learning_rate": 3.6256e-05, + "loss": 159.7207, + "step": 22660 + }, + { + "epoch": 0.09158966858842019, + "grad_norm": 970.8419189453125, + "learning_rate": 3.627200000000001e-05, + "loss": 151.4258, + "step": 22670 + 
}, + { + "epoch": 0.09163006985378783, + "grad_norm": 806.0418090820312, + "learning_rate": 3.6288e-05, + "loss": 143.3384, + "step": 22680 + }, + { + "epoch": 0.09167047111915545, + "grad_norm": 711.9028930664062, + "learning_rate": 3.6304000000000003e-05, + "loss": 116.7258, + "step": 22690 + }, + { + "epoch": 0.09171087238452308, + "grad_norm": 826.9251098632812, + "learning_rate": 3.6320000000000005e-05, + "loss": 177.9493, + "step": 22700 + }, + { + "epoch": 0.09175127364989072, + "grad_norm": 2070.822998046875, + "learning_rate": 3.6336e-05, + "loss": 186.8553, + "step": 22710 + }, + { + "epoch": 0.09179167491525834, + "grad_norm": 560.7052001953125, + "learning_rate": 3.635200000000001e-05, + "loss": 109.9031, + "step": 22720 + }, + { + "epoch": 0.09183207618062597, + "grad_norm": 1463.543212890625, + "learning_rate": 3.6368e-05, + "loss": 163.5243, + "step": 22730 + }, + { + "epoch": 0.09187247744599361, + "grad_norm": 1239.4158935546875, + "learning_rate": 3.6384e-05, + "loss": 120.6553, + "step": 22740 + }, + { + "epoch": 0.09191287871136124, + "grad_norm": 1005.3060302734375, + "learning_rate": 3.6400000000000004e-05, + "loss": 73.68, + "step": 22750 + }, + { + "epoch": 0.09195327997672888, + "grad_norm": 1087.8021240234375, + "learning_rate": 3.6416000000000005e-05, + "loss": 200.0768, + "step": 22760 + }, + { + "epoch": 0.0919936812420965, + "grad_norm": 688.5360107421875, + "learning_rate": 3.6432000000000006e-05, + "loss": 148.7597, + "step": 22770 + }, + { + "epoch": 0.09203408250746413, + "grad_norm": 1364.3865966796875, + "learning_rate": 3.6448e-05, + "loss": 138.2101, + "step": 22780 + }, + { + "epoch": 0.09207448377283177, + "grad_norm": 802.532958984375, + "learning_rate": 3.6464e-05, + "loss": 83.9188, + "step": 22790 + }, + { + "epoch": 0.0921148850381994, + "grad_norm": 977.3348999023438, + "learning_rate": 3.648e-05, + "loss": 138.3937, + "step": 22800 + }, + { + "epoch": 0.09215528630356702, + "grad_norm": 1319.4176025390625, + "learning_rate": 3.6496000000000004e-05, + "loss": 179.7714, + "step": 22810 + }, + { + "epoch": 0.09219568756893466, + "grad_norm": 815.4949951171875, + "learning_rate": 3.6512e-05, + "loss": 144.0214, + "step": 22820 + }, + { + "epoch": 0.09223608883430229, + "grad_norm": 1057.535400390625, + "learning_rate": 3.652800000000001e-05, + "loss": 117.9175, + "step": 22830 + }, + { + "epoch": 0.09227649009966993, + "grad_norm": 1290.474853515625, + "learning_rate": 3.6544e-05, + "loss": 173.7409, + "step": 22840 + }, + { + "epoch": 0.09231689136503755, + "grad_norm": 530.1447143554688, + "learning_rate": 3.656e-05, + "loss": 131.512, + "step": 22850 + }, + { + "epoch": 0.09235729263040518, + "grad_norm": 975.4651489257812, + "learning_rate": 3.6576000000000004e-05, + "loss": 155.5999, + "step": 22860 + }, + { + "epoch": 0.09239769389577282, + "grad_norm": 841.8017578125, + "learning_rate": 3.6592e-05, + "loss": 112.988, + "step": 22870 + }, + { + "epoch": 0.09243809516114045, + "grad_norm": 721.55078125, + "learning_rate": 3.6608000000000006e-05, + "loss": 107.2215, + "step": 22880 + }, + { + "epoch": 0.09247849642650807, + "grad_norm": 1096.8160400390625, + "learning_rate": 3.6624e-05, + "loss": 152.8692, + "step": 22890 + }, + { + "epoch": 0.09251889769187571, + "grad_norm": 893.4599609375, + "learning_rate": 3.664e-05, + "loss": 126.1478, + "step": 22900 + }, + { + "epoch": 0.09255929895724334, + "grad_norm": 634.9187622070312, + "learning_rate": 3.6656e-05, + "loss": 179.8972, + "step": 22910 + }, + { + "epoch": 0.09259970022261098, + 
"grad_norm": 1135.6064453125, + "learning_rate": 3.6672000000000004e-05, + "loss": 180.9714, + "step": 22920 + }, + { + "epoch": 0.0926401014879786, + "grad_norm": 664.3110961914062, + "learning_rate": 3.6688000000000005e-05, + "loss": 209.231, + "step": 22930 + }, + { + "epoch": 0.09268050275334623, + "grad_norm": 2294.11474609375, + "learning_rate": 3.6704e-05, + "loss": 115.3782, + "step": 22940 + }, + { + "epoch": 0.09272090401871387, + "grad_norm": 789.359619140625, + "learning_rate": 3.672000000000001e-05, + "loss": 114.7093, + "step": 22950 + }, + { + "epoch": 0.0927613052840815, + "grad_norm": 860.5071411132812, + "learning_rate": 3.6736e-05, + "loss": 135.498, + "step": 22960 + }, + { + "epoch": 0.09280170654944912, + "grad_norm": 953.83642578125, + "learning_rate": 3.6752e-05, + "loss": 147.866, + "step": 22970 + }, + { + "epoch": 0.09284210781481676, + "grad_norm": 1682.6334228515625, + "learning_rate": 3.6768000000000004e-05, + "loss": 102.4459, + "step": 22980 + }, + { + "epoch": 0.09288250908018439, + "grad_norm": 1022.0247802734375, + "learning_rate": 3.6784e-05, + "loss": 133.8921, + "step": 22990 + }, + { + "epoch": 0.09292291034555203, + "grad_norm": 1277.842529296875, + "learning_rate": 3.680000000000001e-05, + "loss": 107.0648, + "step": 23000 + }, + { + "epoch": 0.09296331161091966, + "grad_norm": 1354.216796875, + "learning_rate": 3.6816e-05, + "loss": 101.4201, + "step": 23010 + }, + { + "epoch": 0.09300371287628728, + "grad_norm": 959.92724609375, + "learning_rate": 3.6832e-05, + "loss": 84.4321, + "step": 23020 + }, + { + "epoch": 0.09304411414165492, + "grad_norm": 651.984130859375, + "learning_rate": 3.6848000000000004e-05, + "loss": 168.2103, + "step": 23030 + }, + { + "epoch": 0.09308451540702255, + "grad_norm": 788.34765625, + "learning_rate": 3.6864000000000005e-05, + "loss": 142.1959, + "step": 23040 + }, + { + "epoch": 0.09312491667239017, + "grad_norm": 1028.8729248046875, + "learning_rate": 3.6880000000000006e-05, + "loss": 322.2796, + "step": 23050 + }, + { + "epoch": 0.09316531793775781, + "grad_norm": 545.5979614257812, + "learning_rate": 3.6896e-05, + "loss": 120.0088, + "step": 23060 + }, + { + "epoch": 0.09320571920312544, + "grad_norm": 1290.5308837890625, + "learning_rate": 3.6912e-05, + "loss": 137.8579, + "step": 23070 + }, + { + "epoch": 0.09324612046849308, + "grad_norm": 1229.507568359375, + "learning_rate": 3.6928e-05, + "loss": 142.2953, + "step": 23080 + }, + { + "epoch": 0.0932865217338607, + "grad_norm": 1288.056396484375, + "learning_rate": 3.6944000000000004e-05, + "loss": 179.3537, + "step": 23090 + }, + { + "epoch": 0.09332692299922833, + "grad_norm": 473.9893798828125, + "learning_rate": 3.6960000000000005e-05, + "loss": 133.5954, + "step": 23100 + }, + { + "epoch": 0.09336732426459597, + "grad_norm": 328.0464782714844, + "learning_rate": 3.6976e-05, + "loss": 138.7491, + "step": 23110 + }, + { + "epoch": 0.0934077255299636, + "grad_norm": 739.8768310546875, + "learning_rate": 3.6992e-05, + "loss": 101.7406, + "step": 23120 + }, + { + "epoch": 0.09344812679533122, + "grad_norm": 1502.4400634765625, + "learning_rate": 3.7008e-05, + "loss": 163.1932, + "step": 23130 + }, + { + "epoch": 0.09348852806069886, + "grad_norm": 736.126220703125, + "learning_rate": 3.7024e-05, + "loss": 112.2283, + "step": 23140 + }, + { + "epoch": 0.09352892932606649, + "grad_norm": 1934.0955810546875, + "learning_rate": 3.7040000000000005e-05, + "loss": 223.2788, + "step": 23150 + }, + { + "epoch": 0.09356933059143413, + "grad_norm": 3059.709228515625, + 
"learning_rate": 3.7056000000000006e-05, + "loss": 156.637, + "step": 23160 + }, + { + "epoch": 0.09360973185680176, + "grad_norm": 756.9012451171875, + "learning_rate": 3.7072e-05, + "loss": 126.2166, + "step": 23170 + }, + { + "epoch": 0.09365013312216938, + "grad_norm": 1010.6748657226562, + "learning_rate": 3.7088e-05, + "loss": 180.7929, + "step": 23180 + }, + { + "epoch": 0.09369053438753702, + "grad_norm": 1007.3572998046875, + "learning_rate": 3.7104e-05, + "loss": 161.0757, + "step": 23190 + }, + { + "epoch": 0.09373093565290465, + "grad_norm": 1262.628173828125, + "learning_rate": 3.7120000000000004e-05, + "loss": 157.905, + "step": 23200 + }, + { + "epoch": 0.09377133691827227, + "grad_norm": 1171.1007080078125, + "learning_rate": 3.7136000000000005e-05, + "loss": 147.1025, + "step": 23210 + }, + { + "epoch": 0.09381173818363991, + "grad_norm": 918.8790893554688, + "learning_rate": 3.7152e-05, + "loss": 164.8327, + "step": 23220 + }, + { + "epoch": 0.09385213944900754, + "grad_norm": 2085.40625, + "learning_rate": 3.716800000000001e-05, + "loss": 206.7533, + "step": 23230 + }, + { + "epoch": 0.09389254071437518, + "grad_norm": 518.6524047851562, + "learning_rate": 3.7184e-05, + "loss": 97.2111, + "step": 23240 + }, + { + "epoch": 0.0939329419797428, + "grad_norm": 1226.23828125, + "learning_rate": 3.72e-05, + "loss": 79.7819, + "step": 23250 + }, + { + "epoch": 0.09397334324511043, + "grad_norm": 480.8341064453125, + "learning_rate": 3.7216000000000004e-05, + "loss": 128.033, + "step": 23260 + }, + { + "epoch": 0.09401374451047807, + "grad_norm": 447.2598876953125, + "learning_rate": 3.7232e-05, + "loss": 74.591, + "step": 23270 + }, + { + "epoch": 0.0940541457758457, + "grad_norm": 865.7748413085938, + "learning_rate": 3.7248000000000007e-05, + "loss": 191.5674, + "step": 23280 + }, + { + "epoch": 0.09409454704121333, + "grad_norm": 706.0413208007812, + "learning_rate": 3.7264e-05, + "loss": 92.4505, + "step": 23290 + }, + { + "epoch": 0.09413494830658097, + "grad_norm": 929.605712890625, + "learning_rate": 3.728e-05, + "loss": 112.2633, + "step": 23300 + }, + { + "epoch": 0.09417534957194859, + "grad_norm": 876.5859985351562, + "learning_rate": 3.7296000000000003e-05, + "loss": 104.2976, + "step": 23310 + }, + { + "epoch": 0.09421575083731623, + "grad_norm": 1444.4832763671875, + "learning_rate": 3.7312000000000005e-05, + "loss": 155.2843, + "step": 23320 + }, + { + "epoch": 0.09425615210268386, + "grad_norm": 790.5938720703125, + "learning_rate": 3.7328000000000006e-05, + "loss": 135.3645, + "step": 23330 + }, + { + "epoch": 0.09429655336805148, + "grad_norm": 497.1932067871094, + "learning_rate": 3.7344e-05, + "loss": 125.9408, + "step": 23340 + }, + { + "epoch": 0.09433695463341912, + "grad_norm": 1325.3848876953125, + "learning_rate": 3.736000000000001e-05, + "loss": 91.7027, + "step": 23350 + }, + { + "epoch": 0.09437735589878675, + "grad_norm": 975.733642578125, + "learning_rate": 3.7376e-05, + "loss": 138.2656, + "step": 23360 + }, + { + "epoch": 0.09441775716415438, + "grad_norm": 693.6296997070312, + "learning_rate": 3.7392000000000004e-05, + "loss": 99.8128, + "step": 23370 + }, + { + "epoch": 0.09445815842952202, + "grad_norm": 1274.6103515625, + "learning_rate": 3.7408000000000005e-05, + "loss": 151.0665, + "step": 23380 + }, + { + "epoch": 0.09449855969488964, + "grad_norm": 631.9049072265625, + "learning_rate": 3.7424e-05, + "loss": 76.9787, + "step": 23390 + }, + { + "epoch": 0.09453896096025728, + "grad_norm": 1393.939697265625, + "learning_rate": 
3.744000000000001e-05, + "loss": 211.1487, + "step": 23400 + }, + { + "epoch": 0.09457936222562491, + "grad_norm": 1336.576171875, + "learning_rate": 3.7456e-05, + "loss": 147.0912, + "step": 23410 + }, + { + "epoch": 0.09461976349099253, + "grad_norm": 953.3854370117188, + "learning_rate": 3.7472e-05, + "loss": 116.8691, + "step": 23420 + }, + { + "epoch": 0.09466016475636017, + "grad_norm": 705.37646484375, + "learning_rate": 3.7488000000000004e-05, + "loss": 93.2267, + "step": 23430 + }, + { + "epoch": 0.0947005660217278, + "grad_norm": 1064.370849609375, + "learning_rate": 3.7504000000000005e-05, + "loss": 144.6919, + "step": 23440 + }, + { + "epoch": 0.09474096728709543, + "grad_norm": 907.05712890625, + "learning_rate": 3.752e-05, + "loss": 167.4084, + "step": 23450 + }, + { + "epoch": 0.09478136855246307, + "grad_norm": 624.1815185546875, + "learning_rate": 3.7536e-05, + "loss": 83.6302, + "step": 23460 + }, + { + "epoch": 0.09482176981783069, + "grad_norm": 1266.3880615234375, + "learning_rate": 3.7552e-05, + "loss": 132.4881, + "step": 23470 + }, + { + "epoch": 0.09486217108319833, + "grad_norm": 813.5526123046875, + "learning_rate": 3.7568000000000004e-05, + "loss": 98.5183, + "step": 23480 + }, + { + "epoch": 0.09490257234856596, + "grad_norm": 1952.166015625, + "learning_rate": 3.7584000000000005e-05, + "loss": 182.171, + "step": 23490 + }, + { + "epoch": 0.09494297361393358, + "grad_norm": 1250.93408203125, + "learning_rate": 3.76e-05, + "loss": 169.2327, + "step": 23500 + }, + { + "epoch": 0.09498337487930122, + "grad_norm": 989.1015625, + "learning_rate": 3.7616e-05, + "loss": 131.5697, + "step": 23510 + }, + { + "epoch": 0.09502377614466885, + "grad_norm": 1177.61474609375, + "learning_rate": 3.7632e-05, + "loss": 144.0925, + "step": 23520 + }, + { + "epoch": 0.09506417741003648, + "grad_norm": 793.3934326171875, + "learning_rate": 3.7648e-05, + "loss": 131.8033, + "step": 23530 + }, + { + "epoch": 0.09510457867540412, + "grad_norm": 959.4913330078125, + "learning_rate": 3.7664000000000004e-05, + "loss": 111.8908, + "step": 23540 + }, + { + "epoch": 0.09514497994077174, + "grad_norm": 1645.24462890625, + "learning_rate": 3.768e-05, + "loss": 138.6869, + "step": 23550 + }, + { + "epoch": 0.09518538120613938, + "grad_norm": 861.9459838867188, + "learning_rate": 3.7696000000000006e-05, + "loss": 135.6349, + "step": 23560 + }, + { + "epoch": 0.09522578247150701, + "grad_norm": 1338.7578125, + "learning_rate": 3.7712e-05, + "loss": 126.7223, + "step": 23570 + }, + { + "epoch": 0.09526618373687464, + "grad_norm": 485.1557312011719, + "learning_rate": 3.7728e-05, + "loss": 138.1629, + "step": 23580 + }, + { + "epoch": 0.09530658500224228, + "grad_norm": 821.0625610351562, + "learning_rate": 3.7744e-05, + "loss": 84.2995, + "step": 23590 + }, + { + "epoch": 0.0953469862676099, + "grad_norm": 737.8482055664062, + "learning_rate": 3.7760000000000004e-05, + "loss": 161.4703, + "step": 23600 + }, + { + "epoch": 0.09538738753297753, + "grad_norm": 500.7242736816406, + "learning_rate": 3.7776000000000006e-05, + "loss": 103.4814, + "step": 23610 + }, + { + "epoch": 0.09542778879834517, + "grad_norm": 776.170654296875, + "learning_rate": 3.7792e-05, + "loss": 209.5228, + "step": 23620 + }, + { + "epoch": 0.0954681900637128, + "grad_norm": 1282.9637451171875, + "learning_rate": 3.780800000000001e-05, + "loss": 202.6378, + "step": 23630 + }, + { + "epoch": 0.09550859132908043, + "grad_norm": 1296.654541015625, + "learning_rate": 3.7824e-05, + "loss": 132.0891, + "step": 23640 + }, + { + 
"epoch": 0.09554899259444806, + "grad_norm": 1877.1707763671875, + "learning_rate": 3.7840000000000004e-05, + "loss": 220.0349, + "step": 23650 + }, + { + "epoch": 0.09558939385981569, + "grad_norm": 579.21630859375, + "learning_rate": 3.7856000000000005e-05, + "loss": 115.275, + "step": 23660 + }, + { + "epoch": 0.09562979512518333, + "grad_norm": 1053.3505859375, + "learning_rate": 3.7872e-05, + "loss": 134.3348, + "step": 23670 + }, + { + "epoch": 0.09567019639055095, + "grad_norm": 735.5716552734375, + "learning_rate": 3.788800000000001e-05, + "loss": 181.0642, + "step": 23680 + }, + { + "epoch": 0.09571059765591858, + "grad_norm": 1244.1798095703125, + "learning_rate": 3.7904e-05, + "loss": 174.2707, + "step": 23690 + }, + { + "epoch": 0.09575099892128622, + "grad_norm": 1307.7808837890625, + "learning_rate": 3.792e-05, + "loss": 145.8374, + "step": 23700 + }, + { + "epoch": 0.09579140018665384, + "grad_norm": 505.2427062988281, + "learning_rate": 3.7936000000000004e-05, + "loss": 143.6947, + "step": 23710 + }, + { + "epoch": 0.09583180145202148, + "grad_norm": 1027.926513671875, + "learning_rate": 3.7952000000000005e-05, + "loss": 256.3735, + "step": 23720 + }, + { + "epoch": 0.09587220271738911, + "grad_norm": 2318.901123046875, + "learning_rate": 3.7968000000000006e-05, + "loss": 168.0672, + "step": 23730 + }, + { + "epoch": 0.09591260398275674, + "grad_norm": 1743.447998046875, + "learning_rate": 3.7984e-05, + "loss": 169.4227, + "step": 23740 + }, + { + "epoch": 0.09595300524812438, + "grad_norm": 1299.091552734375, + "learning_rate": 3.8e-05, + "loss": 137.6399, + "step": 23750 + }, + { + "epoch": 0.095993406513492, + "grad_norm": 635.9812622070312, + "learning_rate": 3.8016e-05, + "loss": 123.848, + "step": 23760 + }, + { + "epoch": 0.09603380777885963, + "grad_norm": 764.4136962890625, + "learning_rate": 3.8032000000000004e-05, + "loss": 170.6924, + "step": 23770 + }, + { + "epoch": 0.09607420904422727, + "grad_norm": 1045.0787353515625, + "learning_rate": 3.8048000000000006e-05, + "loss": 118.8767, + "step": 23780 + }, + { + "epoch": 0.0961146103095949, + "grad_norm": 969.3451538085938, + "learning_rate": 3.8064e-05, + "loss": 263.6281, + "step": 23790 + }, + { + "epoch": 0.09615501157496253, + "grad_norm": 863.6657104492188, + "learning_rate": 3.808e-05, + "loss": 151.9405, + "step": 23800 + }, + { + "epoch": 0.09619541284033016, + "grad_norm": 443.4707946777344, + "learning_rate": 3.8096e-05, + "loss": 109.4972, + "step": 23810 + }, + { + "epoch": 0.09623581410569779, + "grad_norm": 611.2152709960938, + "learning_rate": 3.8112000000000004e-05, + "loss": 142.6797, + "step": 23820 + }, + { + "epoch": 0.09627621537106543, + "grad_norm": 1075.916015625, + "learning_rate": 3.8128000000000005e-05, + "loss": 178.9587, + "step": 23830 + }, + { + "epoch": 0.09631661663643305, + "grad_norm": 705.0506591796875, + "learning_rate": 3.8144000000000006e-05, + "loss": 161.0051, + "step": 23840 + }, + { + "epoch": 0.09635701790180068, + "grad_norm": 1352.298095703125, + "learning_rate": 3.816e-05, + "loss": 95.2039, + "step": 23850 + }, + { + "epoch": 0.09639741916716832, + "grad_norm": 749.6808471679688, + "learning_rate": 3.8176e-05, + "loss": 132.2588, + "step": 23860 + }, + { + "epoch": 0.09643782043253595, + "grad_norm": 566.2886962890625, + "learning_rate": 3.8192e-05, + "loss": 104.998, + "step": 23870 + }, + { + "epoch": 0.09647822169790359, + "grad_norm": 967.2838134765625, + "learning_rate": 3.8208000000000004e-05, + "loss": 105.0314, + "step": 23880 + }, + { + "epoch": 
0.09651862296327121, + "grad_norm": 588.8056640625, + "learning_rate": 3.8224000000000005e-05, + "loss": 139.5455, + "step": 23890 + }, + { + "epoch": 0.09655902422863884, + "grad_norm": 752.8648071289062, + "learning_rate": 3.824e-05, + "loss": 197.3626, + "step": 23900 + }, + { + "epoch": 0.09659942549400648, + "grad_norm": 2022.943359375, + "learning_rate": 3.8256e-05, + "loss": 169.7877, + "step": 23910 + }, + { + "epoch": 0.0966398267593741, + "grad_norm": 978.8882446289062, + "learning_rate": 3.8272e-05, + "loss": 102.7906, + "step": 23920 + }, + { + "epoch": 0.09668022802474173, + "grad_norm": 1176.936767578125, + "learning_rate": 3.8288000000000003e-05, + "loss": 175.1517, + "step": 23930 + }, + { + "epoch": 0.09672062929010937, + "grad_norm": 630.9819946289062, + "learning_rate": 3.8304000000000005e-05, + "loss": 144.5948, + "step": 23940 + }, + { + "epoch": 0.096761030555477, + "grad_norm": 1168.2840576171875, + "learning_rate": 3.832e-05, + "loss": 144.0024, + "step": 23950 + }, + { + "epoch": 0.09680143182084464, + "grad_norm": 764.27197265625, + "learning_rate": 3.833600000000001e-05, + "loss": 125.0117, + "step": 23960 + }, + { + "epoch": 0.09684183308621226, + "grad_norm": 2711.927734375, + "learning_rate": 3.8352e-05, + "loss": 157.2934, + "step": 23970 + }, + { + "epoch": 0.09688223435157989, + "grad_norm": 518.2022094726562, + "learning_rate": 3.8368e-05, + "loss": 91.7095, + "step": 23980 + }, + { + "epoch": 0.09692263561694753, + "grad_norm": 841.0518188476562, + "learning_rate": 3.8384000000000004e-05, + "loss": 137.6476, + "step": 23990 + }, + { + "epoch": 0.09696303688231515, + "grad_norm": 1324.9703369140625, + "learning_rate": 3.8400000000000005e-05, + "loss": 167.9167, + "step": 24000 + }, + { + "epoch": 0.09700343814768278, + "grad_norm": 1501.212646484375, + "learning_rate": 3.8416000000000006e-05, + "loss": 167.9078, + "step": 24010 + }, + { + "epoch": 0.09704383941305042, + "grad_norm": 690.9531860351562, + "learning_rate": 3.8432e-05, + "loss": 185.7411, + "step": 24020 + }, + { + "epoch": 0.09708424067841805, + "grad_norm": 935.4315185546875, + "learning_rate": 3.844800000000001e-05, + "loss": 143.6814, + "step": 24030 + }, + { + "epoch": 0.09712464194378569, + "grad_norm": 917.8416748046875, + "learning_rate": 3.8464e-05, + "loss": 164.2388, + "step": 24040 + }, + { + "epoch": 0.09716504320915331, + "grad_norm": 1273.2645263671875, + "learning_rate": 3.8480000000000004e-05, + "loss": 137.7012, + "step": 24050 + }, + { + "epoch": 0.09720544447452094, + "grad_norm": 791.5738525390625, + "learning_rate": 3.8496000000000005e-05, + "loss": 119.1036, + "step": 24060 + }, + { + "epoch": 0.09724584573988858, + "grad_norm": 410.45562744140625, + "learning_rate": 3.8512e-05, + "loss": 65.5812, + "step": 24070 + }, + { + "epoch": 0.0972862470052562, + "grad_norm": 962.7711791992188, + "learning_rate": 3.8528e-05, + "loss": 163.632, + "step": 24080 + }, + { + "epoch": 0.09732664827062383, + "grad_norm": 950.231201171875, + "learning_rate": 3.8544e-05, + "loss": 106.0146, + "step": 24090 + }, + { + "epoch": 0.09736704953599147, + "grad_norm": 842.4013061523438, + "learning_rate": 3.8560000000000004e-05, + "loss": 148.1679, + "step": 24100 + }, + { + "epoch": 0.0974074508013591, + "grad_norm": 699.17724609375, + "learning_rate": 3.8576000000000005e-05, + "loss": 124.9348, + "step": 24110 + }, + { + "epoch": 0.09744785206672674, + "grad_norm": 1233.7537841796875, + "learning_rate": 3.8592000000000006e-05, + "loss": 124.4887, + "step": 24120 + }, + { + "epoch": 
0.09748825333209436, + "grad_norm": 1283.4124755859375, + "learning_rate": 3.8608e-05, + "loss": 113.4554, + "step": 24130 + }, + { + "epoch": 0.09752865459746199, + "grad_norm": 1376.4443359375, + "learning_rate": 3.8624e-05, + "loss": 139.3352, + "step": 24140 + }, + { + "epoch": 0.09756905586282963, + "grad_norm": 1317.845458984375, + "learning_rate": 3.864e-05, + "loss": 132.827, + "step": 24150 + }, + { + "epoch": 0.09760945712819726, + "grad_norm": 698.750732421875, + "learning_rate": 3.8656000000000004e-05, + "loss": 115.0349, + "step": 24160 + }, + { + "epoch": 0.09764985839356488, + "grad_norm": 1766.1968994140625, + "learning_rate": 3.8672000000000005e-05, + "loss": 112.9395, + "step": 24170 + }, + { + "epoch": 0.09769025965893252, + "grad_norm": 1381.3680419921875, + "learning_rate": 3.8688e-05, + "loss": 161.6418, + "step": 24180 + }, + { + "epoch": 0.09773066092430015, + "grad_norm": 1029.4576416015625, + "learning_rate": 3.8704e-05, + "loss": 123.0185, + "step": 24190 + }, + { + "epoch": 0.09777106218966779, + "grad_norm": 1466.9619140625, + "learning_rate": 3.872e-05, + "loss": 145.7828, + "step": 24200 + }, + { + "epoch": 0.09781146345503541, + "grad_norm": 1146.3895263671875, + "learning_rate": 3.8736e-05, + "loss": 170.756, + "step": 24210 + }, + { + "epoch": 0.09785186472040304, + "grad_norm": 1035.5133056640625, + "learning_rate": 3.8752000000000004e-05, + "loss": 127.7768, + "step": 24220 + }, + { + "epoch": 0.09789226598577068, + "grad_norm": 2244.2119140625, + "learning_rate": 3.8768e-05, + "loss": 186.9938, + "step": 24230 + }, + { + "epoch": 0.0979326672511383, + "grad_norm": 1485.30224609375, + "learning_rate": 3.878400000000001e-05, + "loss": 107.4328, + "step": 24240 + }, + { + "epoch": 0.09797306851650593, + "grad_norm": 676.0277099609375, + "learning_rate": 3.88e-05, + "loss": 191.0219, + "step": 24250 + }, + { + "epoch": 0.09801346978187357, + "grad_norm": 749.9191284179688, + "learning_rate": 3.8816e-05, + "loss": 200.9514, + "step": 24260 + }, + { + "epoch": 0.0980538710472412, + "grad_norm": 2080.029052734375, + "learning_rate": 3.8832000000000004e-05, + "loss": 111.3625, + "step": 24270 + }, + { + "epoch": 0.09809427231260884, + "grad_norm": 1611.2764892578125, + "learning_rate": 3.8848e-05, + "loss": 148.4773, + "step": 24280 + }, + { + "epoch": 0.09813467357797646, + "grad_norm": 1065.07080078125, + "learning_rate": 3.8864000000000006e-05, + "loss": 115.9167, + "step": 24290 + }, + { + "epoch": 0.09817507484334409, + "grad_norm": 773.0284423828125, + "learning_rate": 3.888e-05, + "loss": 110.4303, + "step": 24300 + }, + { + "epoch": 0.09821547610871173, + "grad_norm": 662.4265747070312, + "learning_rate": 3.8896e-05, + "loss": 135.8568, + "step": 24310 + }, + { + "epoch": 0.09825587737407936, + "grad_norm": 930.15380859375, + "learning_rate": 3.8912e-05, + "loss": 115.4041, + "step": 24320 + }, + { + "epoch": 0.09829627863944698, + "grad_norm": 925.0225219726562, + "learning_rate": 3.8928000000000004e-05, + "loss": 120.7513, + "step": 24330 + }, + { + "epoch": 0.09833667990481462, + "grad_norm": 531.5995483398438, + "learning_rate": 3.8944000000000005e-05, + "loss": 115.2182, + "step": 24340 + }, + { + "epoch": 0.09837708117018225, + "grad_norm": 440.7538146972656, + "learning_rate": 3.896e-05, + "loss": 105.96, + "step": 24350 + }, + { + "epoch": 0.09841748243554987, + "grad_norm": 1492.7076416015625, + "learning_rate": 3.897600000000001e-05, + "loss": 178.5108, + "step": 24360 + }, + { + "epoch": 0.09845788370091751, + "grad_norm": 740.3887939453125, 
+ "learning_rate": 3.8992e-05, + "loss": 116.7319, + "step": 24370 + }, + { + "epoch": 0.09849828496628514, + "grad_norm": 821.4590454101562, + "learning_rate": 3.9008e-05, + "loss": 158.3296, + "step": 24380 + }, + { + "epoch": 0.09853868623165278, + "grad_norm": 2989.5830078125, + "learning_rate": 3.9024000000000004e-05, + "loss": 175.0016, + "step": 24390 + }, + { + "epoch": 0.0985790874970204, + "grad_norm": 900.2764892578125, + "learning_rate": 3.9040000000000006e-05, + "loss": 150.9962, + "step": 24400 + }, + { + "epoch": 0.09861948876238803, + "grad_norm": 1529.349853515625, + "learning_rate": 3.905600000000001e-05, + "loss": 178.0753, + "step": 24410 + }, + { + "epoch": 0.09865989002775567, + "grad_norm": 1499.201416015625, + "learning_rate": 3.9072e-05, + "loss": 229.8339, + "step": 24420 + }, + { + "epoch": 0.0987002912931233, + "grad_norm": 906.7962036132812, + "learning_rate": 3.9088e-05, + "loss": 104.2014, + "step": 24430 + }, + { + "epoch": 0.09874069255849093, + "grad_norm": 804.5547485351562, + "learning_rate": 3.9104000000000004e-05, + "loss": 195.8643, + "step": 24440 + }, + { + "epoch": 0.09878109382385857, + "grad_norm": 883.32421875, + "learning_rate": 3.9120000000000005e-05, + "loss": 157.1773, + "step": 24450 + }, + { + "epoch": 0.09882149508922619, + "grad_norm": 941.8516845703125, + "learning_rate": 3.9136000000000006e-05, + "loss": 143.8716, + "step": 24460 + }, + { + "epoch": 0.09886189635459383, + "grad_norm": 2118.697021484375, + "learning_rate": 3.9152e-05, + "loss": 155.3458, + "step": 24470 + }, + { + "epoch": 0.09890229761996146, + "grad_norm": 867.7554931640625, + "learning_rate": 3.9168e-05, + "loss": 108.0339, + "step": 24480 + }, + { + "epoch": 0.09894269888532908, + "grad_norm": 1188.6658935546875, + "learning_rate": 3.9184e-05, + "loss": 197.3661, + "step": 24490 + }, + { + "epoch": 0.09898310015069672, + "grad_norm": 998.950927734375, + "learning_rate": 3.9200000000000004e-05, + "loss": 173.2571, + "step": 24500 + }, + { + "epoch": 0.09902350141606435, + "grad_norm": 928.1749267578125, + "learning_rate": 3.9216000000000005e-05, + "loss": 134.9145, + "step": 24510 + }, + { + "epoch": 0.09906390268143198, + "grad_norm": 1987.83251953125, + "learning_rate": 3.9232000000000007e-05, + "loss": 120.6023, + "step": 24520 + }, + { + "epoch": 0.09910430394679962, + "grad_norm": 641.1785888671875, + "learning_rate": 3.9248e-05, + "loss": 116.1331, + "step": 24530 + }, + { + "epoch": 0.09914470521216724, + "grad_norm": 985.0919189453125, + "learning_rate": 3.9264e-05, + "loss": 133.4689, + "step": 24540 + }, + { + "epoch": 0.09918510647753488, + "grad_norm": 1131.6904296875, + "learning_rate": 3.9280000000000003e-05, + "loss": 120.5193, + "step": 24550 + }, + { + "epoch": 0.09922550774290251, + "grad_norm": 1347.8470458984375, + "learning_rate": 3.9296000000000005e-05, + "loss": 152.9488, + "step": 24560 + }, + { + "epoch": 0.09926590900827013, + "grad_norm": 1757.559326171875, + "learning_rate": 3.9312000000000006e-05, + "loss": 216.455, + "step": 24570 + }, + { + "epoch": 0.09930631027363777, + "grad_norm": 839.3011474609375, + "learning_rate": 3.9328e-05, + "loss": 107.9279, + "step": 24580 + }, + { + "epoch": 0.0993467115390054, + "grad_norm": 925.4636840820312, + "learning_rate": 3.9344e-05, + "loss": 197.6841, + "step": 24590 + }, + { + "epoch": 0.09938711280437303, + "grad_norm": 1504.67822265625, + "learning_rate": 3.936e-05, + "loss": 191.8697, + "step": 24600 + }, + { + "epoch": 0.09942751406974067, + "grad_norm": 1022.2680053710938, + 
"learning_rate": 3.9376000000000004e-05, + "loss": 118.906, + "step": 24610 + }, + { + "epoch": 0.09946791533510829, + "grad_norm": 812.20751953125, + "learning_rate": 3.9392000000000005e-05, + "loss": 85.545, + "step": 24620 + }, + { + "epoch": 0.09950831660047593, + "grad_norm": 460.5447082519531, + "learning_rate": 3.9408e-05, + "loss": 108.6712, + "step": 24630 + }, + { + "epoch": 0.09954871786584356, + "grad_norm": 1229.788330078125, + "learning_rate": 3.942400000000001e-05, + "loss": 149.9629, + "step": 24640 + }, + { + "epoch": 0.09958911913121118, + "grad_norm": 936.870361328125, + "learning_rate": 3.944e-05, + "loss": 97.0796, + "step": 24650 + }, + { + "epoch": 0.09962952039657882, + "grad_norm": 1801.947265625, + "learning_rate": 3.9456e-05, + "loss": 164.9348, + "step": 24660 + }, + { + "epoch": 0.09966992166194645, + "grad_norm": 865.18310546875, + "learning_rate": 3.9472000000000004e-05, + "loss": 159.6672, + "step": 24670 + }, + { + "epoch": 0.09971032292731408, + "grad_norm": 1778.037353515625, + "learning_rate": 3.9488e-05, + "loss": 137.5853, + "step": 24680 + }, + { + "epoch": 0.09975072419268172, + "grad_norm": 829.7141723632812, + "learning_rate": 3.950400000000001e-05, + "loss": 164.879, + "step": 24690 + }, + { + "epoch": 0.09979112545804934, + "grad_norm": 626.0407104492188, + "learning_rate": 3.952e-05, + "loss": 95.574, + "step": 24700 + }, + { + "epoch": 0.09983152672341698, + "grad_norm": 1820.85986328125, + "learning_rate": 3.9536e-05, + "loss": 126.3299, + "step": 24710 + }, + { + "epoch": 0.09987192798878461, + "grad_norm": 1025.991943359375, + "learning_rate": 3.9552000000000003e-05, + "loss": 160.1643, + "step": 24720 + }, + { + "epoch": 0.09991232925415224, + "grad_norm": 592.163330078125, + "learning_rate": 3.9568000000000005e-05, + "loss": 118.3517, + "step": 24730 + }, + { + "epoch": 0.09995273051951988, + "grad_norm": 2043.333740234375, + "learning_rate": 3.9584000000000006e-05, + "loss": 133.4334, + "step": 24740 + }, + { + "epoch": 0.0999931317848875, + "grad_norm": 1578.8074951171875, + "learning_rate": 3.96e-05, + "loss": 203.2788, + "step": 24750 + }, + { + "epoch": 0.10003353305025513, + "grad_norm": 522.986083984375, + "learning_rate": 3.9616e-05, + "loss": 110.4859, + "step": 24760 + }, + { + "epoch": 0.10007393431562277, + "grad_norm": 792.1961669921875, + "learning_rate": 3.9632e-05, + "loss": 98.6826, + "step": 24770 + }, + { + "epoch": 0.1001143355809904, + "grad_norm": 1226.542236328125, + "learning_rate": 3.9648000000000004e-05, + "loss": 102.9975, + "step": 24780 + }, + { + "epoch": 0.10015473684635803, + "grad_norm": 1341.432861328125, + "learning_rate": 3.9664000000000005e-05, + "loss": 152.2395, + "step": 24790 + }, + { + "epoch": 0.10019513811172566, + "grad_norm": 1274.8831787109375, + "learning_rate": 3.9680000000000006e-05, + "loss": 186.4028, + "step": 24800 + }, + { + "epoch": 0.10023553937709329, + "grad_norm": 1188.5775146484375, + "learning_rate": 3.9696e-05, + "loss": 138.2114, + "step": 24810 + }, + { + "epoch": 0.10027594064246093, + "grad_norm": 1279.4384765625, + "learning_rate": 3.9712e-05, + "loss": 156.6146, + "step": 24820 + }, + { + "epoch": 0.10031634190782855, + "grad_norm": 1361.79345703125, + "learning_rate": 3.9728e-05, + "loss": 220.2153, + "step": 24830 + }, + { + "epoch": 0.10035674317319618, + "grad_norm": 874.5380859375, + "learning_rate": 3.9744000000000004e-05, + "loss": 168.1279, + "step": 24840 + }, + { + "epoch": 0.10039714443856382, + "grad_norm": 459.81561279296875, + "learning_rate": 
3.9760000000000006e-05, + "loss": 116.7073, + "step": 24850 + }, + { + "epoch": 0.10043754570393144, + "grad_norm": 1626.4224853515625, + "learning_rate": 3.9776e-05, + "loss": 183.1674, + "step": 24860 + }, + { + "epoch": 0.10047794696929908, + "grad_norm": 1202.317138671875, + "learning_rate": 3.9792e-05, + "loss": 168.5099, + "step": 24870 + }, + { + "epoch": 0.10051834823466671, + "grad_norm": 1323.177734375, + "learning_rate": 3.9808e-05, + "loss": 139.7357, + "step": 24880 + }, + { + "epoch": 0.10055874950003434, + "grad_norm": 1755.9442138671875, + "learning_rate": 3.9824000000000004e-05, + "loss": 132.9178, + "step": 24890 + }, + { + "epoch": 0.10059915076540198, + "grad_norm": 541.7413940429688, + "learning_rate": 3.9840000000000005e-05, + "loss": 138.2767, + "step": 24900 + }, + { + "epoch": 0.1006395520307696, + "grad_norm": 1088.847900390625, + "learning_rate": 3.9856e-05, + "loss": 120.6401, + "step": 24910 + }, + { + "epoch": 0.10067995329613723, + "grad_norm": 1161.7738037109375, + "learning_rate": 3.987200000000001e-05, + "loss": 149.295, + "step": 24920 + }, + { + "epoch": 0.10072035456150487, + "grad_norm": 772.9661254882812, + "learning_rate": 3.9888e-05, + "loss": 133.1764, + "step": 24930 + }, + { + "epoch": 0.1007607558268725, + "grad_norm": 1113.4073486328125, + "learning_rate": 3.9904e-05, + "loss": 134.8506, + "step": 24940 + }, + { + "epoch": 0.10080115709224013, + "grad_norm": 1537.1070556640625, + "learning_rate": 3.9920000000000004e-05, + "loss": 118.9804, + "step": 24950 + }, + { + "epoch": 0.10084155835760776, + "grad_norm": 659.7022094726562, + "learning_rate": 3.9936e-05, + "loss": 147.106, + "step": 24960 + }, + { + "epoch": 0.10088195962297539, + "grad_norm": 742.6493530273438, + "learning_rate": 3.9952000000000006e-05, + "loss": 119.1868, + "step": 24970 + }, + { + "epoch": 0.10092236088834303, + "grad_norm": 1780.515625, + "learning_rate": 3.9968e-05, + "loss": 182.6448, + "step": 24980 + }, + { + "epoch": 0.10096276215371065, + "grad_norm": 962.6526489257812, + "learning_rate": 3.9984e-05, + "loss": 123.2351, + "step": 24990 + }, + { + "epoch": 0.10100316341907828, + "grad_norm": 456.5058898925781, + "learning_rate": 4e-05, + "loss": 149.2299, + "step": 25000 + }, + { + "epoch": 0.10104356468444592, + "grad_norm": 2819.208251953125, + "learning_rate": 3.999999980504486e-05, + "loss": 182.2097, + "step": 25010 + }, + { + "epoch": 0.10108396594981355, + "grad_norm": 1605.78466796875, + "learning_rate": 3.999999922017941e-05, + "loss": 190.2862, + "step": 25020 + }, + { + "epoch": 0.10112436721518119, + "grad_norm": 1368.6927490234375, + "learning_rate": 3.999999824540369e-05, + "loss": 129.3276, + "step": 25030 + }, + { + "epoch": 0.10116476848054881, + "grad_norm": 934.4607543945312, + "learning_rate": 3.9999996880717705e-05, + "loss": 94.7181, + "step": 25040 + }, + { + "epoch": 0.10120516974591644, + "grad_norm": 1747.9781494140625, + "learning_rate": 3.999999512612149e-05, + "loss": 145.6787, + "step": 25050 + }, + { + "epoch": 0.10124557101128408, + "grad_norm": 1146.9049072265625, + "learning_rate": 3.9999992981615066e-05, + "loss": 145.1845, + "step": 25060 + }, + { + "epoch": 0.1012859722766517, + "grad_norm": 478.7104797363281, + "learning_rate": 3.999999044719848e-05, + "loss": 122.6555, + "step": 25070 + }, + { + "epoch": 0.10132637354201933, + "grad_norm": 519.4974365234375, + "learning_rate": 3.9999987522871786e-05, + "loss": 93.5314, + "step": 25080 + }, + { + "epoch": 0.10136677480738697, + "grad_norm": 1305.19970703125, + "learning_rate": 
3.999998420863504e-05, + "loss": 138.1012, + "step": 25090 + }, + { + "epoch": 0.1014071760727546, + "grad_norm": 1915.395263671875, + "learning_rate": 3.9999980504488305e-05, + "loss": 148.0114, + "step": 25100 + }, + { + "epoch": 0.10144757733812224, + "grad_norm": 847.4804077148438, + "learning_rate": 3.999997641043165e-05, + "loss": 156.4502, + "step": 25110 + }, + { + "epoch": 0.10148797860348986, + "grad_norm": 482.191162109375, + "learning_rate": 3.999997192646517e-05, + "loss": 137.0837, + "step": 25120 + }, + { + "epoch": 0.10152837986885749, + "grad_norm": 1324.8929443359375, + "learning_rate": 3.999996705258893e-05, + "loss": 121.3795, + "step": 25130 + }, + { + "epoch": 0.10156878113422513, + "grad_norm": 523.1554565429688, + "learning_rate": 3.9999961788803034e-05, + "loss": 217.2956, + "step": 25140 + }, + { + "epoch": 0.10160918239959275, + "grad_norm": 563.0143432617188, + "learning_rate": 3.999995613510759e-05, + "loss": 165.4069, + "step": 25150 + }, + { + "epoch": 0.10164958366496038, + "grad_norm": 1577.764404296875, + "learning_rate": 3.9999950091502706e-05, + "loss": 171.2874, + "step": 25160 + }, + { + "epoch": 0.10168998493032802, + "grad_norm": 1565.6058349609375, + "learning_rate": 3.9999943657988496e-05, + "loss": 168.1739, + "step": 25170 + }, + { + "epoch": 0.10173038619569565, + "grad_norm": 1231.2525634765625, + "learning_rate": 3.9999936834565085e-05, + "loss": 135.7332, + "step": 25180 + }, + { + "epoch": 0.10177078746106329, + "grad_norm": 2142.35595703125, + "learning_rate": 3.9999929621232614e-05, + "loss": 140.119, + "step": 25190 + }, + { + "epoch": 0.10181118872643091, + "grad_norm": 954.3404541015625, + "learning_rate": 3.9999922017991213e-05, + "loss": 148.4794, + "step": 25200 + }, + { + "epoch": 0.10185158999179854, + "grad_norm": 708.4192504882812, + "learning_rate": 3.999991402484104e-05, + "loss": 116.8228, + "step": 25210 + }, + { + "epoch": 0.10189199125716618, + "grad_norm": 709.9586181640625, + "learning_rate": 3.999990564178225e-05, + "loss": 94.8024, + "step": 25220 + }, + { + "epoch": 0.1019323925225338, + "grad_norm": 1062.994873046875, + "learning_rate": 3.9999896868814995e-05, + "loss": 149.0882, + "step": 25230 + }, + { + "epoch": 0.10197279378790143, + "grad_norm": 3695.262451171875, + "learning_rate": 3.999988770593946e-05, + "loss": 207.0175, + "step": 25240 + }, + { + "epoch": 0.10201319505326907, + "grad_norm": 602.0803833007812, + "learning_rate": 3.999987815315581e-05, + "loss": 122.5251, + "step": 25250 + }, + { + "epoch": 0.1020535963186367, + "grad_norm": 661.0485229492188, + "learning_rate": 3.9999868210464244e-05, + "loss": 143.7932, + "step": 25260 + }, + { + "epoch": 0.10209399758400434, + "grad_norm": 626.5259399414062, + "learning_rate": 3.9999857877864945e-05, + "loss": 167.6487, + "step": 25270 + }, + { + "epoch": 0.10213439884937196, + "grad_norm": 824.9734497070312, + "learning_rate": 3.999984715535813e-05, + "loss": 135.9847, + "step": 25280 + }, + { + "epoch": 0.10217480011473959, + "grad_norm": 502.703369140625, + "learning_rate": 3.999983604294399e-05, + "loss": 121.1089, + "step": 25290 + }, + { + "epoch": 0.10221520138010723, + "grad_norm": 495.3207702636719, + "learning_rate": 3.999982454062275e-05, + "loss": 96.1928, + "step": 25300 + }, + { + "epoch": 0.10225560264547486, + "grad_norm": 1370.7491455078125, + "learning_rate": 3.999981264839464e-05, + "loss": 136.4106, + "step": 25310 + }, + { + "epoch": 0.10229600391084248, + "grad_norm": 619.8629150390625, + "learning_rate": 3.999980036625989e-05, + 
"loss": 200.4662, + "step": 25320 + }, + { + "epoch": 0.10233640517621012, + "grad_norm": 1394.193359375, + "learning_rate": 3.9999787694218724e-05, + "loss": 156.4572, + "step": 25330 + }, + { + "epoch": 0.10237680644157775, + "grad_norm": 605.2230224609375, + "learning_rate": 3.99997746322714e-05, + "loss": 165.2274, + "step": 25340 + }, + { + "epoch": 0.10241720770694539, + "grad_norm": 925.9087524414062, + "learning_rate": 3.999976118041818e-05, + "loss": 139.5152, + "step": 25350 + }, + { + "epoch": 0.10245760897231301, + "grad_norm": 477.5989685058594, + "learning_rate": 3.999974733865932e-05, + "loss": 139.3903, + "step": 25360 + }, + { + "epoch": 0.10249801023768064, + "grad_norm": 938.0068359375, + "learning_rate": 3.999973310699509e-05, + "loss": 163.8984, + "step": 25370 + }, + { + "epoch": 0.10253841150304828, + "grad_norm": 777.7579345703125, + "learning_rate": 3.999971848542576e-05, + "loss": 154.397, + "step": 25380 + }, + { + "epoch": 0.1025788127684159, + "grad_norm": 839.5980224609375, + "learning_rate": 3.999970347395162e-05, + "loss": 109.916, + "step": 25390 + }, + { + "epoch": 0.10261921403378353, + "grad_norm": 1493.9520263671875, + "learning_rate": 3.9999688072572966e-05, + "loss": 185.7153, + "step": 25400 + }, + { + "epoch": 0.10265961529915117, + "grad_norm": 512.5177001953125, + "learning_rate": 3.99996722812901e-05, + "loss": 133.9293, + "step": 25410 + }, + { + "epoch": 0.1027000165645188, + "grad_norm": 737.1036376953125, + "learning_rate": 3.9999656100103325e-05, + "loss": 122.4244, + "step": 25420 + }, + { + "epoch": 0.10274041782988644, + "grad_norm": 911.5401000976562, + "learning_rate": 3.999963952901296e-05, + "loss": 105.7961, + "step": 25430 + }, + { + "epoch": 0.10278081909525406, + "grad_norm": 1607.9185791015625, + "learning_rate": 3.999962256801932e-05, + "loss": 150.6174, + "step": 25440 + }, + { + "epoch": 0.10282122036062169, + "grad_norm": 719.7101440429688, + "learning_rate": 3.999960521712274e-05, + "loss": 194.3386, + "step": 25450 + }, + { + "epoch": 0.10286162162598933, + "grad_norm": 865.4852294921875, + "learning_rate": 3.999958747632357e-05, + "loss": 184.0372, + "step": 25460 + }, + { + "epoch": 0.10290202289135696, + "grad_norm": 1728.7254638671875, + "learning_rate": 3.9999569345622135e-05, + "loss": 152.5841, + "step": 25470 + }, + { + "epoch": 0.10294242415672458, + "grad_norm": 1164.2593994140625, + "learning_rate": 3.999955082501881e-05, + "loss": 110.4721, + "step": 25480 + }, + { + "epoch": 0.10298282542209222, + "grad_norm": 610.85791015625, + "learning_rate": 3.999953191451394e-05, + "loss": 117.8417, + "step": 25490 + }, + { + "epoch": 0.10302322668745985, + "grad_norm": 1170.373046875, + "learning_rate": 3.9999512614107894e-05, + "loss": 187.4558, + "step": 25500 + }, + { + "epoch": 0.10306362795282749, + "grad_norm": 2835.0576171875, + "learning_rate": 3.999949292380106e-05, + "loss": 123.7854, + "step": 25510 + }, + { + "epoch": 0.10310402921819511, + "grad_norm": 466.51031494140625, + "learning_rate": 3.9999472843593816e-05, + "loss": 150.1682, + "step": 25520 + }, + { + "epoch": 0.10314443048356274, + "grad_norm": 476.54339599609375, + "learning_rate": 3.999945237348655e-05, + "loss": 197.996, + "step": 25530 + }, + { + "epoch": 0.10318483174893038, + "grad_norm": 1193.14892578125, + "learning_rate": 3.999943151347967e-05, + "loss": 151.1272, + "step": 25540 + }, + { + "epoch": 0.103225233014298, + "grad_norm": 1146.5665283203125, + "learning_rate": 3.999941026357356e-05, + "loss": 176.5072, + "step": 25550 + }, + { + 
"epoch": 0.10326563427966563, + "grad_norm": 1187.28271484375, + "learning_rate": 3.999938862376866e-05, + "loss": 83.6073, + "step": 25560 + }, + { + "epoch": 0.10330603554503327, + "grad_norm": 6700.4619140625, + "learning_rate": 3.999936659406539e-05, + "loss": 122.252, + "step": 25570 + }, + { + "epoch": 0.1033464368104009, + "grad_norm": 853.3544921875, + "learning_rate": 3.999934417446416e-05, + "loss": 96.8904, + "step": 25580 + }, + { + "epoch": 0.10338683807576854, + "grad_norm": 1048.498291015625, + "learning_rate": 3.9999321364965415e-05, + "loss": 160.1672, + "step": 25590 + }, + { + "epoch": 0.10342723934113617, + "grad_norm": 1161.0396728515625, + "learning_rate": 3.9999298165569614e-05, + "loss": 134.9645, + "step": 25600 + }, + { + "epoch": 0.10346764060650379, + "grad_norm": 514.4901733398438, + "learning_rate": 3.9999274576277196e-05, + "loss": 126.711, + "step": 25610 + }, + { + "epoch": 0.10350804187187143, + "grad_norm": 1365.298583984375, + "learning_rate": 3.999925059708863e-05, + "loss": 119.4331, + "step": 25620 + }, + { + "epoch": 0.10354844313723906, + "grad_norm": 938.417724609375, + "learning_rate": 3.999922622800437e-05, + "loss": 128.3232, + "step": 25630 + }, + { + "epoch": 0.10358884440260668, + "grad_norm": 2659.393798828125, + "learning_rate": 3.99992014690249e-05, + "loss": 224.3084, + "step": 25640 + }, + { + "epoch": 0.10362924566797432, + "grad_norm": 759.932373046875, + "learning_rate": 3.99991763201507e-05, + "loss": 177.2599, + "step": 25650 + }, + { + "epoch": 0.10366964693334195, + "grad_norm": 1109.677490234375, + "learning_rate": 3.999915078138226e-05, + "loss": 145.2375, + "step": 25660 + }, + { + "epoch": 0.10371004819870959, + "grad_norm": 4549.8056640625, + "learning_rate": 3.999912485272008e-05, + "loss": 142.6902, + "step": 25670 + }, + { + "epoch": 0.10375044946407722, + "grad_norm": 454.1202087402344, + "learning_rate": 3.9999098534164675e-05, + "loss": 92.9833, + "step": 25680 + }, + { + "epoch": 0.10379085072944484, + "grad_norm": 498.8601989746094, + "learning_rate": 3.999907182571654e-05, + "loss": 130.8194, + "step": 25690 + }, + { + "epoch": 0.10383125199481248, + "grad_norm": 1956.301513671875, + "learning_rate": 3.99990447273762e-05, + "loss": 176.8726, + "step": 25700 + }, + { + "epoch": 0.10387165326018011, + "grad_norm": 879.5240478515625, + "learning_rate": 3.999901723914419e-05, + "loss": 138.8061, + "step": 25710 + }, + { + "epoch": 0.10391205452554773, + "grad_norm": 419.71014404296875, + "learning_rate": 3.999898936102104e-05, + "loss": 125.8183, + "step": 25720 + }, + { + "epoch": 0.10395245579091537, + "grad_norm": 1345.93310546875, + "learning_rate": 3.9998961093007295e-05, + "loss": 183.5303, + "step": 25730 + }, + { + "epoch": 0.103992857056283, + "grad_norm": 822.0501098632812, + "learning_rate": 3.9998932435103513e-05, + "loss": 116.5825, + "step": 25740 + }, + { + "epoch": 0.10403325832165064, + "grad_norm": 1995.70947265625, + "learning_rate": 3.999890338731025e-05, + "loss": 127.3333, + "step": 25750 + }, + { + "epoch": 0.10407365958701827, + "grad_norm": 2894.054443359375, + "learning_rate": 3.999887394962806e-05, + "loss": 141.1835, + "step": 25760 + }, + { + "epoch": 0.10411406085238589, + "grad_norm": 1300.9569091796875, + "learning_rate": 3.999884412205753e-05, + "loss": 146.5445, + "step": 25770 + }, + { + "epoch": 0.10415446211775353, + "grad_norm": 838.9808959960938, + "learning_rate": 3.9998813904599234e-05, + "loss": 94.2982, + "step": 25780 + }, + { + "epoch": 0.10419486338312116, + "grad_norm": 
1040.0491943359375, + "learning_rate": 3.999878329725377e-05, + "loss": 168.7387, + "step": 25790 + }, + { + "epoch": 0.10423526464848878, + "grad_norm": 551.9451293945312, + "learning_rate": 3.9998752300021736e-05, + "loss": 175.7625, + "step": 25800 + }, + { + "epoch": 0.10427566591385642, + "grad_norm": 445.8089904785156, + "learning_rate": 3.999872091290372e-05, + "loss": 106.5375, + "step": 25810 + }, + { + "epoch": 0.10431606717922405, + "grad_norm": 850.1509399414062, + "learning_rate": 3.999868913590034e-05, + "loss": 189.1629, + "step": 25820 + }, + { + "epoch": 0.10435646844459169, + "grad_norm": 551.35009765625, + "learning_rate": 3.999865696901223e-05, + "loss": 143.7296, + "step": 25830 + }, + { + "epoch": 0.10439686970995932, + "grad_norm": 652.6134643554688, + "learning_rate": 3.9998624412240004e-05, + "loss": 156.4553, + "step": 25840 + }, + { + "epoch": 0.10443727097532694, + "grad_norm": 741.5278930664062, + "learning_rate": 3.999859146558429e-05, + "loss": 148.1739, + "step": 25850 + }, + { + "epoch": 0.10447767224069458, + "grad_norm": 620.0882568359375, + "learning_rate": 3.999855812904575e-05, + "loss": 115.8366, + "step": 25860 + }, + { + "epoch": 0.10451807350606221, + "grad_norm": 1748.702392578125, + "learning_rate": 3.999852440262502e-05, + "loss": 139.4175, + "step": 25870 + }, + { + "epoch": 0.10455847477142984, + "grad_norm": 721.612548828125, + "learning_rate": 3.999849028632276e-05, + "loss": 132.4315, + "step": 25880 + }, + { + "epoch": 0.10459887603679748, + "grad_norm": 2573.0400390625, + "learning_rate": 3.9998455780139635e-05, + "loss": 167.0896, + "step": 25890 + }, + { + "epoch": 0.1046392773021651, + "grad_norm": 951.5845336914062, + "learning_rate": 3.999842088407633e-05, + "loss": 133.6499, + "step": 25900 + }, + { + "epoch": 0.10467967856753274, + "grad_norm": 2242.032958984375, + "learning_rate": 3.99983855981335e-05, + "loss": 155.8213, + "step": 25910 + }, + { + "epoch": 0.10472007983290037, + "grad_norm": 1808.51220703125, + "learning_rate": 3.9998349922311856e-05, + "loss": 94.1596, + "step": 25920 + }, + { + "epoch": 0.104760481098268, + "grad_norm": 663.6629638671875, + "learning_rate": 3.999831385661208e-05, + "loss": 109.4908, + "step": 25930 + }, + { + "epoch": 0.10480088236363563, + "grad_norm": 615.5850830078125, + "learning_rate": 3.999827740103487e-05, + "loss": 147.7094, + "step": 25940 + }, + { + "epoch": 0.10484128362900326, + "grad_norm": 1001.3383178710938, + "learning_rate": 3.9998240555580954e-05, + "loss": 150.2801, + "step": 25950 + }, + { + "epoch": 0.10488168489437089, + "grad_norm": 1923.8321533203125, + "learning_rate": 3.999820332025105e-05, + "loss": 121.6272, + "step": 25960 + }, + { + "epoch": 0.10492208615973853, + "grad_norm": 1065.7213134765625, + "learning_rate": 3.999816569504587e-05, + "loss": 138.8477, + "step": 25970 + }, + { + "epoch": 0.10496248742510615, + "grad_norm": 1114.1907958984375, + "learning_rate": 3.9998127679966154e-05, + "loss": 151.9689, + "step": 25980 + }, + { + "epoch": 0.10500288869047379, + "grad_norm": 1261.9344482421875, + "learning_rate": 3.999808927501264e-05, + "loss": 148.3912, + "step": 25990 + }, + { + "epoch": 0.10504328995584142, + "grad_norm": 1042.77685546875, + "learning_rate": 3.999805048018609e-05, + "loss": 130.1658, + "step": 26000 + }, + { + "epoch": 0.10508369122120904, + "grad_norm": 964.5862426757812, + "learning_rate": 3.9998011295487236e-05, + "loss": 112.7408, + "step": 26010 + }, + { + "epoch": 0.10512409248657668, + "grad_norm": 875.4702758789062, + 
"learning_rate": 3.999797172091687e-05, + "loss": 126.6196, + "step": 26020 + }, + { + "epoch": 0.10516449375194431, + "grad_norm": 662.7470092773438, + "learning_rate": 3.9997931756475744e-05, + "loss": 115.7688, + "step": 26030 + }, + { + "epoch": 0.10520489501731194, + "grad_norm": 2155.029052734375, + "learning_rate": 3.9997891402164644e-05, + "loss": 160.2535, + "step": 26040 + }, + { + "epoch": 0.10524529628267958, + "grad_norm": 965.6383056640625, + "learning_rate": 3.999785065798435e-05, + "loss": 120.9679, + "step": 26050 + }, + { + "epoch": 0.1052856975480472, + "grad_norm": 903.6862182617188, + "learning_rate": 3.999780952393566e-05, + "loss": 135.1932, + "step": 26060 + }, + { + "epoch": 0.10532609881341484, + "grad_norm": 654.3475952148438, + "learning_rate": 3.999776800001939e-05, + "loss": 125.433, + "step": 26070 + }, + { + "epoch": 0.10536650007878247, + "grad_norm": 1122.2447509765625, + "learning_rate": 3.9997726086236325e-05, + "loss": 148.301, + "step": 26080 + }, + { + "epoch": 0.1054069013441501, + "grad_norm": 1409.646240234375, + "learning_rate": 3.999768378258731e-05, + "loss": 129.3439, + "step": 26090 + }, + { + "epoch": 0.10544730260951773, + "grad_norm": 1177.88134765625, + "learning_rate": 3.999764108907314e-05, + "loss": 82.6178, + "step": 26100 + }, + { + "epoch": 0.10548770387488536, + "grad_norm": 1165.595703125, + "learning_rate": 3.999759800569467e-05, + "loss": 120.6849, + "step": 26110 + }, + { + "epoch": 0.10552810514025299, + "grad_norm": 1408.8302001953125, + "learning_rate": 3.999755453245272e-05, + "loss": 74.9575, + "step": 26120 + }, + { + "epoch": 0.10556850640562063, + "grad_norm": 1000.903564453125, + "learning_rate": 3.999751066934816e-05, + "loss": 110.9637, + "step": 26130 + }, + { + "epoch": 0.10560890767098825, + "grad_norm": 864.552978515625, + "learning_rate": 3.999746641638183e-05, + "loss": 121.8153, + "step": 26140 + }, + { + "epoch": 0.10564930893635589, + "grad_norm": 1474.13232421875, + "learning_rate": 3.9997421773554596e-05, + "loss": 192.2268, + "step": 26150 + }, + { + "epoch": 0.10568971020172352, + "grad_norm": 636.214111328125, + "learning_rate": 3.9997376740867334e-05, + "loss": 143.2324, + "step": 26160 + }, + { + "epoch": 0.10573011146709115, + "grad_norm": 1080.510009765625, + "learning_rate": 3.9997331318320906e-05, + "loss": 145.9672, + "step": 26170 + }, + { + "epoch": 0.10577051273245879, + "grad_norm": 763.9314575195312, + "learning_rate": 3.9997285505916215e-05, + "loss": 105.953, + "step": 26180 + }, + { + "epoch": 0.10581091399782641, + "grad_norm": 475.3988037109375, + "learning_rate": 3.999723930365415e-05, + "loss": 121.5401, + "step": 26190 + }, + { + "epoch": 0.10585131526319404, + "grad_norm": 394.64471435546875, + "learning_rate": 3.999719271153561e-05, + "loss": 120.2106, + "step": 26200 + }, + { + "epoch": 0.10589171652856168, + "grad_norm": 1418.583740234375, + "learning_rate": 3.9997145729561505e-05, + "loss": 121.0687, + "step": 26210 + }, + { + "epoch": 0.1059321177939293, + "grad_norm": 1838.7020263671875, + "learning_rate": 3.999709835773274e-05, + "loss": 175.7411, + "step": 26220 + }, + { + "epoch": 0.10597251905929694, + "grad_norm": 1785.2110595703125, + "learning_rate": 3.9997050596050255e-05, + "loss": 127.3275, + "step": 26230 + }, + { + "epoch": 0.10601292032466457, + "grad_norm": 4083.837158203125, + "learning_rate": 3.999700244451497e-05, + "loss": 103.6764, + "step": 26240 + }, + { + "epoch": 0.1060533215900322, + "grad_norm": 756.5249633789062, + "learning_rate": 
3.999695390312783e-05, + "loss": 68.4414, + "step": 26250 + }, + { + "epoch": 0.10609372285539984, + "grad_norm": 654.0457763671875, + "learning_rate": 3.999690497188978e-05, + "loss": 98.9523, + "step": 26260 + }, + { + "epoch": 0.10613412412076746, + "grad_norm": 3902.94873046875, + "learning_rate": 3.999685565080176e-05, + "loss": 162.1784, + "step": 26270 + }, + { + "epoch": 0.10617452538613509, + "grad_norm": 693.3687744140625, + "learning_rate": 3.999680593986475e-05, + "loss": 111.5076, + "step": 26280 + }, + { + "epoch": 0.10621492665150273, + "grad_norm": 1095.3060302734375, + "learning_rate": 3.999675583907972e-05, + "loss": 169.3297, + "step": 26290 + }, + { + "epoch": 0.10625532791687035, + "grad_norm": 1285.42919921875, + "learning_rate": 3.999670534844763e-05, + "loss": 163.9293, + "step": 26300 + }, + { + "epoch": 0.106295729182238, + "grad_norm": 1732.74462890625, + "learning_rate": 3.9996654467969485e-05, + "loss": 119.1369, + "step": 26310 + }, + { + "epoch": 0.10633613044760562, + "grad_norm": 1789.8643798828125, + "learning_rate": 3.999660319764626e-05, + "loss": 106.7088, + "step": 26320 + }, + { + "epoch": 0.10637653171297325, + "grad_norm": 819.8723754882812, + "learning_rate": 3.9996551537478965e-05, + "loss": 228.2511, + "step": 26330 + }, + { + "epoch": 0.10641693297834089, + "grad_norm": 2320.794189453125, + "learning_rate": 3.99964994874686e-05, + "loss": 138.5962, + "step": 26340 + }, + { + "epoch": 0.10645733424370851, + "grad_norm": 1431.3857421875, + "learning_rate": 3.9996447047616185e-05, + "loss": 169.6191, + "step": 26350 + }, + { + "epoch": 0.10649773550907614, + "grad_norm": 1313.1748046875, + "learning_rate": 3.999639421792274e-05, + "loss": 151.7788, + "step": 26360 + }, + { + "epoch": 0.10653813677444378, + "grad_norm": 2081.768798828125, + "learning_rate": 3.999634099838929e-05, + "loss": 140.8083, + "step": 26370 + }, + { + "epoch": 0.1065785380398114, + "grad_norm": 529.9302368164062, + "learning_rate": 3.9996287389016876e-05, + "loss": 143.9028, + "step": 26380 + }, + { + "epoch": 0.10661893930517904, + "grad_norm": 547.4563598632812, + "learning_rate": 3.999623338980655e-05, + "loss": 184.9177, + "step": 26390 + }, + { + "epoch": 0.10665934057054667, + "grad_norm": 904.5349731445312, + "learning_rate": 3.999617900075936e-05, + "loss": 107.4025, + "step": 26400 + }, + { + "epoch": 0.1066997418359143, + "grad_norm": 1460.05908203125, + "learning_rate": 3.9996124221876364e-05, + "loss": 143.8187, + "step": 26410 + }, + { + "epoch": 0.10674014310128194, + "grad_norm": 1365.607177734375, + "learning_rate": 3.9996069053158626e-05, + "loss": 215.0475, + "step": 26420 + }, + { + "epoch": 0.10678054436664956, + "grad_norm": 1043.517822265625, + "learning_rate": 3.9996013494607234e-05, + "loss": 135.6547, + "step": 26430 + }, + { + "epoch": 0.10682094563201719, + "grad_norm": 1315.619384765625, + "learning_rate": 3.999595754622326e-05, + "loss": 167.2073, + "step": 26440 + }, + { + "epoch": 0.10686134689738483, + "grad_norm": 4006.69091796875, + "learning_rate": 3.99959012080078e-05, + "loss": 151.6038, + "step": 26450 + }, + { + "epoch": 0.10690174816275246, + "grad_norm": 1135.918701171875, + "learning_rate": 3.999584447996196e-05, + "loss": 137.4702, + "step": 26460 + }, + { + "epoch": 0.1069421494281201, + "grad_norm": 730.3721923828125, + "learning_rate": 3.9995787362086824e-05, + "loss": 108.0853, + "step": 26470 + }, + { + "epoch": 0.10698255069348772, + "grad_norm": 1244.3421630859375, + "learning_rate": 3.9995729854383526e-05, + "loss": 
131.8052, + "step": 26480 + }, + { + "epoch": 0.10702295195885535, + "grad_norm": 1085.71435546875, + "learning_rate": 3.999567195685318e-05, + "loss": 162.3974, + "step": 26490 + }, + { + "epoch": 0.10706335322422299, + "grad_norm": 885.6151123046875, + "learning_rate": 3.999561366949691e-05, + "loss": 126.0882, + "step": 26500 + }, + { + "epoch": 0.10710375448959061, + "grad_norm": 7842.38037109375, + "learning_rate": 3.9995554992315863e-05, + "loss": 157.5577, + "step": 26510 + }, + { + "epoch": 0.10714415575495824, + "grad_norm": 553.1751098632812, + "learning_rate": 3.999549592531118e-05, + "loss": 97.4429, + "step": 26520 + }, + { + "epoch": 0.10718455702032588, + "grad_norm": 1150.8751220703125, + "learning_rate": 3.9995436468484e-05, + "loss": 159.9094, + "step": 26530 + }, + { + "epoch": 0.1072249582856935, + "grad_norm": 924.4302368164062, + "learning_rate": 3.999537662183549e-05, + "loss": 120.61, + "step": 26540 + }, + { + "epoch": 0.10726535955106115, + "grad_norm": 981.9138793945312, + "learning_rate": 3.9995316385366825e-05, + "loss": 109.1108, + "step": 26550 + }, + { + "epoch": 0.10730576081642877, + "grad_norm": 1046.45556640625, + "learning_rate": 3.999525575907918e-05, + "loss": 179.5183, + "step": 26560 + }, + { + "epoch": 0.1073461620817964, + "grad_norm": 1549.3922119140625, + "learning_rate": 3.999519474297372e-05, + "loss": 155.7728, + "step": 26570 + }, + { + "epoch": 0.10738656334716404, + "grad_norm": 640.2333374023438, + "learning_rate": 3.9995133337051645e-05, + "loss": 194.4919, + "step": 26580 + }, + { + "epoch": 0.10742696461253166, + "grad_norm": 500.25335693359375, + "learning_rate": 3.999507154131415e-05, + "loss": 169.2083, + "step": 26590 + }, + { + "epoch": 0.10746736587789929, + "grad_norm": 2017.2098388671875, + "learning_rate": 3.999500935576245e-05, + "loss": 164.5651, + "step": 26600 + }, + { + "epoch": 0.10750776714326693, + "grad_norm": 1395.294189453125, + "learning_rate": 3.999494678039774e-05, + "loss": 143.3787, + "step": 26610 + }, + { + "epoch": 0.10754816840863456, + "grad_norm": 322.99188232421875, + "learning_rate": 3.999488381522125e-05, + "loss": 86.5321, + "step": 26620 + }, + { + "epoch": 0.1075885696740022, + "grad_norm": 875.3024291992188, + "learning_rate": 3.999482046023421e-05, + "loss": 122.2109, + "step": 26630 + }, + { + "epoch": 0.10762897093936982, + "grad_norm": 1007.3595581054688, + "learning_rate": 3.9994756715437846e-05, + "loss": 176.4227, + "step": 26640 + }, + { + "epoch": 0.10766937220473745, + "grad_norm": 1908.96923828125, + "learning_rate": 3.9994692580833406e-05, + "loss": 127.4309, + "step": 26650 + }, + { + "epoch": 0.10770977347010509, + "grad_norm": 3114.40576171875, + "learning_rate": 3.999462805642214e-05, + "loss": 174.1603, + "step": 26660 + }, + { + "epoch": 0.10775017473547271, + "grad_norm": 1950.231201171875, + "learning_rate": 3.999456314220531e-05, + "loss": 156.5334, + "step": 26670 + }, + { + "epoch": 0.10779057600084034, + "grad_norm": 1874.5142822265625, + "learning_rate": 3.9994497838184173e-05, + "loss": 109.2236, + "step": 26680 + }, + { + "epoch": 0.10783097726620798, + "grad_norm": 1265.1324462890625, + "learning_rate": 3.9994432144360014e-05, + "loss": 139.3108, + "step": 26690 + }, + { + "epoch": 0.1078713785315756, + "grad_norm": 868.2112426757812, + "learning_rate": 3.99943660607341e-05, + "loss": 112.361, + "step": 26700 + }, + { + "epoch": 0.10791177979694325, + "grad_norm": 1146.9168701171875, + "learning_rate": 3.9994299587307724e-05, + "loss": 118.7573, + "step": 26710 + }, + { 
+ "epoch": 0.10795218106231087, + "grad_norm": 507.3197021484375, + "learning_rate": 3.999423272408219e-05, + "loss": 140.2255, + "step": 26720 + }, + { + "epoch": 0.1079925823276785, + "grad_norm": 3563.962158203125, + "learning_rate": 3.9994165471058795e-05, + "loss": 178.1527, + "step": 26730 + }, + { + "epoch": 0.10803298359304614, + "grad_norm": 1183.19921875, + "learning_rate": 3.999409782823884e-05, + "loss": 164.3575, + "step": 26740 + }, + { + "epoch": 0.10807338485841377, + "grad_norm": 843.54541015625, + "learning_rate": 3.999402979562367e-05, + "loss": 139.0061, + "step": 26750 + }, + { + "epoch": 0.10811378612378139, + "grad_norm": 675.668701171875, + "learning_rate": 3.9993961373214585e-05, + "loss": 112.5527, + "step": 26760 + }, + { + "epoch": 0.10815418738914903, + "grad_norm": 491.5544738769531, + "learning_rate": 3.9993892561012935e-05, + "loss": 161.9316, + "step": 26770 + }, + { + "epoch": 0.10819458865451666, + "grad_norm": 1235.862548828125, + "learning_rate": 3.999382335902005e-05, + "loss": 176.2126, + "step": 26780 + }, + { + "epoch": 0.1082349899198843, + "grad_norm": 902.8421020507812, + "learning_rate": 3.999375376723729e-05, + "loss": 154.6755, + "step": 26790 + }, + { + "epoch": 0.10827539118525192, + "grad_norm": 2088.233642578125, + "learning_rate": 3.9993683785666e-05, + "loss": 125.4781, + "step": 26800 + }, + { + "epoch": 0.10831579245061955, + "grad_norm": 687.3153076171875, + "learning_rate": 3.999361341430756e-05, + "loss": 133.3401, + "step": 26810 + }, + { + "epoch": 0.10835619371598719, + "grad_norm": 539.6790161132812, + "learning_rate": 3.999354265316333e-05, + "loss": 137.7443, + "step": 26820 + }, + { + "epoch": 0.10839659498135482, + "grad_norm": 3402.142333984375, + "learning_rate": 3.999347150223469e-05, + "loss": 151.5117, + "step": 26830 + }, + { + "epoch": 0.10843699624672244, + "grad_norm": 642.5357055664062, + "learning_rate": 3.999339996152303e-05, + "loss": 142.0471, + "step": 26840 + }, + { + "epoch": 0.10847739751209008, + "grad_norm": 465.2022399902344, + "learning_rate": 3.999332803102974e-05, + "loss": 119.495, + "step": 26850 + }, + { + "epoch": 0.10851779877745771, + "grad_norm": 625.1735229492188, + "learning_rate": 3.999325571075624e-05, + "loss": 117.1716, + "step": 26860 + }, + { + "epoch": 0.10855820004282535, + "grad_norm": 434.5945129394531, + "learning_rate": 3.999318300070392e-05, + "loss": 97.0967, + "step": 26870 + }, + { + "epoch": 0.10859860130819297, + "grad_norm": 1035.56494140625, + "learning_rate": 3.999310990087421e-05, + "loss": 119.3203, + "step": 26880 + }, + { + "epoch": 0.1086390025735606, + "grad_norm": 789.8671875, + "learning_rate": 3.999303641126852e-05, + "loss": 113.0917, + "step": 26890 + }, + { + "epoch": 0.10867940383892824, + "grad_norm": 1357.037109375, + "learning_rate": 3.999296253188829e-05, + "loss": 209.6412, + "step": 26900 + }, + { + "epoch": 0.10871980510429587, + "grad_norm": 1005.5947875976562, + "learning_rate": 3.999288826273497e-05, + "loss": 218.9116, + "step": 26910 + }, + { + "epoch": 0.10876020636966349, + "grad_norm": 1217.528076171875, + "learning_rate": 3.999281360381e-05, + "loss": 138.5479, + "step": 26920 + }, + { + "epoch": 0.10880060763503113, + "grad_norm": 675.9392700195312, + "learning_rate": 3.999273855511483e-05, + "loss": 144.6215, + "step": 26930 + }, + { + "epoch": 0.10884100890039876, + "grad_norm": 756.1974487304688, + "learning_rate": 3.999266311665094e-05, + "loss": 99.6617, + "step": 26940 + }, + { + "epoch": 0.1088814101657664, + "grad_norm": 
459.0271301269531, + "learning_rate": 3.999258728841977e-05, + "loss": 171.2121, + "step": 26950 + }, + { + "epoch": 0.10892181143113402, + "grad_norm": 1129.8890380859375, + "learning_rate": 3.999251107042284e-05, + "loss": 95.4528, + "step": 26960 + }, + { + "epoch": 0.10896221269650165, + "grad_norm": 473.2735290527344, + "learning_rate": 3.99924344626616e-05, + "loss": 118.7021, + "step": 26970 + }, + { + "epoch": 0.10900261396186929, + "grad_norm": 827.1238403320312, + "learning_rate": 3.999235746513757e-05, + "loss": 137.2943, + "step": 26980 + }, + { + "epoch": 0.10904301522723692, + "grad_norm": 619.547119140625, + "learning_rate": 3.999228007785222e-05, + "loss": 134.7832, + "step": 26990 + }, + { + "epoch": 0.10908341649260454, + "grad_norm": 430.4872741699219, + "learning_rate": 3.999220230080709e-05, + "loss": 110.9278, + "step": 27000 + }, + { + "epoch": 0.10912381775797218, + "grad_norm": 580.2001953125, + "learning_rate": 3.999212413400368e-05, + "loss": 140.1889, + "step": 27010 + }, + { + "epoch": 0.10916421902333981, + "grad_norm": 1044.234375, + "learning_rate": 3.999204557744352e-05, + "loss": 138.3689, + "step": 27020 + }, + { + "epoch": 0.10920462028870745, + "grad_norm": 595.6070556640625, + "learning_rate": 3.999196663112813e-05, + "loss": 183.3841, + "step": 27030 + }, + { + "epoch": 0.10924502155407508, + "grad_norm": 752.8215942382812, + "learning_rate": 3.9991887295059065e-05, + "loss": 142.6584, + "step": 27040 + }, + { + "epoch": 0.1092854228194427, + "grad_norm": 573.3093872070312, + "learning_rate": 3.999180756923787e-05, + "loss": 191.9731, + "step": 27050 + }, + { + "epoch": 0.10932582408481034, + "grad_norm": 610.300537109375, + "learning_rate": 3.999172745366609e-05, + "loss": 143.4345, + "step": 27060 + }, + { + "epoch": 0.10936622535017797, + "grad_norm": 903.7313842773438, + "learning_rate": 3.999164694834529e-05, + "loss": 151.6566, + "step": 27070 + }, + { + "epoch": 0.1094066266155456, + "grad_norm": 1318.819091796875, + "learning_rate": 3.999156605327704e-05, + "loss": 131.4579, + "step": 27080 + }, + { + "epoch": 0.10944702788091323, + "grad_norm": 2685.3056640625, + "learning_rate": 3.999148476846292e-05, + "loss": 109.8586, + "step": 27090 + }, + { + "epoch": 0.10948742914628086, + "grad_norm": 750.9153442382812, + "learning_rate": 3.9991403093904505e-05, + "loss": 198.4575, + "step": 27100 + }, + { + "epoch": 0.1095278304116485, + "grad_norm": 1062.3038330078125, + "learning_rate": 3.99913210296034e-05, + "loss": 162.1222, + "step": 27110 + }, + { + "epoch": 0.10956823167701613, + "grad_norm": 1137.71533203125, + "learning_rate": 3.99912385755612e-05, + "loss": 76.6659, + "step": 27120 + }, + { + "epoch": 0.10960863294238375, + "grad_norm": 680.3126831054688, + "learning_rate": 3.9991155731779506e-05, + "loss": 115.5408, + "step": 27130 + }, + { + "epoch": 0.10964903420775139, + "grad_norm": 822.4907836914062, + "learning_rate": 3.999107249825994e-05, + "loss": 114.5771, + "step": 27140 + }, + { + "epoch": 0.10968943547311902, + "grad_norm": 778.6092529296875, + "learning_rate": 3.999098887500413e-05, + "loss": 126.7003, + "step": 27150 + }, + { + "epoch": 0.10972983673848664, + "grad_norm": 422.9194030761719, + "learning_rate": 3.999090486201369e-05, + "loss": 135.2064, + "step": 27160 + }, + { + "epoch": 0.10977023800385428, + "grad_norm": 4133.42578125, + "learning_rate": 3.999082045929028e-05, + "loss": 181.055, + "step": 27170 + }, + { + "epoch": 0.10981063926922191, + "grad_norm": 598.9992065429688, + "learning_rate": 
3.999073566683552e-05, + "loss": 112.7797, + "step": 27180 + }, + { + "epoch": 0.10985104053458955, + "grad_norm": 943.6561279296875, + "learning_rate": 3.999065048465108e-05, + "loss": 147.7453, + "step": 27190 + }, + { + "epoch": 0.10989144179995718, + "grad_norm": 848.5738525390625, + "learning_rate": 3.9990564912738626e-05, + "loss": 96.1952, + "step": 27200 + }, + { + "epoch": 0.1099318430653248, + "grad_norm": 669.5361938476562, + "learning_rate": 3.999047895109981e-05, + "loss": 135.6896, + "step": 27210 + }, + { + "epoch": 0.10997224433069244, + "grad_norm": 3429.886474609375, + "learning_rate": 3.999039259973632e-05, + "loss": 150.3811, + "step": 27220 + }, + { + "epoch": 0.11001264559606007, + "grad_norm": 432.8880615234375, + "learning_rate": 3.999030585864983e-05, + "loss": 109.6579, + "step": 27230 + }, + { + "epoch": 0.1100530468614277, + "grad_norm": 1381.6793212890625, + "learning_rate": 3.999021872784203e-05, + "loss": 116.5078, + "step": 27240 + }, + { + "epoch": 0.11009344812679533, + "grad_norm": 651.842529296875, + "learning_rate": 3.9990131207314634e-05, + "loss": 98.7368, + "step": 27250 + }, + { + "epoch": 0.11013384939216296, + "grad_norm": 1877.3756103515625, + "learning_rate": 3.9990043297069335e-05, + "loss": 123.1046, + "step": 27260 + }, + { + "epoch": 0.1101742506575306, + "grad_norm": 1399.174072265625, + "learning_rate": 3.998995499710785e-05, + "loss": 118.4867, + "step": 27270 + }, + { + "epoch": 0.11021465192289823, + "grad_norm": 929.4814453125, + "learning_rate": 3.99898663074319e-05, + "loss": 101.7252, + "step": 27280 + }, + { + "epoch": 0.11025505318826585, + "grad_norm": 2416.958251953125, + "learning_rate": 3.9989777228043216e-05, + "loss": 118.6349, + "step": 27290 + }, + { + "epoch": 0.11029545445363349, + "grad_norm": 1126.2501220703125, + "learning_rate": 3.998968775894354e-05, + "loss": 151.9945, + "step": 27300 + }, + { + "epoch": 0.11033585571900112, + "grad_norm": 823.9119873046875, + "learning_rate": 3.9989597900134594e-05, + "loss": 91.0396, + "step": 27310 + }, + { + "epoch": 0.11037625698436875, + "grad_norm": 411.7733459472656, + "learning_rate": 3.998950765161816e-05, + "loss": 173.589, + "step": 27320 + }, + { + "epoch": 0.11041665824973639, + "grad_norm": 718.1305541992188, + "learning_rate": 3.9989417013395975e-05, + "loss": 123.1365, + "step": 27330 + }, + { + "epoch": 0.11045705951510401, + "grad_norm": 1297.20654296875, + "learning_rate": 3.998932598546982e-05, + "loss": 197.4148, + "step": 27340 + }, + { + "epoch": 0.11049746078047164, + "grad_norm": 2216.52294921875, + "learning_rate": 3.998923456784146e-05, + "loss": 182.3558, + "step": 27350 + }, + { + "epoch": 0.11053786204583928, + "grad_norm": 691.38720703125, + "learning_rate": 3.998914276051269e-05, + "loss": 122.3042, + "step": 27360 + }, + { + "epoch": 0.1105782633112069, + "grad_norm": 713.3118896484375, + "learning_rate": 3.9989050563485276e-05, + "loss": 201.8379, + "step": 27370 + }, + { + "epoch": 0.11061866457657454, + "grad_norm": 526.919677734375, + "learning_rate": 3.998895797676103e-05, + "loss": 119.1798, + "step": 27380 + }, + { + "epoch": 0.11065906584194217, + "grad_norm": 613.4383544921875, + "learning_rate": 3.9988865000341764e-05, + "loss": 129.4594, + "step": 27390 + }, + { + "epoch": 0.1106994671073098, + "grad_norm": 1263.92529296875, + "learning_rate": 3.998877163422929e-05, + "loss": 143.8302, + "step": 27400 + }, + { + "epoch": 0.11073986837267744, + "grad_norm": 1486.3411865234375, + "learning_rate": 3.9988677878425414e-05, + "loss": 151.2901, 
+ "step": 27410 + }, + { + "epoch": 0.11078026963804506, + "grad_norm": 3510.363037109375, + "learning_rate": 3.998858373293198e-05, + "loss": 144.6092, + "step": 27420 + }, + { + "epoch": 0.11082067090341269, + "grad_norm": 854.3351440429688, + "learning_rate": 3.99884891977508e-05, + "loss": 122.5985, + "step": 27430 + }, + { + "epoch": 0.11086107216878033, + "grad_norm": 1258.461181640625, + "learning_rate": 3.998839427288375e-05, + "loss": 112.3812, + "step": 27440 + }, + { + "epoch": 0.11090147343414795, + "grad_norm": 1034.778564453125, + "learning_rate": 3.998829895833265e-05, + "loss": 156.2802, + "step": 27450 + }, + { + "epoch": 0.1109418746995156, + "grad_norm": 988.9387817382812, + "learning_rate": 3.9988203254099373e-05, + "loss": 152.5029, + "step": 27460 + }, + { + "epoch": 0.11098227596488322, + "grad_norm": 578.75634765625, + "learning_rate": 3.9988107160185785e-05, + "loss": 144.8219, + "step": 27470 + }, + { + "epoch": 0.11102267723025085, + "grad_norm": 1668.7718505859375, + "learning_rate": 3.998801067659376e-05, + "loss": 131.4268, + "step": 27480 + }, + { + "epoch": 0.11106307849561849, + "grad_norm": 686.6883544921875, + "learning_rate": 3.998791380332517e-05, + "loss": 109.0343, + "step": 27490 + }, + { + "epoch": 0.11110347976098611, + "grad_norm": 645.2587280273438, + "learning_rate": 3.998781654038192e-05, + "loss": 169.5766, + "step": 27500 + }, + { + "epoch": 0.11114388102635374, + "grad_norm": 766.1406860351562, + "learning_rate": 3.998771888776589e-05, + "loss": 79.9745, + "step": 27510 + }, + { + "epoch": 0.11118428229172138, + "grad_norm": 235.2288818359375, + "learning_rate": 3.998762084547899e-05, + "loss": 134.7412, + "step": 27520 + }, + { + "epoch": 0.111224683557089, + "grad_norm": 730.3519897460938, + "learning_rate": 3.9987522413523135e-05, + "loss": 127.3266, + "step": 27530 + }, + { + "epoch": 0.11126508482245664, + "grad_norm": 1727.0653076171875, + "learning_rate": 3.998742359190023e-05, + "loss": 125.8219, + "step": 27540 + }, + { + "epoch": 0.11130548608782427, + "grad_norm": 660.0388793945312, + "learning_rate": 3.998732438061222e-05, + "loss": 155.9062, + "step": 27550 + }, + { + "epoch": 0.1113458873531919, + "grad_norm": 692.3839111328125, + "learning_rate": 3.998722477966103e-05, + "loss": 87.8351, + "step": 27560 + }, + { + "epoch": 0.11138628861855954, + "grad_norm": 1181.7989501953125, + "learning_rate": 3.99871247890486e-05, + "loss": 132.0551, + "step": 27570 + }, + { + "epoch": 0.11142668988392716, + "grad_norm": 1333.6834716796875, + "learning_rate": 3.998702440877689e-05, + "loss": 121.5415, + "step": 27580 + }, + { + "epoch": 0.11146709114929479, + "grad_norm": 719.7330322265625, + "learning_rate": 3.998692363884784e-05, + "loss": 113.2745, + "step": 27590 + }, + { + "epoch": 0.11150749241466243, + "grad_norm": 1030.0650634765625, + "learning_rate": 3.998682247926343e-05, + "loss": 190.0598, + "step": 27600 + }, + { + "epoch": 0.11154789368003006, + "grad_norm": 1507.22119140625, + "learning_rate": 3.998672093002562e-05, + "loss": 128.9125, + "step": 27610 + }, + { + "epoch": 0.1115882949453977, + "grad_norm": 1017.4216918945312, + "learning_rate": 3.99866189911364e-05, + "loss": 191.0413, + "step": 27620 + }, + { + "epoch": 0.11162869621076532, + "grad_norm": 1253.16259765625, + "learning_rate": 3.998651666259775e-05, + "loss": 95.7137, + "step": 27630 + }, + { + "epoch": 0.11166909747613295, + "grad_norm": 474.02740478515625, + "learning_rate": 3.998641394441167e-05, + "loss": 85.0046, + "step": 27640 + }, + { + "epoch": 
0.11170949874150059, + "grad_norm": 967.4948120117188, + "learning_rate": 3.998631083658016e-05, + "loss": 154.0693, + "step": 27650 + }, + { + "epoch": 0.11174990000686821, + "grad_norm": 920.3174438476562, + "learning_rate": 3.9986207339105235e-05, + "loss": 123.1377, + "step": 27660 + }, + { + "epoch": 0.11179030127223584, + "grad_norm": 1150.312255859375, + "learning_rate": 3.99861034519889e-05, + "loss": 134.3786, + "step": 27670 + }, + { + "epoch": 0.11183070253760348, + "grad_norm": 1655.823974609375, + "learning_rate": 3.99859991752332e-05, + "loss": 119.7579, + "step": 27680 + }, + { + "epoch": 0.1118711038029711, + "grad_norm": 880.95947265625, + "learning_rate": 3.998589450884014e-05, + "loss": 110.3076, + "step": 27690 + }, + { + "epoch": 0.11191150506833875, + "grad_norm": 2082.101318359375, + "learning_rate": 3.998578945281179e-05, + "loss": 130.3395, + "step": 27700 + }, + { + "epoch": 0.11195190633370637, + "grad_norm": 981.1495361328125, + "learning_rate": 3.998568400715018e-05, + "loss": 115.434, + "step": 27710 + }, + { + "epoch": 0.111992307599074, + "grad_norm": 777.9559326171875, + "learning_rate": 3.998557817185737e-05, + "loss": 84.2251, + "step": 27720 + }, + { + "epoch": 0.11203270886444164, + "grad_norm": 2671.027587890625, + "learning_rate": 3.998547194693543e-05, + "loss": 170.6464, + "step": 27730 + }, + { + "epoch": 0.11207311012980926, + "grad_norm": 956.1553344726562, + "learning_rate": 3.9985365332386424e-05, + "loss": 121.7109, + "step": 27740 + }, + { + "epoch": 0.11211351139517689, + "grad_norm": 394.7966613769531, + "learning_rate": 3.998525832821242e-05, + "loss": 142.3825, + "step": 27750 + }, + { + "epoch": 0.11215391266054453, + "grad_norm": 753.6959838867188, + "learning_rate": 3.998515093441553e-05, + "loss": 107.6905, + "step": 27760 + }, + { + "epoch": 0.11219431392591216, + "grad_norm": 880.3074340820312, + "learning_rate": 3.998504315099783e-05, + "loss": 99.0025, + "step": 27770 + }, + { + "epoch": 0.1122347151912798, + "grad_norm": 612.2764282226562, + "learning_rate": 3.998493497796142e-05, + "loss": 111.8413, + "step": 27780 + }, + { + "epoch": 0.11227511645664742, + "grad_norm": 1153.637939453125, + "learning_rate": 3.998482641530842e-05, + "loss": 106.1542, + "step": 27790 + }, + { + "epoch": 0.11231551772201505, + "grad_norm": 1715.0885009765625, + "learning_rate": 3.998471746304094e-05, + "loss": 189.9561, + "step": 27800 + }, + { + "epoch": 0.11235591898738269, + "grad_norm": 1114.067138671875, + "learning_rate": 3.99846081211611e-05, + "loss": 164.1016, + "step": 27810 + }, + { + "epoch": 0.11239632025275031, + "grad_norm": 898.3765869140625, + "learning_rate": 3.998449838967104e-05, + "loss": 125.1826, + "step": 27820 + }, + { + "epoch": 0.11243672151811794, + "grad_norm": 774.5704956054688, + "learning_rate": 3.9984388268572894e-05, + "loss": 143.2113, + "step": 27830 + }, + { + "epoch": 0.11247712278348558, + "grad_norm": 965.7283325195312, + "learning_rate": 3.998427775786881e-05, + "loss": 86.8089, + "step": 27840 + }, + { + "epoch": 0.1125175240488532, + "grad_norm": 1064.5927734375, + "learning_rate": 3.998416685756094e-05, + "loss": 173.673, + "step": 27850 + }, + { + "epoch": 0.11255792531422085, + "grad_norm": 747.6638793945312, + "learning_rate": 3.998405556765145e-05, + "loss": 140.5325, + "step": 27860 + }, + { + "epoch": 0.11259832657958847, + "grad_norm": 898.859375, + "learning_rate": 3.9983943888142505e-05, + "loss": 109.8828, + "step": 27870 + }, + { + "epoch": 0.1126387278449561, + "grad_norm": 1207.3333740234375, 
+ "learning_rate": 3.998383181903629e-05, + "loss": 130.7373, + "step": 27880 + }, + { + "epoch": 0.11267912911032374, + "grad_norm": 688.1647338867188, + "learning_rate": 3.9983719360334985e-05, + "loss": 118.4504, + "step": 27890 + }, + { + "epoch": 0.11271953037569137, + "grad_norm": 595.9921875, + "learning_rate": 3.9983606512040786e-05, + "loss": 137.0864, + "step": 27900 + }, + { + "epoch": 0.11275993164105899, + "grad_norm": 909.3873901367188, + "learning_rate": 3.998349327415588e-05, + "loss": 91.4581, + "step": 27910 + }, + { + "epoch": 0.11280033290642663, + "grad_norm": 1299.664794921875, + "learning_rate": 3.998337964668249e-05, + "loss": 134.5161, + "step": 27920 + }, + { + "epoch": 0.11284073417179426, + "grad_norm": 619.64892578125, + "learning_rate": 3.998326562962283e-05, + "loss": 113.7857, + "step": 27930 + }, + { + "epoch": 0.1128811354371619, + "grad_norm": 2946.740478515625, + "learning_rate": 3.998315122297911e-05, + "loss": 119.7745, + "step": 27940 + }, + { + "epoch": 0.11292153670252952, + "grad_norm": 735.4960327148438, + "learning_rate": 3.9983036426753574e-05, + "loss": 186.0922, + "step": 27950 + }, + { + "epoch": 0.11296193796789715, + "grad_norm": 992.9759521484375, + "learning_rate": 3.998292124094845e-05, + "loss": 113.9569, + "step": 27960 + }, + { + "epoch": 0.11300233923326479, + "grad_norm": 1156.81689453125, + "learning_rate": 3.9982805665566e-05, + "loss": 181.8506, + "step": 27970 + }, + { + "epoch": 0.11304274049863242, + "grad_norm": 1523.46435546875, + "learning_rate": 3.998268970060846e-05, + "loss": 95.1897, + "step": 27980 + }, + { + "epoch": 0.11308314176400004, + "grad_norm": 828.2333374023438, + "learning_rate": 3.998257334607809e-05, + "loss": 192.7159, + "step": 27990 + }, + { + "epoch": 0.11312354302936768, + "grad_norm": 2007.5953369140625, + "learning_rate": 3.998245660197717e-05, + "loss": 124.4013, + "step": 28000 + }, + { + "epoch": 0.11316394429473531, + "grad_norm": 1072.2530517578125, + "learning_rate": 3.9982339468307974e-05, + "loss": 125.8463, + "step": 28010 + }, + { + "epoch": 0.11320434556010295, + "grad_norm": 861.5543212890625, + "learning_rate": 3.998222194507277e-05, + "loss": 115.0076, + "step": 28020 + }, + { + "epoch": 0.11324474682547057, + "grad_norm": 521.9403686523438, + "learning_rate": 3.9982104032273875e-05, + "loss": 140.046, + "step": 28030 + }, + { + "epoch": 0.1132851480908382, + "grad_norm": 821.1924438476562, + "learning_rate": 3.998198572991357e-05, + "loss": 149.4015, + "step": 28040 + }, + { + "epoch": 0.11332554935620584, + "grad_norm": 1583.7169189453125, + "learning_rate": 3.998186703799417e-05, + "loss": 160.3213, + "step": 28050 + }, + { + "epoch": 0.11336595062157347, + "grad_norm": 610.5123901367188, + "learning_rate": 3.998174795651798e-05, + "loss": 91.9125, + "step": 28060 + }, + { + "epoch": 0.11340635188694109, + "grad_norm": 1009.0413818359375, + "learning_rate": 3.998162848548733e-05, + "loss": 198.3852, + "step": 28070 + }, + { + "epoch": 0.11344675315230873, + "grad_norm": 650.27294921875, + "learning_rate": 3.998150862490453e-05, + "loss": 150.4999, + "step": 28080 + }, + { + "epoch": 0.11348715441767636, + "grad_norm": 1563.2398681640625, + "learning_rate": 3.998138837477195e-05, + "loss": 233.0465, + "step": 28090 + }, + { + "epoch": 0.113527555683044, + "grad_norm": 1080.60498046875, + "learning_rate": 3.998126773509191e-05, + "loss": 140.7859, + "step": 28100 + }, + { + "epoch": 0.11356795694841162, + "grad_norm": 1319.2265625, + "learning_rate": 3.998114670586678e-05, + "loss": 
122.1235, + "step": 28110 + }, + { + "epoch": 0.11360835821377925, + "grad_norm": 950.8355102539062, + "learning_rate": 3.998102528709889e-05, + "loss": 126.7984, + "step": 28120 + }, + { + "epoch": 0.11364875947914689, + "grad_norm": 790.1392211914062, + "learning_rate": 3.998090347879063e-05, + "loss": 126.6533, + "step": 28130 + }, + { + "epoch": 0.11368916074451452, + "grad_norm": 623.5432739257812, + "learning_rate": 3.998078128094437e-05, + "loss": 137.5285, + "step": 28140 + }, + { + "epoch": 0.11372956200988214, + "grad_norm": 597.8995971679688, + "learning_rate": 3.99806586935625e-05, + "loss": 94.1703, + "step": 28150 + }, + { + "epoch": 0.11376996327524978, + "grad_norm": 780.518798828125, + "learning_rate": 3.9980535716647394e-05, + "loss": 140.0151, + "step": 28160 + }, + { + "epoch": 0.11381036454061741, + "grad_norm": 927.0537719726562, + "learning_rate": 3.998041235020146e-05, + "loss": 130.0645, + "step": 28170 + }, + { + "epoch": 0.11385076580598505, + "grad_norm": 2056.5009765625, + "learning_rate": 3.99802885942271e-05, + "loss": 145.7187, + "step": 28180 + }, + { + "epoch": 0.11389116707135268, + "grad_norm": 1126.5228271484375, + "learning_rate": 3.998016444872673e-05, + "loss": 151.028, + "step": 28190 + }, + { + "epoch": 0.1139315683367203, + "grad_norm": 797.4663696289062, + "learning_rate": 3.998003991370277e-05, + "loss": 112.7953, + "step": 28200 + }, + { + "epoch": 0.11397196960208794, + "grad_norm": 813.968505859375, + "learning_rate": 3.9979914989157634e-05, + "loss": 114.2405, + "step": 28210 + }, + { + "epoch": 0.11401237086745557, + "grad_norm": 897.2117919921875, + "learning_rate": 3.997978967509378e-05, + "loss": 101.2041, + "step": 28220 + }, + { + "epoch": 0.1140527721328232, + "grad_norm": 925.91845703125, + "learning_rate": 3.997966397151364e-05, + "loss": 106.3773, + "step": 28230 + }, + { + "epoch": 0.11409317339819083, + "grad_norm": 965.0462036132812, + "learning_rate": 3.997953787841965e-05, + "loss": 145.5343, + "step": 28240 + }, + { + "epoch": 0.11413357466355846, + "grad_norm": 1008.1298217773438, + "learning_rate": 3.99794113958143e-05, + "loss": 154.1163, + "step": 28250 + }, + { + "epoch": 0.1141739759289261, + "grad_norm": 936.7122192382812, + "learning_rate": 3.997928452370003e-05, + "loss": 104.7568, + "step": 28260 + }, + { + "epoch": 0.11421437719429373, + "grad_norm": 512.8252563476562, + "learning_rate": 3.997915726207932e-05, + "loss": 151.193, + "step": 28270 + }, + { + "epoch": 0.11425477845966135, + "grad_norm": 1699.3017578125, + "learning_rate": 3.9979029610954664e-05, + "loss": 194.843, + "step": 28280 + }, + { + "epoch": 0.11429517972502899, + "grad_norm": 825.3704223632812, + "learning_rate": 3.997890157032853e-05, + "loss": 112.8255, + "step": 28290 + }, + { + "epoch": 0.11433558099039662, + "grad_norm": 955.8322143554688, + "learning_rate": 3.997877314020343e-05, + "loss": 84.8495, + "step": 28300 + }, + { + "epoch": 0.11437598225576424, + "grad_norm": 1212.8798828125, + "learning_rate": 3.9978644320581856e-05, + "loss": 195.61, + "step": 28310 + }, + { + "epoch": 0.11441638352113188, + "grad_norm": 796.518798828125, + "learning_rate": 3.997851511146633e-05, + "loss": 197.771, + "step": 28320 + }, + { + "epoch": 0.11445678478649951, + "grad_norm": 427.9107360839844, + "learning_rate": 3.997838551285936e-05, + "loss": 144.6287, + "step": 28330 + }, + { + "epoch": 0.11449718605186715, + "grad_norm": 885.8049926757812, + "learning_rate": 3.9978255524763494e-05, + "loss": 103.4699, + "step": 28340 + }, + { + "epoch": 
0.11453758731723478, + "grad_norm": 2357.444580078125, + "learning_rate": 3.9978125147181235e-05, + "loss": 165.6908, + "step": 28350 + }, + { + "epoch": 0.1145779885826024, + "grad_norm": 542.120361328125, + "learning_rate": 3.997799438011515e-05, + "loss": 88.4278, + "step": 28360 + }, + { + "epoch": 0.11461838984797004, + "grad_norm": 1002.8199462890625, + "learning_rate": 3.9977863223567774e-05, + "loss": 122.9399, + "step": 28370 + }, + { + "epoch": 0.11465879111333767, + "grad_norm": 1675.7783203125, + "learning_rate": 3.997773167754167e-05, + "loss": 122.7957, + "step": 28380 + }, + { + "epoch": 0.1146991923787053, + "grad_norm": 861.1492919921875, + "learning_rate": 3.9977599742039404e-05, + "loss": 118.3554, + "step": 28390 + }, + { + "epoch": 0.11473959364407293, + "grad_norm": 508.73638916015625, + "learning_rate": 3.9977467417063544e-05, + "loss": 129.1348, + "step": 28400 + }, + { + "epoch": 0.11477999490944056, + "grad_norm": 1106.717529296875, + "learning_rate": 3.9977334702616676e-05, + "loss": 130.643, + "step": 28410 + }, + { + "epoch": 0.1148203961748082, + "grad_norm": 1490.5709228515625, + "learning_rate": 3.997720159870137e-05, + "loss": 121.0768, + "step": 28420 + }, + { + "epoch": 0.11486079744017583, + "grad_norm": 1031.2021484375, + "learning_rate": 3.997706810532025e-05, + "loss": 124.4945, + "step": 28430 + }, + { + "epoch": 0.11490119870554345, + "grad_norm": 639.9796752929688, + "learning_rate": 3.9976934222475893e-05, + "loss": 142.1417, + "step": 28440 + }, + { + "epoch": 0.11494159997091109, + "grad_norm": 984.5978393554688, + "learning_rate": 3.997679995017092e-05, + "loss": 127.999, + "step": 28450 + }, + { + "epoch": 0.11498200123627872, + "grad_norm": 1058.95703125, + "learning_rate": 3.997666528840795e-05, + "loss": 96.5518, + "step": 28460 + }, + { + "epoch": 0.11502240250164635, + "grad_norm": 1090.6737060546875, + "learning_rate": 3.99765302371896e-05, + "loss": 113.0321, + "step": 28470 + }, + { + "epoch": 0.11506280376701399, + "grad_norm": 947.4888916015625, + "learning_rate": 3.9976394796518514e-05, + "loss": 110.2801, + "step": 28480 + }, + { + "epoch": 0.11510320503238161, + "grad_norm": 448.30877685546875, + "learning_rate": 3.997625896639733e-05, + "loss": 174.8779, + "step": 28490 + }, + { + "epoch": 0.11514360629774925, + "grad_norm": 722.8981323242188, + "learning_rate": 3.9976122746828684e-05, + "loss": 164.538, + "step": 28500 + }, + { + "epoch": 0.11518400756311688, + "grad_norm": 1285.0491943359375, + "learning_rate": 3.997598613781525e-05, + "loss": 141.1096, + "step": 28510 + }, + { + "epoch": 0.1152244088284845, + "grad_norm": 403.45599365234375, + "learning_rate": 3.997584913935967e-05, + "loss": 94.504, + "step": 28520 + }, + { + "epoch": 0.11526481009385214, + "grad_norm": 693.1490478515625, + "learning_rate": 3.997571175146463e-05, + "loss": 78.3341, + "step": 28530 + }, + { + "epoch": 0.11530521135921977, + "grad_norm": 1381.054443359375, + "learning_rate": 3.997557397413281e-05, + "loss": 107.657, + "step": 28540 + }, + { + "epoch": 0.1153456126245874, + "grad_norm": 1110.701171875, + "learning_rate": 3.9975435807366895e-05, + "loss": 139.7479, + "step": 28550 + }, + { + "epoch": 0.11538601388995504, + "grad_norm": 1617.94775390625, + "learning_rate": 3.997529725116957e-05, + "loss": 107.3495, + "step": 28560 + }, + { + "epoch": 0.11542641515532266, + "grad_norm": 1402.1943359375, + "learning_rate": 3.9975158305543536e-05, + "loss": 139.7567, + "step": 28570 + }, + { + "epoch": 0.1154668164206903, + "grad_norm": 
800.7005615234375, + "learning_rate": 3.9975018970491515e-05, + "loss": 119.3514, + "step": 28580 + }, + { + "epoch": 0.11550721768605793, + "grad_norm": 889.363037109375, + "learning_rate": 3.997487924601621e-05, + "loss": 166.7882, + "step": 28590 + }, + { + "epoch": 0.11554761895142555, + "grad_norm": 6848.908203125, + "learning_rate": 3.997473913212036e-05, + "loss": 157.7564, + "step": 28600 + }, + { + "epoch": 0.1155880202167932, + "grad_norm": 870.1151733398438, + "learning_rate": 3.9974598628806675e-05, + "loss": 146.5816, + "step": 28610 + }, + { + "epoch": 0.11562842148216082, + "grad_norm": 883.14697265625, + "learning_rate": 3.99744577360779e-05, + "loss": 132.0117, + "step": 28620 + }, + { + "epoch": 0.11566882274752845, + "grad_norm": 968.9771118164062, + "learning_rate": 3.9974316453936806e-05, + "loss": 101.9885, + "step": 28630 + }, + { + "epoch": 0.11570922401289609, + "grad_norm": 835.9603881835938, + "learning_rate": 3.997417478238612e-05, + "loss": 96.9856, + "step": 28640 + }, + { + "epoch": 0.11574962527826371, + "grad_norm": 922.0848999023438, + "learning_rate": 3.9974032721428615e-05, + "loss": 180.5169, + "step": 28650 + }, + { + "epoch": 0.11579002654363135, + "grad_norm": 755.614501953125, + "learning_rate": 3.9973890271067056e-05, + "loss": 166.4798, + "step": 28660 + }, + { + "epoch": 0.11583042780899898, + "grad_norm": 4686.51318359375, + "learning_rate": 3.997374743130423e-05, + "loss": 157.9636, + "step": 28670 + }, + { + "epoch": 0.1158708290743666, + "grad_norm": 860.5631103515625, + "learning_rate": 3.9973604202142906e-05, + "loss": 162.2773, + "step": 28680 + }, + { + "epoch": 0.11591123033973424, + "grad_norm": 2215.999267578125, + "learning_rate": 3.997346058358589e-05, + "loss": 170.0328, + "step": 28690 + }, + { + "epoch": 0.11595163160510187, + "grad_norm": 1153.8663330078125, + "learning_rate": 3.997331657563598e-05, + "loss": 137.3884, + "step": 28700 + }, + { + "epoch": 0.1159920328704695, + "grad_norm": 1097.7645263671875, + "learning_rate": 3.997317217829598e-05, + "loss": 155.5978, + "step": 28710 + }, + { + "epoch": 0.11603243413583714, + "grad_norm": 1121.33203125, + "learning_rate": 3.99730273915687e-05, + "loss": 152.9339, + "step": 28720 + }, + { + "epoch": 0.11607283540120476, + "grad_norm": 2548.195556640625, + "learning_rate": 3.997288221545697e-05, + "loss": 111.9545, + "step": 28730 + }, + { + "epoch": 0.1161132366665724, + "grad_norm": 1072.523193359375, + "learning_rate": 3.997273664996361e-05, + "loss": 152.1098, + "step": 28740 + }, + { + "epoch": 0.11615363793194003, + "grad_norm": 439.6767578125, + "learning_rate": 3.9972590695091476e-05, + "loss": 74.8738, + "step": 28750 + }, + { + "epoch": 0.11619403919730766, + "grad_norm": 1397.55810546875, + "learning_rate": 3.997244435084341e-05, + "loss": 97.7852, + "step": 28760 + }, + { + "epoch": 0.1162344404626753, + "grad_norm": 6666.3212890625, + "learning_rate": 3.997229761722225e-05, + "loss": 165.3239, + "step": 28770 + }, + { + "epoch": 0.11627484172804292, + "grad_norm": 1242.32568359375, + "learning_rate": 3.997215049423086e-05, + "loss": 104.8004, + "step": 28780 + }, + { + "epoch": 0.11631524299341055, + "grad_norm": 1075.94580078125, + "learning_rate": 3.9972002981872124e-05, + "loss": 162.0838, + "step": 28790 + }, + { + "epoch": 0.11635564425877819, + "grad_norm": 1146.7579345703125, + "learning_rate": 3.9971855080148906e-05, + "loss": 143.8984, + "step": 28800 + }, + { + "epoch": 0.11639604552414581, + "grad_norm": 1337.8802490234375, + "learning_rate": 
3.997170678906409e-05, + "loss": 168.5373, + "step": 28810 + }, + { + "epoch": 0.11643644678951345, + "grad_norm": 653.3975830078125, + "learning_rate": 3.997155810862057e-05, + "loss": 104.7905, + "step": 28820 + }, + { + "epoch": 0.11647684805488108, + "grad_norm": 678.8736572265625, + "learning_rate": 3.997140903882124e-05, + "loss": 113.749, + "step": 28830 + }, + { + "epoch": 0.1165172493202487, + "grad_norm": 5161.3095703125, + "learning_rate": 3.9971259579669006e-05, + "loss": 147.4017, + "step": 28840 + }, + { + "epoch": 0.11655765058561635, + "grad_norm": 1926.521484375, + "learning_rate": 3.997110973116679e-05, + "loss": 152.8937, + "step": 28850 + }, + { + "epoch": 0.11659805185098397, + "grad_norm": 591.2763061523438, + "learning_rate": 3.99709594933175e-05, + "loss": 105.2316, + "step": 28860 + }, + { + "epoch": 0.1166384531163516, + "grad_norm": 1824.0159912109375, + "learning_rate": 3.997080886612408e-05, + "loss": 140.4338, + "step": 28870 + }, + { + "epoch": 0.11667885438171924, + "grad_norm": 817.1720581054688, + "learning_rate": 3.9970657849589465e-05, + "loss": 93.7546, + "step": 28880 + }, + { + "epoch": 0.11671925564708686, + "grad_norm": 902.584716796875, + "learning_rate": 3.9970506443716586e-05, + "loss": 203.1233, + "step": 28890 + }, + { + "epoch": 0.1167596569124545, + "grad_norm": 1576.1380615234375, + "learning_rate": 3.99703546485084e-05, + "loss": 186.9483, + "step": 28900 + }, + { + "epoch": 0.11680005817782213, + "grad_norm": 978.4227294921875, + "learning_rate": 3.997020246396787e-05, + "loss": 136.0491, + "step": 28910 + }, + { + "epoch": 0.11684045944318976, + "grad_norm": 847.8509521484375, + "learning_rate": 3.9970049890097965e-05, + "loss": 111.4331, + "step": 28920 + }, + { + "epoch": 0.1168808607085574, + "grad_norm": 478.8564453125, + "learning_rate": 3.996989692690165e-05, + "loss": 76.0094, + "step": 28930 + }, + { + "epoch": 0.11692126197392502, + "grad_norm": 516.1453247070312, + "learning_rate": 3.996974357438192e-05, + "loss": 138.7733, + "step": 28940 + }, + { + "epoch": 0.11696166323929265, + "grad_norm": 956.2125244140625, + "learning_rate": 3.996958983254175e-05, + "loss": 178.8476, + "step": 28950 + }, + { + "epoch": 0.11700206450466029, + "grad_norm": 740.4742431640625, + "learning_rate": 3.996943570138416e-05, + "loss": 146.161, + "step": 28960 + }, + { + "epoch": 0.11704246577002791, + "grad_norm": 999.9993286132812, + "learning_rate": 3.996928118091213e-05, + "loss": 90.8432, + "step": 28970 + }, + { + "epoch": 0.11708286703539555, + "grad_norm": 1374.812255859375, + "learning_rate": 3.996912627112868e-05, + "loss": 152.9369, + "step": 28980 + }, + { + "epoch": 0.11712326830076318, + "grad_norm": 997.8038330078125, + "learning_rate": 3.996897097203684e-05, + "loss": 148.3639, + "step": 28990 + }, + { + "epoch": 0.11716366956613081, + "grad_norm": 742.9439086914062, + "learning_rate": 3.9968815283639625e-05, + "loss": 93.0244, + "step": 29000 + }, + { + "epoch": 0.11720407083149845, + "grad_norm": 511.66217041015625, + "learning_rate": 3.996865920594007e-05, + "loss": 193.8935, + "step": 29010 + }, + { + "epoch": 0.11724447209686607, + "grad_norm": 1232.9365234375, + "learning_rate": 3.996850273894124e-05, + "loss": 173.2582, + "step": 29020 + }, + { + "epoch": 0.1172848733622337, + "grad_norm": 1136.0797119140625, + "learning_rate": 3.996834588264615e-05, + "loss": 111.3706, + "step": 29030 + }, + { + "epoch": 0.11732527462760134, + "grad_norm": 1067.701904296875, + "learning_rate": 3.9968188637057886e-05, + "loss": 140.8814, + 
"step": 29040 + }, + { + "epoch": 0.11736567589296897, + "grad_norm": 5416.84814453125, + "learning_rate": 3.99680310021795e-05, + "loss": 178.5406, + "step": 29050 + }, + { + "epoch": 0.1174060771583366, + "grad_norm": 731.7986450195312, + "learning_rate": 3.9967872978014074e-05, + "loss": 75.8865, + "step": 29060 + }, + { + "epoch": 0.11744647842370423, + "grad_norm": 493.8356628417969, + "learning_rate": 3.996771456456468e-05, + "loss": 122.2606, + "step": 29070 + }, + { + "epoch": 0.11748687968907186, + "grad_norm": 931.1875, + "learning_rate": 3.996755576183442e-05, + "loss": 132.5147, + "step": 29080 + }, + { + "epoch": 0.1175272809544395, + "grad_norm": 714.7224731445312, + "learning_rate": 3.9967396569826374e-05, + "loss": 117.3321, + "step": 29090 + }, + { + "epoch": 0.11756768221980712, + "grad_norm": 1460.06494140625, + "learning_rate": 3.996723698854365e-05, + "loss": 104.3878, + "step": 29100 + }, + { + "epoch": 0.11760808348517475, + "grad_norm": 1124.1864013671875, + "learning_rate": 3.996707701798936e-05, + "loss": 138.5073, + "step": 29110 + }, + { + "epoch": 0.11764848475054239, + "grad_norm": 1219.954345703125, + "learning_rate": 3.9966916658166625e-05, + "loss": 115.9869, + "step": 29120 + }, + { + "epoch": 0.11768888601591002, + "grad_norm": 721.1972045898438, + "learning_rate": 3.996675590907857e-05, + "loss": 90.3769, + "step": 29130 + }, + { + "epoch": 0.11772928728127766, + "grad_norm": 996.167724609375, + "learning_rate": 3.996659477072833e-05, + "loss": 121.5288, + "step": 29140 + }, + { + "epoch": 0.11776968854664528, + "grad_norm": 558.3349609375, + "learning_rate": 3.996643324311905e-05, + "loss": 155.8622, + "step": 29150 + }, + { + "epoch": 0.11781008981201291, + "grad_norm": 1078.3984375, + "learning_rate": 3.9966271326253874e-05, + "loss": 105.7121, + "step": 29160 + }, + { + "epoch": 0.11785049107738055, + "grad_norm": 2245.874755859375, + "learning_rate": 3.996610902013595e-05, + "loss": 200.5702, + "step": 29170 + }, + { + "epoch": 0.11789089234274817, + "grad_norm": 740.459228515625, + "learning_rate": 3.996594632476846e-05, + "loss": 200.0761, + "step": 29180 + }, + { + "epoch": 0.1179312936081158, + "grad_norm": 780.41064453125, + "learning_rate": 3.996578324015456e-05, + "loss": 174.2602, + "step": 29190 + }, + { + "epoch": 0.11797169487348344, + "grad_norm": 602.91357421875, + "learning_rate": 3.996561976629744e-05, + "loss": 91.5366, + "step": 29200 + }, + { + "epoch": 0.11801209613885107, + "grad_norm": 1793.7286376953125, + "learning_rate": 3.996545590320029e-05, + "loss": 162.2789, + "step": 29210 + }, + { + "epoch": 0.1180524974042187, + "grad_norm": 1984.9677734375, + "learning_rate": 3.996529165086629e-05, + "loss": 113.9522, + "step": 29220 + }, + { + "epoch": 0.11809289866958633, + "grad_norm": 682.4430541992188, + "learning_rate": 3.9965127009298655e-05, + "loss": 136.2211, + "step": 29230 + }, + { + "epoch": 0.11813329993495396, + "grad_norm": 2774.407958984375, + "learning_rate": 3.9964961978500586e-05, + "loss": 142.688, + "step": 29240 + }, + { + "epoch": 0.1181737012003216, + "grad_norm": 598.268310546875, + "learning_rate": 3.9964796558475314e-05, + "loss": 146.0322, + "step": 29250 + }, + { + "epoch": 0.11821410246568922, + "grad_norm": 744.6815185546875, + "learning_rate": 3.996463074922604e-05, + "loss": 93.9266, + "step": 29260 + }, + { + "epoch": 0.11825450373105685, + "grad_norm": 1888.498779296875, + "learning_rate": 3.996446455075602e-05, + "loss": 124.2294, + "step": 29270 + }, + { + "epoch": 0.11829490499642449, + 
"grad_norm": 770.5491943359375, + "learning_rate": 3.996429796306848e-05, + "loss": 171.4095, + "step": 29280 + }, + { + "epoch": 0.11833530626179212, + "grad_norm": 1411.420654296875, + "learning_rate": 3.996413098616668e-05, + "loss": 181.4194, + "step": 29290 + }, + { + "epoch": 0.11837570752715976, + "grad_norm": 1203.4844970703125, + "learning_rate": 3.9963963620053865e-05, + "loss": 152.6307, + "step": 29300 + }, + { + "epoch": 0.11841610879252738, + "grad_norm": 850.9598999023438, + "learning_rate": 3.99637958647333e-05, + "loss": 164.7981, + "step": 29310 + }, + { + "epoch": 0.11845651005789501, + "grad_norm": 513.0284423828125, + "learning_rate": 3.996362772020826e-05, + "loss": 149.5965, + "step": 29320 + }, + { + "epoch": 0.11849691132326265, + "grad_norm": 1255.2744140625, + "learning_rate": 3.9963459186482014e-05, + "loss": 150.4263, + "step": 29330 + }, + { + "epoch": 0.11853731258863028, + "grad_norm": 1872.178955078125, + "learning_rate": 3.9963290263557856e-05, + "loss": 181.3987, + "step": 29340 + }, + { + "epoch": 0.1185777138539979, + "grad_norm": 1170.229736328125, + "learning_rate": 3.996312095143908e-05, + "loss": 154.5381, + "step": 29350 + }, + { + "epoch": 0.11861811511936554, + "grad_norm": 717.1659545898438, + "learning_rate": 3.996295125012898e-05, + "loss": 135.3547, + "step": 29360 + }, + { + "epoch": 0.11865851638473317, + "grad_norm": 698.9420776367188, + "learning_rate": 3.9962781159630865e-05, + "loss": 114.2775, + "step": 29370 + }, + { + "epoch": 0.11869891765010081, + "grad_norm": 910.4652099609375, + "learning_rate": 3.9962610679948065e-05, + "loss": 140.9357, + "step": 29380 + }, + { + "epoch": 0.11873931891546843, + "grad_norm": 784.2194213867188, + "learning_rate": 3.9962439811083875e-05, + "loss": 165.8024, + "step": 29390 + }, + { + "epoch": 0.11877972018083606, + "grad_norm": 439.1904602050781, + "learning_rate": 3.9962268553041656e-05, + "loss": 116.7436, + "step": 29400 + }, + { + "epoch": 0.1188201214462037, + "grad_norm": 1084.863525390625, + "learning_rate": 3.996209690582473e-05, + "loss": 112.852, + "step": 29410 + }, + { + "epoch": 0.11886052271157133, + "grad_norm": 1265.5203857421875, + "learning_rate": 3.996192486943645e-05, + "loss": 146.1069, + "step": 29420 + }, + { + "epoch": 0.11890092397693895, + "grad_norm": 1041.3350830078125, + "learning_rate": 3.996175244388017e-05, + "loss": 150.0021, + "step": 29430 + }, + { + "epoch": 0.11894132524230659, + "grad_norm": 844.8540649414062, + "learning_rate": 3.9961579629159244e-05, + "loss": 121.348, + "step": 29440 + }, + { + "epoch": 0.11898172650767422, + "grad_norm": 460.4432373046875, + "learning_rate": 3.9961406425277045e-05, + "loss": 115.2796, + "step": 29450 + }, + { + "epoch": 0.11902212777304186, + "grad_norm": 693.8685302734375, + "learning_rate": 3.9961232832236956e-05, + "loss": 132.799, + "step": 29460 + }, + { + "epoch": 0.11906252903840948, + "grad_norm": 898.72216796875, + "learning_rate": 3.9961058850042345e-05, + "loss": 129.042, + "step": 29470 + }, + { + "epoch": 0.11910293030377711, + "grad_norm": 781.861328125, + "learning_rate": 3.9960884478696627e-05, + "loss": 129.5373, + "step": 29480 + }, + { + "epoch": 0.11914333156914475, + "grad_norm": 4428.20166015625, + "learning_rate": 3.996070971820319e-05, + "loss": 144.0128, + "step": 29490 + }, + { + "epoch": 0.11918373283451238, + "grad_norm": 726.5905151367188, + "learning_rate": 3.9960534568565436e-05, + "loss": 140.3079, + "step": 29500 + }, + { + "epoch": 0.11922413409988, + "grad_norm": 1062.7734375, + 
"learning_rate": 3.996035902978679e-05, + "loss": 146.0585, + "step": 29510 + }, + { + "epoch": 0.11926453536524764, + "grad_norm": 899.1897583007812, + "learning_rate": 3.996018310187066e-05, + "loss": 102.1037, + "step": 29520 + }, + { + "epoch": 0.11930493663061527, + "grad_norm": 636.4273681640625, + "learning_rate": 3.9960006784820485e-05, + "loss": 184.1907, + "step": 29530 + }, + { + "epoch": 0.11934533789598291, + "grad_norm": 3166.360107421875, + "learning_rate": 3.99598300786397e-05, + "loss": 139.1012, + "step": 29540 + }, + { + "epoch": 0.11938573916135053, + "grad_norm": 1295.75146484375, + "learning_rate": 3.995965298333176e-05, + "loss": 119.2074, + "step": 29550 + }, + { + "epoch": 0.11942614042671816, + "grad_norm": 703.0324096679688, + "learning_rate": 3.99594754989001e-05, + "loss": 122.3113, + "step": 29560 + }, + { + "epoch": 0.1194665416920858, + "grad_norm": 1563.4483642578125, + "learning_rate": 3.9959297625348196e-05, + "loss": 139.6801, + "step": 29570 + }, + { + "epoch": 0.11950694295745343, + "grad_norm": 945.3995971679688, + "learning_rate": 3.995911936267951e-05, + "loss": 132.8504, + "step": 29580 + }, + { + "epoch": 0.11954734422282105, + "grad_norm": 820.8397216796875, + "learning_rate": 3.995894071089751e-05, + "loss": 120.4974, + "step": 29590 + }, + { + "epoch": 0.1195877454881887, + "grad_norm": 1405.9710693359375, + "learning_rate": 3.995876167000569e-05, + "loss": 84.0473, + "step": 29600 + }, + { + "epoch": 0.11962814675355632, + "grad_norm": 1970.69140625, + "learning_rate": 3.9958582240007536e-05, + "loss": 166.8219, + "step": 29610 + }, + { + "epoch": 0.11966854801892396, + "grad_norm": 765.3466796875, + "learning_rate": 3.995840242090655e-05, + "loss": 119.5127, + "step": 29620 + }, + { + "epoch": 0.11970894928429159, + "grad_norm": 432.30084228515625, + "learning_rate": 3.995822221270622e-05, + "loss": 142.3104, + "step": 29630 + }, + { + "epoch": 0.11974935054965921, + "grad_norm": 832.0032348632812, + "learning_rate": 3.9958041615410085e-05, + "loss": 82.6027, + "step": 29640 + }, + { + "epoch": 0.11978975181502685, + "grad_norm": 1224.7525634765625, + "learning_rate": 3.995786062902165e-05, + "loss": 184.2814, + "step": 29650 + }, + { + "epoch": 0.11983015308039448, + "grad_norm": 1010.9873657226562, + "learning_rate": 3.995767925354445e-05, + "loss": 102.6545, + "step": 29660 + }, + { + "epoch": 0.1198705543457621, + "grad_norm": 829.0925903320312, + "learning_rate": 3.9957497488982014e-05, + "loss": 129.9091, + "step": 29670 + }, + { + "epoch": 0.11991095561112974, + "grad_norm": 1016.3224487304688, + "learning_rate": 3.995731533533789e-05, + "loss": 164.9225, + "step": 29680 + }, + { + "epoch": 0.11995135687649737, + "grad_norm": 1490.7181396484375, + "learning_rate": 3.995713279261563e-05, + "loss": 161.7667, + "step": 29690 + }, + { + "epoch": 0.11999175814186501, + "grad_norm": 715.559326171875, + "learning_rate": 3.995694986081879e-05, + "loss": 155.7804, + "step": 29700 + }, + { + "epoch": 0.12003215940723264, + "grad_norm": 3111.74169921875, + "learning_rate": 3.9956766539950934e-05, + "loss": 182.9849, + "step": 29710 + }, + { + "epoch": 0.12007256067260026, + "grad_norm": 5393.111328125, + "learning_rate": 3.995658283001564e-05, + "loss": 139.6232, + "step": 29720 + }, + { + "epoch": 0.1201129619379679, + "grad_norm": 1168.79638671875, + "learning_rate": 3.99563987310165e-05, + "loss": 134.3075, + "step": 29730 + }, + { + "epoch": 0.12015336320333553, + "grad_norm": 914.0726318359375, + "learning_rate": 3.995621424295709e-05, + 
"loss": 130.8744, + "step": 29740 + }, + { + "epoch": 0.12019376446870315, + "grad_norm": 1739.14013671875, + "learning_rate": 3.9956029365841005e-05, + "loss": 117.7524, + "step": 29750 + }, + { + "epoch": 0.1202341657340708, + "grad_norm": 433.20416259765625, + "learning_rate": 3.995584409967185e-05, + "loss": 139.7443, + "step": 29760 + }, + { + "epoch": 0.12027456699943842, + "grad_norm": 1871.9356689453125, + "learning_rate": 3.9955658444453244e-05, + "loss": 129.5301, + "step": 29770 + }, + { + "epoch": 0.12031496826480606, + "grad_norm": 1806.603271484375, + "learning_rate": 3.995547240018881e-05, + "loss": 148.7472, + "step": 29780 + }, + { + "epoch": 0.12035536953017369, + "grad_norm": 359.63128662109375, + "learning_rate": 3.995528596688216e-05, + "loss": 95.4598, + "step": 29790 + }, + { + "epoch": 0.12039577079554131, + "grad_norm": 3681.173828125, + "learning_rate": 3.995509914453694e-05, + "loss": 126.8944, + "step": 29800 + }, + { + "epoch": 0.12043617206090895, + "grad_norm": 1531.251708984375, + "learning_rate": 3.995491193315678e-05, + "loss": 93.9308, + "step": 29810 + }, + { + "epoch": 0.12047657332627658, + "grad_norm": 1161.579345703125, + "learning_rate": 3.9954724332745344e-05, + "loss": 145.6271, + "step": 29820 + }, + { + "epoch": 0.1205169745916442, + "grad_norm": 709.6150512695312, + "learning_rate": 3.9954536343306286e-05, + "loss": 117.8341, + "step": 29830 + }, + { + "epoch": 0.12055737585701184, + "grad_norm": 1357.734375, + "learning_rate": 3.995434796484326e-05, + "loss": 116.4119, + "step": 29840 + }, + { + "epoch": 0.12059777712237947, + "grad_norm": 1223.1270751953125, + "learning_rate": 3.9954159197359956e-05, + "loss": 102.4043, + "step": 29850 + }, + { + "epoch": 0.12063817838774711, + "grad_norm": 754.6334838867188, + "learning_rate": 3.9953970040860045e-05, + "loss": 99.5778, + "step": 29860 + }, + { + "epoch": 0.12067857965311474, + "grad_norm": 871.9152221679688, + "learning_rate": 3.9953780495347214e-05, + "loss": 104.8976, + "step": 29870 + }, + { + "epoch": 0.12071898091848236, + "grad_norm": 1211.9228515625, + "learning_rate": 3.995359056082516e-05, + "loss": 110.1686, + "step": 29880 + }, + { + "epoch": 0.12075938218385, + "grad_norm": 971.658203125, + "learning_rate": 3.9953400237297584e-05, + "loss": 113.0126, + "step": 29890 + }, + { + "epoch": 0.12079978344921763, + "grad_norm": 646.0537109375, + "learning_rate": 3.99532095247682e-05, + "loss": 145.8144, + "step": 29900 + }, + { + "epoch": 0.12084018471458526, + "grad_norm": 1098.0728759765625, + "learning_rate": 3.995301842324072e-05, + "loss": 171.5564, + "step": 29910 + }, + { + "epoch": 0.1208805859799529, + "grad_norm": 561.2307739257812, + "learning_rate": 3.9952826932718874e-05, + "loss": 141.385, + "step": 29920 + }, + { + "epoch": 0.12092098724532052, + "grad_norm": 1652.14208984375, + "learning_rate": 3.995263505320639e-05, + "loss": 228.7119, + "step": 29930 + }, + { + "epoch": 0.12096138851068816, + "grad_norm": 569.643310546875, + "learning_rate": 3.9952442784707025e-05, + "loss": 135.3276, + "step": 29940 + }, + { + "epoch": 0.12100178977605579, + "grad_norm": 1374.763427734375, + "learning_rate": 3.995225012722451e-05, + "loss": 148.1691, + "step": 29950 + }, + { + "epoch": 0.12104219104142341, + "grad_norm": 1583.6549072265625, + "learning_rate": 3.995205708076261e-05, + "loss": 164.2844, + "step": 29960 + }, + { + "epoch": 0.12108259230679105, + "grad_norm": 1192.9796142578125, + "learning_rate": 3.995186364532507e-05, + "loss": 143.6574, + "step": 29970 + }, + { + 
"epoch": 0.12112299357215868, + "grad_norm": 1184.689453125, + "learning_rate": 3.99516698209157e-05, + "loss": 132.4517, + "step": 29980 + }, + { + "epoch": 0.1211633948375263, + "grad_norm": 2278.093994140625, + "learning_rate": 3.9951475607538234e-05, + "loss": 110.8285, + "step": 29990 + }, + { + "epoch": 0.12120379610289395, + "grad_norm": 883.920166015625, + "learning_rate": 3.9951281005196486e-05, + "loss": 180.0138, + "step": 30000 + }, + { + "epoch": 0.12124419736826157, + "grad_norm": 461.02398681640625, + "learning_rate": 3.995108601389425e-05, + "loss": 105.2683, + "step": 30010 + }, + { + "epoch": 0.12128459863362921, + "grad_norm": 1084.2962646484375, + "learning_rate": 3.995089063363531e-05, + "loss": 134.252, + "step": 30020 + }, + { + "epoch": 0.12132499989899684, + "grad_norm": 1047.1707763671875, + "learning_rate": 3.9950694864423496e-05, + "loss": 178.2405, + "step": 30030 + }, + { + "epoch": 0.12136540116436446, + "grad_norm": 925.1962890625, + "learning_rate": 3.995049870626261e-05, + "loss": 108.2017, + "step": 30040 + }, + { + "epoch": 0.1214058024297321, + "grad_norm": 1012.7600708007812, + "learning_rate": 3.9950302159156476e-05, + "loss": 155.7968, + "step": 30050 + }, + { + "epoch": 0.12144620369509973, + "grad_norm": 885.59326171875, + "learning_rate": 3.9950105223108935e-05, + "loss": 150.2429, + "step": 30060 + }, + { + "epoch": 0.12148660496046736, + "grad_norm": 774.317138671875, + "learning_rate": 3.994990789812383e-05, + "loss": 161.2016, + "step": 30070 + }, + { + "epoch": 0.121527006225835, + "grad_norm": 261.7624206542969, + "learning_rate": 3.994971018420498e-05, + "loss": 137.8015, + "step": 30080 + }, + { + "epoch": 0.12156740749120262, + "grad_norm": 1777.8197021484375, + "learning_rate": 3.9949512081356275e-05, + "loss": 150.532, + "step": 30090 + }, + { + "epoch": 0.12160780875657026, + "grad_norm": 775.0283813476562, + "learning_rate": 3.9949313589581555e-05, + "loss": 104.9756, + "step": 30100 + }, + { + "epoch": 0.12164821002193789, + "grad_norm": 1063.85107421875, + "learning_rate": 3.99491147088847e-05, + "loss": 122.0761, + "step": 30110 + }, + { + "epoch": 0.12168861128730551, + "grad_norm": 1174.099853515625, + "learning_rate": 3.994891543926958e-05, + "loss": 140.263, + "step": 30120 + }, + { + "epoch": 0.12172901255267315, + "grad_norm": 2001.0887451171875, + "learning_rate": 3.9948715780740077e-05, + "loss": 99.5806, + "step": 30130 + }, + { + "epoch": 0.12176941381804078, + "grad_norm": 638.6089477539062, + "learning_rate": 3.99485157333001e-05, + "loss": 100.2488, + "step": 30140 + }, + { + "epoch": 0.12180981508340841, + "grad_norm": 1265.1541748046875, + "learning_rate": 3.9948315296953535e-05, + "loss": 133.6423, + "step": 30150 + }, + { + "epoch": 0.12185021634877605, + "grad_norm": 1104.2303466796875, + "learning_rate": 3.9948114471704296e-05, + "loss": 151.6229, + "step": 30160 + }, + { + "epoch": 0.12189061761414367, + "grad_norm": 461.6390686035156, + "learning_rate": 3.9947913257556285e-05, + "loss": 139.7, + "step": 30170 + }, + { + "epoch": 0.12193101887951131, + "grad_norm": 791.65966796875, + "learning_rate": 3.9947711654513445e-05, + "loss": 144.6293, + "step": 30180 + }, + { + "epoch": 0.12197142014487894, + "grad_norm": 1899.478759765625, + "learning_rate": 3.99475096625797e-05, + "loss": 135.0473, + "step": 30190 + }, + { + "epoch": 0.12201182141024657, + "grad_norm": 922.8239135742188, + "learning_rate": 3.994730728175897e-05, + "loss": 102.3432, + "step": 30200 + }, + { + "epoch": 0.1220522226756142, + "grad_norm": 
745.9439697265625, + "learning_rate": 3.994710451205523e-05, + "loss": 97.5144, + "step": 30210 + }, + { + "epoch": 0.12209262394098183, + "grad_norm": 548.1506958007812, + "learning_rate": 3.994690135347241e-05, + "loss": 107.6679, + "step": 30220 + }, + { + "epoch": 0.12213302520634946, + "grad_norm": 5742.05908203125, + "learning_rate": 3.9946697806014476e-05, + "loss": 165.3908, + "step": 30230 + }, + { + "epoch": 0.1221734264717171, + "grad_norm": 963.951904296875, + "learning_rate": 3.99464938696854e-05, + "loss": 104.4414, + "step": 30240 + }, + { + "epoch": 0.12221382773708472, + "grad_norm": 618.6597290039062, + "learning_rate": 3.994628954448916e-05, + "loss": 98.4233, + "step": 30250 + }, + { + "epoch": 0.12225422900245236, + "grad_norm": 983.8966064453125, + "learning_rate": 3.994608483042974e-05, + "loss": 119.7646, + "step": 30260 + }, + { + "epoch": 0.12229463026781999, + "grad_norm": 575.9495849609375, + "learning_rate": 3.9945879727511126e-05, + "loss": 75.1087, + "step": 30270 + }, + { + "epoch": 0.12233503153318762, + "grad_norm": 460.2945861816406, + "learning_rate": 3.9945674235737314e-05, + "loss": 81.882, + "step": 30280 + }, + { + "epoch": 0.12237543279855526, + "grad_norm": 349.8999328613281, + "learning_rate": 3.994546835511232e-05, + "loss": 84.1899, + "step": 30290 + }, + { + "epoch": 0.12241583406392288, + "grad_norm": 1153.4415283203125, + "learning_rate": 3.994526208564014e-05, + "loss": 118.0794, + "step": 30300 + }, + { + "epoch": 0.12245623532929051, + "grad_norm": 2126.23583984375, + "learning_rate": 3.994505542732482e-05, + "loss": 188.862, + "step": 30310 + }, + { + "epoch": 0.12249663659465815, + "grad_norm": 1328.92138671875, + "learning_rate": 3.994484838017037e-05, + "loss": 112.7441, + "step": 30320 + }, + { + "epoch": 0.12253703786002577, + "grad_norm": 1279.64501953125, + "learning_rate": 3.994464094418083e-05, + "loss": 126.941, + "step": 30330 + }, + { + "epoch": 0.12257743912539341, + "grad_norm": 2971.946533203125, + "learning_rate": 3.994443311936025e-05, + "loss": 160.3979, + "step": 30340 + }, + { + "epoch": 0.12261784039076104, + "grad_norm": 1164.39697265625, + "learning_rate": 3.994422490571268e-05, + "loss": 100.254, + "step": 30350 + }, + { + "epoch": 0.12265824165612867, + "grad_norm": 768.1001586914062, + "learning_rate": 3.994401630324217e-05, + "loss": 156.6558, + "step": 30360 + }, + { + "epoch": 0.1226986429214963, + "grad_norm": 620.89599609375, + "learning_rate": 3.99438073119528e-05, + "loss": 167.1784, + "step": 30370 + }, + { + "epoch": 0.12273904418686393, + "grad_norm": 545.6172485351562, + "learning_rate": 3.994359793184864e-05, + "loss": 111.0658, + "step": 30380 + }, + { + "epoch": 0.12277944545223156, + "grad_norm": 344.2869567871094, + "learning_rate": 3.994338816293377e-05, + "loss": 99.611, + "step": 30390 + }, + { + "epoch": 0.1228198467175992, + "grad_norm": 558.8851928710938, + "learning_rate": 3.994317800521228e-05, + "loss": 125.3433, + "step": 30400 + }, + { + "epoch": 0.12286024798296682, + "grad_norm": 732.8953247070312, + "learning_rate": 3.9942967458688267e-05, + "loss": 114.8939, + "step": 30410 + }, + { + "epoch": 0.12290064924833445, + "grad_norm": 583.1746215820312, + "learning_rate": 3.9942756523365835e-05, + "loss": 68.2959, + "step": 30420 + }, + { + "epoch": 0.12294105051370209, + "grad_norm": 1182.6888427734375, + "learning_rate": 3.99425451992491e-05, + "loss": 143.0462, + "step": 30430 + }, + { + "epoch": 0.12298145177906972, + "grad_norm": 946.85302734375, + "learning_rate": 
3.994233348634218e-05, + "loss": 121.51, + "step": 30440 + }, + { + "epoch": 0.12302185304443736, + "grad_norm": 688.4850463867188, + "learning_rate": 3.99421213846492e-05, + "loss": 107.4189, + "step": 30450 + }, + { + "epoch": 0.12306225430980498, + "grad_norm": 1068.85595703125, + "learning_rate": 3.9941908894174295e-05, + "loss": 121.6054, + "step": 30460 + }, + { + "epoch": 0.12310265557517261, + "grad_norm": 1055.9439697265625, + "learning_rate": 3.994169601492161e-05, + "loss": 147.6767, + "step": 30470 + }, + { + "epoch": 0.12314305684054025, + "grad_norm": 1311.8382568359375, + "learning_rate": 3.994148274689529e-05, + "loss": 157.4325, + "step": 30480 + }, + { + "epoch": 0.12318345810590788, + "grad_norm": 2528.730712890625, + "learning_rate": 3.99412690900995e-05, + "loss": 190.1822, + "step": 30490 + }, + { + "epoch": 0.1232238593712755, + "grad_norm": 1317.169677734375, + "learning_rate": 3.994105504453841e-05, + "loss": 138.9288, + "step": 30500 + }, + { + "epoch": 0.12326426063664314, + "grad_norm": 1399.8465576171875, + "learning_rate": 3.994084061021618e-05, + "loss": 134.1226, + "step": 30510 + }, + { + "epoch": 0.12330466190201077, + "grad_norm": 791.5870971679688, + "learning_rate": 3.9940625787136995e-05, + "loss": 132.1646, + "step": 30520 + }, + { + "epoch": 0.12334506316737841, + "grad_norm": 1604.4136962890625, + "learning_rate": 3.9940410575305044e-05, + "loss": 93.9567, + "step": 30530 + }, + { + "epoch": 0.12338546443274603, + "grad_norm": 1028.586669921875, + "learning_rate": 3.9940194974724524e-05, + "loss": 134.2086, + "step": 30540 + }, + { + "epoch": 0.12342586569811366, + "grad_norm": 792.4896850585938, + "learning_rate": 3.993997898539964e-05, + "loss": 146.2178, + "step": 30550 + }, + { + "epoch": 0.1234662669634813, + "grad_norm": 1534.54736328125, + "learning_rate": 3.993976260733459e-05, + "loss": 94.1073, + "step": 30560 + }, + { + "epoch": 0.12350666822884893, + "grad_norm": 946.388671875, + "learning_rate": 3.993954584053361e-05, + "loss": 120.739, + "step": 30570 + }, + { + "epoch": 0.12354706949421655, + "grad_norm": 1862.2171630859375, + "learning_rate": 3.993932868500092e-05, + "loss": 128.6191, + "step": 30580 + }, + { + "epoch": 0.12358747075958419, + "grad_norm": 1872.319580078125, + "learning_rate": 3.9939111140740754e-05, + "loss": 126.3997, + "step": 30590 + }, + { + "epoch": 0.12362787202495182, + "grad_norm": 739.9329223632812, + "learning_rate": 3.993889320775735e-05, + "loss": 112.3236, + "step": 30600 + }, + { + "epoch": 0.12366827329031946, + "grad_norm": 943.5808715820312, + "learning_rate": 3.993867488605495e-05, + "loss": 117.2642, + "step": 30610 + }, + { + "epoch": 0.12370867455568708, + "grad_norm": 1135.6785888671875, + "learning_rate": 3.993845617563782e-05, + "loss": 205.5956, + "step": 30620 + }, + { + "epoch": 0.12374907582105471, + "grad_norm": 960.75439453125, + "learning_rate": 3.993823707651023e-05, + "loss": 138.0256, + "step": 30630 + }, + { + "epoch": 0.12378947708642235, + "grad_norm": 1506.778076171875, + "learning_rate": 3.993801758867643e-05, + "loss": 203.702, + "step": 30640 + }, + { + "epoch": 0.12382987835178998, + "grad_norm": 998.5537719726562, + "learning_rate": 3.993779771214072e-05, + "loss": 127.5338, + "step": 30650 + }, + { + "epoch": 0.1238702796171576, + "grad_norm": 613.2711791992188, + "learning_rate": 3.9937577446907376e-05, + "loss": 102.5558, + "step": 30660 + }, + { + "epoch": 0.12391068088252524, + "grad_norm": 595.644775390625, + "learning_rate": 3.99373567929807e-05, + "loss": 89.8477, + 
"step": 30670 + }, + { + "epoch": 0.12395108214789287, + "grad_norm": 476.3948669433594, + "learning_rate": 3.9937135750364983e-05, + "loss": 134.7985, + "step": 30680 + }, + { + "epoch": 0.12399148341326051, + "grad_norm": 565.1109008789062, + "learning_rate": 3.9936914319064543e-05, + "loss": 168.9743, + "step": 30690 + }, + { + "epoch": 0.12403188467862813, + "grad_norm": 4369.56201171875, + "learning_rate": 3.9936692499083696e-05, + "loss": 116.8529, + "step": 30700 + }, + { + "epoch": 0.12407228594399576, + "grad_norm": 1264.1217041015625, + "learning_rate": 3.9936470290426765e-05, + "loss": 106.0809, + "step": 30710 + }, + { + "epoch": 0.1241126872093634, + "grad_norm": 1371.2681884765625, + "learning_rate": 3.993624769309808e-05, + "loss": 134.2531, + "step": 30720 + }, + { + "epoch": 0.12415308847473103, + "grad_norm": 408.6277770996094, + "learning_rate": 3.9936024707101984e-05, + "loss": 62.8921, + "step": 30730 + }, + { + "epoch": 0.12419348974009865, + "grad_norm": 2125.443359375, + "learning_rate": 3.993580133244282e-05, + "loss": 153.5595, + "step": 30740 + }, + { + "epoch": 0.1242338910054663, + "grad_norm": 1352.392578125, + "learning_rate": 3.993557756912495e-05, + "loss": 88.2407, + "step": 30750 + }, + { + "epoch": 0.12427429227083392, + "grad_norm": 1089.726806640625, + "learning_rate": 3.9935353417152724e-05, + "loss": 98.3618, + "step": 30760 + }, + { + "epoch": 0.12431469353620156, + "grad_norm": 783.1983032226562, + "learning_rate": 3.9935128876530524e-05, + "loss": 111.6247, + "step": 30770 + }, + { + "epoch": 0.12435509480156919, + "grad_norm": 812.6660766601562, + "learning_rate": 3.9934903947262726e-05, + "loss": 117.4974, + "step": 30780 + }, + { + "epoch": 0.12439549606693681, + "grad_norm": 1292.669189453125, + "learning_rate": 3.99346786293537e-05, + "loss": 131.0912, + "step": 30790 + }, + { + "epoch": 0.12443589733230445, + "grad_norm": 2615.35302734375, + "learning_rate": 3.993445292280787e-05, + "loss": 197.0276, + "step": 30800 + }, + { + "epoch": 0.12447629859767208, + "grad_norm": 1446.1331787109375, + "learning_rate": 3.9934226827629603e-05, + "loss": 105.8567, + "step": 30810 + }, + { + "epoch": 0.1245166998630397, + "grad_norm": 1050.431640625, + "learning_rate": 3.993400034382333e-05, + "loss": 96.1579, + "step": 30820 + }, + { + "epoch": 0.12455710112840734, + "grad_norm": 1392.2281494140625, + "learning_rate": 3.993377347139345e-05, + "loss": 145.462, + "step": 30830 + }, + { + "epoch": 0.12459750239377497, + "grad_norm": 869.6813354492188, + "learning_rate": 3.99335462103444e-05, + "loss": 131.8903, + "step": 30840 + }, + { + "epoch": 0.12463790365914261, + "grad_norm": 930.6217651367188, + "learning_rate": 3.99333185606806e-05, + "loss": 88.7037, + "step": 30850 + }, + { + "epoch": 0.12467830492451024, + "grad_norm": 292.8157958984375, + "learning_rate": 3.99330905224065e-05, + "loss": 164.7592, + "step": 30860 + }, + { + "epoch": 0.12471870618987786, + "grad_norm": 594.364501953125, + "learning_rate": 3.993286209552652e-05, + "loss": 158.7634, + "step": 30870 + }, + { + "epoch": 0.1247591074552455, + "grad_norm": 1538.90869140625, + "learning_rate": 3.9932633280045154e-05, + "loss": 107.2989, + "step": 30880 + }, + { + "epoch": 0.12479950872061313, + "grad_norm": 638.981689453125, + "learning_rate": 3.993240407596682e-05, + "loss": 202.6024, + "step": 30890 + }, + { + "epoch": 0.12483990998598075, + "grad_norm": 1048.1873779296875, + "learning_rate": 3.993217448329602e-05, + "loss": 76.1761, + "step": 30900 + }, + { + "epoch": 
0.1248803112513484, + "grad_norm": 632.6129760742188, + "learning_rate": 3.993194450203721e-05, + "loss": 128.1872, + "step": 30910 + }, + { + "epoch": 0.12492071251671602, + "grad_norm": 862.3527221679688, + "learning_rate": 3.993171413219489e-05, + "loss": 97.4334, + "step": 30920 + }, + { + "epoch": 0.12496111378208366, + "grad_norm": 1667.7735595703125, + "learning_rate": 3.9931483373773524e-05, + "loss": 105.477, + "step": 30930 + }, + { + "epoch": 0.12500151504745127, + "grad_norm": 458.65814208984375, + "learning_rate": 3.9931252226777646e-05, + "loss": 181.268, + "step": 30940 + }, + { + "epoch": 0.1250419163128189, + "grad_norm": 1190.4853515625, + "learning_rate": 3.993102069121173e-05, + "loss": 90.5898, + "step": 30950 + }, + { + "epoch": 0.12508231757818655, + "grad_norm": 884.3414306640625, + "learning_rate": 3.9930788767080316e-05, + "loss": 206.0014, + "step": 30960 + }, + { + "epoch": 0.1251227188435542, + "grad_norm": 564.6066284179688, + "learning_rate": 3.993055645438791e-05, + "loss": 116.7004, + "step": 30970 + }, + { + "epoch": 0.1251631201089218, + "grad_norm": 867.6273193359375, + "learning_rate": 3.993032375313904e-05, + "loss": 104.7762, + "step": 30980 + }, + { + "epoch": 0.12520352137428944, + "grad_norm": 1010.0617065429688, + "learning_rate": 3.993009066333826e-05, + "loss": 109.204, + "step": 30990 + }, + { + "epoch": 0.12524392263965708, + "grad_norm": 660.765380859375, + "learning_rate": 3.992985718499009e-05, + "loss": 140.8548, + "step": 31000 + }, + { + "epoch": 0.1252843239050247, + "grad_norm": 485.31658935546875, + "learning_rate": 3.99296233180991e-05, + "loss": 125.3216, + "step": 31010 + }, + { + "epoch": 0.12532472517039234, + "grad_norm": 1063.8095703125, + "learning_rate": 3.9929389062669834e-05, + "loss": 151.2857, + "step": 31020 + }, + { + "epoch": 0.12536512643575998, + "grad_norm": 995.3087158203125, + "learning_rate": 3.9929154418706876e-05, + "loss": 133.3932, + "step": 31030 + }, + { + "epoch": 0.1254055277011276, + "grad_norm": 1130.556884765625, + "learning_rate": 3.992891938621479e-05, + "loss": 155.4986, + "step": 31040 + }, + { + "epoch": 0.12544592896649523, + "grad_norm": 1329.005126953125, + "learning_rate": 3.992868396519816e-05, + "loss": 94.2178, + "step": 31050 + }, + { + "epoch": 0.12548633023186287, + "grad_norm": 1111.6929931640625, + "learning_rate": 3.9928448155661576e-05, + "loss": 129.7246, + "step": 31060 + }, + { + "epoch": 0.12552673149723048, + "grad_norm": 1785.7513427734375, + "learning_rate": 3.992821195760964e-05, + "loss": 124.3434, + "step": 31070 + }, + { + "epoch": 0.12556713276259812, + "grad_norm": 982.6594848632812, + "learning_rate": 3.992797537104695e-05, + "loss": 111.4861, + "step": 31080 + }, + { + "epoch": 0.12560753402796576, + "grad_norm": 1758.6605224609375, + "learning_rate": 3.9927738395978116e-05, + "loss": 154.5131, + "step": 31090 + }, + { + "epoch": 0.12564793529333337, + "grad_norm": 579.9406127929688, + "learning_rate": 3.9927501032407775e-05, + "loss": 139.0912, + "step": 31100 + }, + { + "epoch": 0.125688336558701, + "grad_norm": 1245.1885986328125, + "learning_rate": 3.992726328034053e-05, + "loss": 113.022, + "step": 31110 + }, + { + "epoch": 0.12572873782406865, + "grad_norm": 1417.814697265625, + "learning_rate": 3.9927025139781023e-05, + "loss": 117.7914, + "step": 31120 + }, + { + "epoch": 0.1257691390894363, + "grad_norm": 403.3697814941406, + "learning_rate": 3.9926786610733917e-05, + "loss": 113.4135, + "step": 31130 + }, + { + "epoch": 0.1258095403548039, + "grad_norm": 
1015.7533569335938, + "learning_rate": 3.9926547693203836e-05, + "loss": 122.1789, + "step": 31140 + }, + { + "epoch": 0.12584994162017155, + "grad_norm": 479.70819091796875, + "learning_rate": 3.9926308387195456e-05, + "loss": 110.8888, + "step": 31150 + }, + { + "epoch": 0.12589034288553919, + "grad_norm": 1415.302490234375, + "learning_rate": 3.992606869271343e-05, + "loss": 133.1723, + "step": 31160 + }, + { + "epoch": 0.1259307441509068, + "grad_norm": 1434.7115478515625, + "learning_rate": 3.992582860976244e-05, + "loss": 144.0729, + "step": 31170 + }, + { + "epoch": 0.12597114541627444, + "grad_norm": 575.958251953125, + "learning_rate": 3.9925588138347155e-05, + "loss": 79.1792, + "step": 31180 + }, + { + "epoch": 0.12601154668164208, + "grad_norm": 1123.8984375, + "learning_rate": 3.992534727847228e-05, + "loss": 71.4283, + "step": 31190 + }, + { + "epoch": 0.1260519479470097, + "grad_norm": 1188.633056640625, + "learning_rate": 3.99251060301425e-05, + "loss": 138.0617, + "step": 31200 + }, + { + "epoch": 0.12609234921237733, + "grad_norm": 2768.794189453125, + "learning_rate": 3.992486439336252e-05, + "loss": 103.115, + "step": 31210 + }, + { + "epoch": 0.12613275047774497, + "grad_norm": 1906.1749267578125, + "learning_rate": 3.992462236813704e-05, + "loss": 158.833, + "step": 31220 + }, + { + "epoch": 0.12617315174311258, + "grad_norm": 560.2239990234375, + "learning_rate": 3.99243799544708e-05, + "loss": 159.7004, + "step": 31230 + }, + { + "epoch": 0.12621355300848022, + "grad_norm": 750.6891479492188, + "learning_rate": 3.9924137152368516e-05, + "loss": 86.4602, + "step": 31240 + }, + { + "epoch": 0.12625395427384786, + "grad_norm": 1308.8642578125, + "learning_rate": 3.9923893961834914e-05, + "loss": 117.4999, + "step": 31250 + }, + { + "epoch": 0.12629435553921547, + "grad_norm": 2875.593017578125, + "learning_rate": 3.9923650382874744e-05, + "loss": 174.0236, + "step": 31260 + }, + { + "epoch": 0.12633475680458311, + "grad_norm": 657.4924926757812, + "learning_rate": 3.9923406415492755e-05, + "loss": 86.7831, + "step": 31270 + }, + { + "epoch": 0.12637515806995075, + "grad_norm": 977.7444458007812, + "learning_rate": 3.99231620596937e-05, + "loss": 79.6685, + "step": 31280 + }, + { + "epoch": 0.1264155593353184, + "grad_norm": 768.6956787109375, + "learning_rate": 3.9922917315482344e-05, + "loss": 155.2602, + "step": 31290 + }, + { + "epoch": 0.126455960600686, + "grad_norm": 733.1231079101562, + "learning_rate": 3.9922672182863456e-05, + "loss": 113.5985, + "step": 31300 + }, + { + "epoch": 0.12649636186605365, + "grad_norm": 1471.4981689453125, + "learning_rate": 3.992242666184181e-05, + "loss": 124.8558, + "step": 31310 + }, + { + "epoch": 0.1265367631314213, + "grad_norm": 592.75830078125, + "learning_rate": 3.9922180752422214e-05, + "loss": 129.1641, + "step": 31320 + }, + { + "epoch": 0.1265771643967889, + "grad_norm": 1093.2265625, + "learning_rate": 3.9921934454609435e-05, + "loss": 68.1542, + "step": 31330 + }, + { + "epoch": 0.12661756566215654, + "grad_norm": 772.689453125, + "learning_rate": 3.99216877684083e-05, + "loss": 97.903, + "step": 31340 + }, + { + "epoch": 0.12665796692752418, + "grad_norm": 812.4371337890625, + "learning_rate": 3.9921440693823596e-05, + "loss": 94.6013, + "step": 31350 + }, + { + "epoch": 0.1266983681928918, + "grad_norm": 713.4705810546875, + "learning_rate": 3.992119323086015e-05, + "loss": 85.8545, + "step": 31360 + }, + { + "epoch": 0.12673876945825943, + "grad_norm": 352.6268615722656, + "learning_rate": 3.9920945379522784e-05, 
+ "loss": 118.3227, + "step": 31370 + }, + { + "epoch": 0.12677917072362707, + "grad_norm": 1456.767578125, + "learning_rate": 3.992069713981634e-05, + "loss": 175.772, + "step": 31380 + }, + { + "epoch": 0.12681957198899468, + "grad_norm": 2872.789794921875, + "learning_rate": 3.992044851174564e-05, + "loss": 152.7629, + "step": 31390 + }, + { + "epoch": 0.12685997325436232, + "grad_norm": 476.4214782714844, + "learning_rate": 3.992019949531555e-05, + "loss": 99.5367, + "step": 31400 + }, + { + "epoch": 0.12690037451972996, + "grad_norm": 1680.3076171875, + "learning_rate": 3.991995009053091e-05, + "loss": 156.3354, + "step": 31410 + }, + { + "epoch": 0.12694077578509758, + "grad_norm": 885.752685546875, + "learning_rate": 3.9919700297396585e-05, + "loss": 131.6531, + "step": 31420 + }, + { + "epoch": 0.12698117705046522, + "grad_norm": 400.1026916503906, + "learning_rate": 3.991945011591745e-05, + "loss": 139.463, + "step": 31430 + }, + { + "epoch": 0.12702157831583286, + "grad_norm": 689.3010864257812, + "learning_rate": 3.9919199546098377e-05, + "loss": 76.8132, + "step": 31440 + }, + { + "epoch": 0.1270619795812005, + "grad_norm": 2046.041015625, + "learning_rate": 3.991894858794426e-05, + "loss": 183.7754, + "step": 31450 + }, + { + "epoch": 0.1271023808465681, + "grad_norm": 1872.15478515625, + "learning_rate": 3.991869724145998e-05, + "loss": 121.2389, + "step": 31460 + }, + { + "epoch": 0.12714278211193575, + "grad_norm": 1319.3197021484375, + "learning_rate": 3.991844550665045e-05, + "loss": 124.9229, + "step": 31470 + }, + { + "epoch": 0.1271831833773034, + "grad_norm": 1074.6365966796875, + "learning_rate": 3.991819338352057e-05, + "loss": 144.648, + "step": 31480 + }, + { + "epoch": 0.127223584642671, + "grad_norm": 3560.509033203125, + "learning_rate": 3.991794087207524e-05, + "loss": 129.7296, + "step": 31490 + }, + { + "epoch": 0.12726398590803864, + "grad_norm": 1501.7806396484375, + "learning_rate": 3.991768797231941e-05, + "loss": 131.7872, + "step": 31500 + }, + { + "epoch": 0.12730438717340628, + "grad_norm": 583.6295166015625, + "learning_rate": 3.9917434684258e-05, + "loss": 169.7168, + "step": 31510 + }, + { + "epoch": 0.1273447884387739, + "grad_norm": 1119.2364501953125, + "learning_rate": 3.9917181007895946e-05, + "loss": 108.4777, + "step": 31520 + }, + { + "epoch": 0.12738518970414153, + "grad_norm": 1140.518310546875, + "learning_rate": 3.9916926943238185e-05, + "loss": 135.4062, + "step": 31530 + }, + { + "epoch": 0.12742559096950917, + "grad_norm": 424.3011779785156, + "learning_rate": 3.991667249028969e-05, + "loss": 133.524, + "step": 31540 + }, + { + "epoch": 0.12746599223487678, + "grad_norm": 777.0897827148438, + "learning_rate": 3.9916417649055404e-05, + "loss": 112.0013, + "step": 31550 + }, + { + "epoch": 0.12750639350024442, + "grad_norm": 531.0302734375, + "learning_rate": 3.991616241954031e-05, + "loss": 155.1041, + "step": 31560 + }, + { + "epoch": 0.12754679476561206, + "grad_norm": 1085.863037109375, + "learning_rate": 3.9915906801749365e-05, + "loss": 171.455, + "step": 31570 + }, + { + "epoch": 0.12758719603097968, + "grad_norm": 464.064453125, + "learning_rate": 3.991565079568757e-05, + "loss": 138.0405, + "step": 31580 + }, + { + "epoch": 0.12762759729634732, + "grad_norm": 1220.650146484375, + "learning_rate": 3.991539440135991e-05, + "loss": 113.5109, + "step": 31590 + }, + { + "epoch": 0.12766799856171496, + "grad_norm": 683.8206787109375, + "learning_rate": 3.9915137618771386e-05, + "loss": 100.4204, + "step": 31600 + }, + { + "epoch": 
0.1277083998270826, + "grad_norm": 574.3087158203125, + "learning_rate": 3.9914880447927e-05, + "loss": 107.8954, + "step": 31610 + }, + { + "epoch": 0.1277488010924502, + "grad_norm": 1586.7393798828125, + "learning_rate": 3.991462288883176e-05, + "loss": 126.5419, + "step": 31620 + }, + { + "epoch": 0.12778920235781785, + "grad_norm": 651.4215698242188, + "learning_rate": 3.99143649414907e-05, + "loss": 130.9087, + "step": 31630 + }, + { + "epoch": 0.1278296036231855, + "grad_norm": 1002.7998046875, + "learning_rate": 3.9914106605908845e-05, + "loss": 160.7912, + "step": 31640 + }, + { + "epoch": 0.1278700048885531, + "grad_norm": 1397.1976318359375, + "learning_rate": 3.991384788209123e-05, + "loss": 165.7528, + "step": 31650 + }, + { + "epoch": 0.12791040615392074, + "grad_norm": 1764.273681640625, + "learning_rate": 3.991358877004289e-05, + "loss": 180.5545, + "step": 31660 + }, + { + "epoch": 0.12795080741928838, + "grad_norm": 960.62939453125, + "learning_rate": 3.991332926976888e-05, + "loss": 143.0168, + "step": 31670 + }, + { + "epoch": 0.127991208684656, + "grad_norm": 859.4241333007812, + "learning_rate": 3.9913069381274274e-05, + "loss": 89.2684, + "step": 31680 + }, + { + "epoch": 0.12803160995002363, + "grad_norm": 1876.795166015625, + "learning_rate": 3.991280910456413e-05, + "loss": 131.8177, + "step": 31690 + }, + { + "epoch": 0.12807201121539127, + "grad_norm": 472.3818664550781, + "learning_rate": 3.991254843964352e-05, + "loss": 173.1073, + "step": 31700 + }, + { + "epoch": 0.12811241248075889, + "grad_norm": 1113.1005859375, + "learning_rate": 3.9912287386517524e-05, + "loss": 107.4966, + "step": 31710 + }, + { + "epoch": 0.12815281374612653, + "grad_norm": 651.1411743164062, + "learning_rate": 3.9912025945191233e-05, + "loss": 151.8742, + "step": 31720 + }, + { + "epoch": 0.12819321501149417, + "grad_norm": 826.5155639648438, + "learning_rate": 3.991176411566974e-05, + "loss": 104.4479, + "step": 31730 + }, + { + "epoch": 0.12823361627686178, + "grad_norm": 2747.4013671875, + "learning_rate": 3.991150189795816e-05, + "loss": 172.6664, + "step": 31740 + }, + { + "epoch": 0.12827401754222942, + "grad_norm": 1335.5511474609375, + "learning_rate": 3.99112392920616e-05, + "loss": 117.9947, + "step": 31750 + }, + { + "epoch": 0.12831441880759706, + "grad_norm": 647.8828735351562, + "learning_rate": 3.991097629798519e-05, + "loss": 159.7582, + "step": 31760 + }, + { + "epoch": 0.1283548200729647, + "grad_norm": 1227.281982421875, + "learning_rate": 3.991071291573403e-05, + "loss": 132.6805, + "step": 31770 + }, + { + "epoch": 0.1283952213383323, + "grad_norm": 643.1395874023438, + "learning_rate": 3.991044914531327e-05, + "loss": 149.7703, + "step": 31780 + }, + { + "epoch": 0.12843562260369995, + "grad_norm": 534.9096069335938, + "learning_rate": 3.991018498672806e-05, + "loss": 142.436, + "step": 31790 + }, + { + "epoch": 0.1284760238690676, + "grad_norm": 698.7767333984375, + "learning_rate": 3.990992043998354e-05, + "loss": 127.7158, + "step": 31800 + }, + { + "epoch": 0.1285164251344352, + "grad_norm": 2005.7313232421875, + "learning_rate": 3.990965550508488e-05, + "loss": 132.9686, + "step": 31810 + }, + { + "epoch": 0.12855682639980284, + "grad_norm": 680.7493286132812, + "learning_rate": 3.990939018203723e-05, + "loss": 124.5448, + "step": 31820 + }, + { + "epoch": 0.12859722766517048, + "grad_norm": 1002.3414306640625, + "learning_rate": 3.990912447084576e-05, + "loss": 115.5465, + "step": 31830 + }, + { + "epoch": 0.1286376289305381, + "grad_norm": 
883.9556884765625, + "learning_rate": 3.990885837151567e-05, + "loss": 138.4741, + "step": 31840 + }, + { + "epoch": 0.12867803019590573, + "grad_norm": 724.6188354492188, + "learning_rate": 3.990859188405213e-05, + "loss": 142.1841, + "step": 31850 + }, + { + "epoch": 0.12871843146127337, + "grad_norm": 480.30596923828125, + "learning_rate": 3.990832500846034e-05, + "loss": 95.7523, + "step": 31860 + }, + { + "epoch": 0.128758832726641, + "grad_norm": 643.943359375, + "learning_rate": 3.990805774474551e-05, + "loss": 96.5253, + "step": 31870 + }, + { + "epoch": 0.12879923399200863, + "grad_norm": 991.5172119140625, + "learning_rate": 3.990779009291284e-05, + "loss": 132.3297, + "step": 31880 + }, + { + "epoch": 0.12883963525737627, + "grad_norm": 919.9249877929688, + "learning_rate": 3.9907522052967556e-05, + "loss": 101.8825, + "step": 31890 + }, + { + "epoch": 0.12888003652274388, + "grad_norm": 1053.57177734375, + "learning_rate": 3.990725362491488e-05, + "loss": 127.1022, + "step": 31900 + }, + { + "epoch": 0.12892043778811152, + "grad_norm": 1388.8475341796875, + "learning_rate": 3.990698480876005e-05, + "loss": 79.0154, + "step": 31910 + }, + { + "epoch": 0.12896083905347916, + "grad_norm": 1099.4844970703125, + "learning_rate": 3.9906715604508295e-05, + "loss": 129.8633, + "step": 31920 + }, + { + "epoch": 0.1290012403188468, + "grad_norm": 1470.076416015625, + "learning_rate": 3.990644601216487e-05, + "loss": 134.4355, + "step": 31930 + }, + { + "epoch": 0.1290416415842144, + "grad_norm": 372.6900634765625, + "learning_rate": 3.990617603173504e-05, + "loss": 116.0335, + "step": 31940 + }, + { + "epoch": 0.12908204284958205, + "grad_norm": 737.8951416015625, + "learning_rate": 3.9905905663224054e-05, + "loss": 105.1867, + "step": 31950 + }, + { + "epoch": 0.1291224441149497, + "grad_norm": 953.7412719726562, + "learning_rate": 3.9905634906637185e-05, + "loss": 86.5359, + "step": 31960 + }, + { + "epoch": 0.1291628453803173, + "grad_norm": 482.3385925292969, + "learning_rate": 3.9905363761979724e-05, + "loss": 106.8281, + "step": 31970 + }, + { + "epoch": 0.12920324664568494, + "grad_norm": 415.83416748046875, + "learning_rate": 3.9905092229256945e-05, + "loss": 79.0064, + "step": 31980 + }, + { + "epoch": 0.12924364791105258, + "grad_norm": 967.6205444335938, + "learning_rate": 3.9904820308474145e-05, + "loss": 148.0533, + "step": 31990 + }, + { + "epoch": 0.1292840491764202, + "grad_norm": 1125.236328125, + "learning_rate": 3.9904547999636625e-05, + "loss": 135.8924, + "step": 32000 + }, + { + "epoch": 0.12932445044178784, + "grad_norm": 884.6973266601562, + "learning_rate": 3.9904275302749696e-05, + "loss": 156.9301, + "step": 32010 + }, + { + "epoch": 0.12936485170715548, + "grad_norm": 1190.716796875, + "learning_rate": 3.990400221781867e-05, + "loss": 146.7083, + "step": 32020 + }, + { + "epoch": 0.1294052529725231, + "grad_norm": 1096.0106201171875, + "learning_rate": 3.990372874484887e-05, + "loss": 113.6559, + "step": 32030 + }, + { + "epoch": 0.12944565423789073, + "grad_norm": 1505.442626953125, + "learning_rate": 3.9903454883845645e-05, + "loss": 137.5528, + "step": 32040 + }, + { + "epoch": 0.12948605550325837, + "grad_norm": 1529.5111083984375, + "learning_rate": 3.9903180634814304e-05, + "loss": 102.4819, + "step": 32050 + }, + { + "epoch": 0.12952645676862598, + "grad_norm": 4978.8369140625, + "learning_rate": 3.990290599776022e-05, + "loss": 123.1511, + "step": 32060 + }, + { + "epoch": 0.12956685803399362, + "grad_norm": 1162.8614501953125, + "learning_rate": 
3.990263097268873e-05, + "loss": 134.065, + "step": 32070 + }, + { + "epoch": 0.12960725929936126, + "grad_norm": 488.1653747558594, + "learning_rate": 3.990235555960521e-05, + "loss": 202.1292, + "step": 32080 + }, + { + "epoch": 0.1296476605647289, + "grad_norm": 524.121337890625, + "learning_rate": 3.990207975851502e-05, + "loss": 124.1722, + "step": 32090 + }, + { + "epoch": 0.1296880618300965, + "grad_norm": 1515.552490234375, + "learning_rate": 3.990180356942353e-05, + "loss": 120.6186, + "step": 32100 + }, + { + "epoch": 0.12972846309546415, + "grad_norm": 1028.3267822265625, + "learning_rate": 3.990152699233614e-05, + "loss": 98.9408, + "step": 32110 + }, + { + "epoch": 0.1297688643608318, + "grad_norm": 1426.6649169921875, + "learning_rate": 3.990125002725824e-05, + "loss": 161.4573, + "step": 32120 + }, + { + "epoch": 0.1298092656261994, + "grad_norm": 1053.0567626953125, + "learning_rate": 3.990097267419522e-05, + "loss": 107.5194, + "step": 32130 + }, + { + "epoch": 0.12984966689156704, + "grad_norm": 504.8323669433594, + "learning_rate": 3.990069493315249e-05, + "loss": 96.8242, + "step": 32140 + }, + { + "epoch": 0.12989006815693468, + "grad_norm": 843.9279174804688, + "learning_rate": 3.990041680413547e-05, + "loss": 93.5062, + "step": 32150 + }, + { + "epoch": 0.1299304694223023, + "grad_norm": 1068.4307861328125, + "learning_rate": 3.9900138287149575e-05, + "loss": 173.6988, + "step": 32160 + }, + { + "epoch": 0.12997087068766994, + "grad_norm": 1699.3997802734375, + "learning_rate": 3.989985938220025e-05, + "loss": 164.1665, + "step": 32170 + }, + { + "epoch": 0.13001127195303758, + "grad_norm": 1452.5870361328125, + "learning_rate": 3.9899580089292904e-05, + "loss": 113.6706, + "step": 32180 + }, + { + "epoch": 0.1300516732184052, + "grad_norm": 1213.0133056640625, + "learning_rate": 3.9899300408433005e-05, + "loss": 118.0306, + "step": 32190 + }, + { + "epoch": 0.13009207448377283, + "grad_norm": 911.9202270507812, + "learning_rate": 3.989902033962601e-05, + "loss": 138.9242, + "step": 32200 + }, + { + "epoch": 0.13013247574914047, + "grad_norm": 1037.6766357421875, + "learning_rate": 3.989873988287736e-05, + "loss": 111.1891, + "step": 32210 + }, + { + "epoch": 0.13017287701450808, + "grad_norm": 860.2420654296875, + "learning_rate": 3.989845903819253e-05, + "loss": 117.9012, + "step": 32220 + }, + { + "epoch": 0.13021327827987572, + "grad_norm": 805.6640014648438, + "learning_rate": 3.9898177805577e-05, + "loss": 108.6428, + "step": 32230 + }, + { + "epoch": 0.13025367954524336, + "grad_norm": 2415.4267578125, + "learning_rate": 3.989789618503624e-05, + "loss": 117.275, + "step": 32240 + }, + { + "epoch": 0.130294080810611, + "grad_norm": 629.11865234375, + "learning_rate": 3.989761417657577e-05, + "loss": 134.3529, + "step": 32250 + }, + { + "epoch": 0.1303344820759786, + "grad_norm": 928.9827270507812, + "learning_rate": 3.989733178020105e-05, + "loss": 78.0116, + "step": 32260 + }, + { + "epoch": 0.13037488334134625, + "grad_norm": 1351.3050537109375, + "learning_rate": 3.989704899591761e-05, + "loss": 89.963, + "step": 32270 + }, + { + "epoch": 0.1304152846067139, + "grad_norm": 930.9780883789062, + "learning_rate": 3.989676582373096e-05, + "loss": 132.8917, + "step": 32280 + }, + { + "epoch": 0.1304556858720815, + "grad_norm": 1168.9969482421875, + "learning_rate": 3.989648226364661e-05, + "loss": 176.8101, + "step": 32290 + }, + { + "epoch": 0.13049608713744915, + "grad_norm": 405.0583801269531, + "learning_rate": 3.98961983156701e-05, + "loss": 72.4666, + 
"step": 32300 + }, + { + "epoch": 0.13053648840281679, + "grad_norm": 806.337890625, + "learning_rate": 3.9895913979806955e-05, + "loss": 135.5496, + "step": 32310 + }, + { + "epoch": 0.1305768896681844, + "grad_norm": 1002.3172607421875, + "learning_rate": 3.9895629256062726e-05, + "loss": 131.3572, + "step": 32320 + }, + { + "epoch": 0.13061729093355204, + "grad_norm": 1441.03173828125, + "learning_rate": 3.989534414444296e-05, + "loss": 130.9144, + "step": 32330 + }, + { + "epoch": 0.13065769219891968, + "grad_norm": 789.7969360351562, + "learning_rate": 3.989505864495322e-05, + "loss": 85.2817, + "step": 32340 + }, + { + "epoch": 0.1306980934642873, + "grad_norm": 3201.517822265625, + "learning_rate": 3.989477275759907e-05, + "loss": 119.8846, + "step": 32350 + }, + { + "epoch": 0.13073849472965493, + "grad_norm": 2336.458984375, + "learning_rate": 3.989448648238608e-05, + "loss": 202.2657, + "step": 32360 + }, + { + "epoch": 0.13077889599502257, + "grad_norm": 639.8535766601562, + "learning_rate": 3.989419981931984e-05, + "loss": 86.1869, + "step": 32370 + }, + { + "epoch": 0.13081929726039018, + "grad_norm": 542.9017333984375, + "learning_rate": 3.989391276840592e-05, + "loss": 133.2799, + "step": 32380 + }, + { + "epoch": 0.13085969852575782, + "grad_norm": 1021.6851806640625, + "learning_rate": 3.989362532964994e-05, + "loss": 115.8828, + "step": 32390 + }, + { + "epoch": 0.13090009979112546, + "grad_norm": 732.2811279296875, + "learning_rate": 3.989333750305749e-05, + "loss": 148.0998, + "step": 32400 + }, + { + "epoch": 0.1309405010564931, + "grad_norm": 1172.5616455078125, + "learning_rate": 3.9893049288634174e-05, + "loss": 149.8155, + "step": 32410 + }, + { + "epoch": 0.13098090232186071, + "grad_norm": 1125.781005859375, + "learning_rate": 3.989276068638563e-05, + "loss": 104.2196, + "step": 32420 + }, + { + "epoch": 0.13102130358722835, + "grad_norm": 757.4611206054688, + "learning_rate": 3.9892471696317467e-05, + "loss": 133.4355, + "step": 32430 + }, + { + "epoch": 0.131061704852596, + "grad_norm": 845.2111206054688, + "learning_rate": 3.9892182318435336e-05, + "loss": 119.0212, + "step": 32440 + }, + { + "epoch": 0.1311021061179636, + "grad_norm": 1066.4530029296875, + "learning_rate": 3.989189255274487e-05, + "loss": 121.471, + "step": 32450 + }, + { + "epoch": 0.13114250738333125, + "grad_norm": 930.4403686523438, + "learning_rate": 3.989160239925171e-05, + "loss": 164.7945, + "step": 32460 + }, + { + "epoch": 0.1311829086486989, + "grad_norm": 815.77978515625, + "learning_rate": 3.989131185796153e-05, + "loss": 130.6201, + "step": 32470 + }, + { + "epoch": 0.1312233099140665, + "grad_norm": 997.6732788085938, + "learning_rate": 3.989102092887997e-05, + "loss": 95.1348, + "step": 32480 + }, + { + "epoch": 0.13126371117943414, + "grad_norm": 1120.035400390625, + "learning_rate": 3.9890729612012726e-05, + "loss": 129.8036, + "step": 32490 + }, + { + "epoch": 0.13130411244480178, + "grad_norm": 1269.23779296875, + "learning_rate": 3.989043790736547e-05, + "loss": 153.4209, + "step": 32500 + }, + { + "epoch": 0.1313445137101694, + "grad_norm": 2129.856201171875, + "learning_rate": 3.989014581494388e-05, + "loss": 148.8988, + "step": 32510 + }, + { + "epoch": 0.13138491497553703, + "grad_norm": 822.850830078125, + "learning_rate": 3.9889853334753666e-05, + "loss": 119.3159, + "step": 32520 + }, + { + "epoch": 0.13142531624090467, + "grad_norm": 907.6431274414062, + "learning_rate": 3.988956046680051e-05, + "loss": 189.7014, + "step": 32530 + }, + { + "epoch": 
0.13146571750627228, + "grad_norm": 2083.8251953125, + "learning_rate": 3.9889267211090145e-05, + "loss": 154.3238, + "step": 32540 + }, + { + "epoch": 0.13150611877163992, + "grad_norm": 1202.81591796875, + "learning_rate": 3.988897356762827e-05, + "loss": 107.1396, + "step": 32550 + }, + { + "epoch": 0.13154652003700756, + "grad_norm": 3312.362548828125, + "learning_rate": 3.988867953642062e-05, + "loss": 163.3171, + "step": 32560 + }, + { + "epoch": 0.1315869213023752, + "grad_norm": 422.867431640625, + "learning_rate": 3.9888385117472914e-05, + "loss": 106.9872, + "step": 32570 + }, + { + "epoch": 0.13162732256774282, + "grad_norm": 1280.3533935546875, + "learning_rate": 3.9888090310790904e-05, + "loss": 120.0603, + "step": 32580 + }, + { + "epoch": 0.13166772383311046, + "grad_norm": 604.4456176757812, + "learning_rate": 3.9887795116380336e-05, + "loss": 132.4813, + "step": 32590 + }, + { + "epoch": 0.1317081250984781, + "grad_norm": 1531.5316162109375, + "learning_rate": 3.988749953424696e-05, + "loss": 123.8846, + "step": 32600 + }, + { + "epoch": 0.1317485263638457, + "grad_norm": 657.6782836914062, + "learning_rate": 3.988720356439655e-05, + "loss": 162.9868, + "step": 32610 + }, + { + "epoch": 0.13178892762921335, + "grad_norm": 1038.39306640625, + "learning_rate": 3.988690720683486e-05, + "loss": 144.4954, + "step": 32620 + }, + { + "epoch": 0.131829328894581, + "grad_norm": 431.6632080078125, + "learning_rate": 3.988661046156768e-05, + "loss": 142.1506, + "step": 32630 + }, + { + "epoch": 0.1318697301599486, + "grad_norm": 504.2084655761719, + "learning_rate": 3.988631332860079e-05, + "loss": 125.2048, + "step": 32640 + }, + { + "epoch": 0.13191013142531624, + "grad_norm": 807.3241577148438, + "learning_rate": 3.988601580793998e-05, + "loss": 138.2131, + "step": 32650 + }, + { + "epoch": 0.13195053269068388, + "grad_norm": 1043.9158935546875, + "learning_rate": 3.988571789959106e-05, + "loss": 142.3187, + "step": 32660 + }, + { + "epoch": 0.1319909339560515, + "grad_norm": 641.268798828125, + "learning_rate": 3.988541960355982e-05, + "loss": 98.4692, + "step": 32670 + }, + { + "epoch": 0.13203133522141913, + "grad_norm": 760.7156982421875, + "learning_rate": 3.98851209198521e-05, + "loss": 91.766, + "step": 32680 + }, + { + "epoch": 0.13207173648678677, + "grad_norm": 621.8873291015625, + "learning_rate": 3.988482184847371e-05, + "loss": 105.6898, + "step": 32690 + }, + { + "epoch": 0.13211213775215438, + "grad_norm": 589.4898681640625, + "learning_rate": 3.988452238943047e-05, + "loss": 123.0693, + "step": 32700 + }, + { + "epoch": 0.13215253901752202, + "grad_norm": 1140.7652587890625, + "learning_rate": 3.9884222542728236e-05, + "loss": 107.226, + "step": 32710 + }, + { + "epoch": 0.13219294028288966, + "grad_norm": 1137.0020751953125, + "learning_rate": 3.988392230837285e-05, + "loss": 160.6211, + "step": 32720 + }, + { + "epoch": 0.1322333415482573, + "grad_norm": 862.18408203125, + "learning_rate": 3.988362168637015e-05, + "loss": 124.0001, + "step": 32730 + }, + { + "epoch": 0.13227374281362492, + "grad_norm": 4666.337890625, + "learning_rate": 3.9883320676726015e-05, + "loss": 104.9396, + "step": 32740 + }, + { + "epoch": 0.13231414407899256, + "grad_norm": 1240.4056396484375, + "learning_rate": 3.988301927944631e-05, + "loss": 141.3902, + "step": 32750 + }, + { + "epoch": 0.1323545453443602, + "grad_norm": 911.8882446289062, + "learning_rate": 3.988271749453691e-05, + "loss": 172.4724, + "step": 32760 + }, + { + "epoch": 0.1323949466097278, + "grad_norm": 
956.4234619140625, + "learning_rate": 3.988241532200369e-05, + "loss": 118.4634, + "step": 32770 + }, + { + "epoch": 0.13243534787509545, + "grad_norm": 1146.9935302734375, + "learning_rate": 3.9882112761852544e-05, + "loss": 104.8114, + "step": 32780 + }, + { + "epoch": 0.1324757491404631, + "grad_norm": 1518.0933837890625, + "learning_rate": 3.9881809814089376e-05, + "loss": 143.4335, + "step": 32790 + }, + { + "epoch": 0.1325161504058307, + "grad_norm": 528.15576171875, + "learning_rate": 3.9881506478720095e-05, + "loss": 105.5051, + "step": 32800 + }, + { + "epoch": 0.13255655167119834, + "grad_norm": 1856.3134765625, + "learning_rate": 3.9881202755750604e-05, + "loss": 129.2274, + "step": 32810 + }, + { + "epoch": 0.13259695293656598, + "grad_norm": 432.3136901855469, + "learning_rate": 3.988089864518683e-05, + "loss": 104.9324, + "step": 32820 + }, + { + "epoch": 0.1326373542019336, + "grad_norm": 696.2965698242188, + "learning_rate": 3.988059414703471e-05, + "loss": 118.6406, + "step": 32830 + }, + { + "epoch": 0.13267775546730123, + "grad_norm": 1090.203125, + "learning_rate": 3.9880289261300166e-05, + "loss": 182.2755, + "step": 32840 + }, + { + "epoch": 0.13271815673266887, + "grad_norm": 1153.8629150390625, + "learning_rate": 3.987998398798914e-05, + "loss": 175.9572, + "step": 32850 + }, + { + "epoch": 0.13275855799803649, + "grad_norm": 846.61474609375, + "learning_rate": 3.987967832710761e-05, + "loss": 141.9597, + "step": 32860 + }, + { + "epoch": 0.13279895926340413, + "grad_norm": 553.5564575195312, + "learning_rate": 3.98793722786615e-05, + "loss": 109.3537, + "step": 32870 + }, + { + "epoch": 0.13283936052877177, + "grad_norm": 1201.911865234375, + "learning_rate": 3.9879065842656796e-05, + "loss": 101.3175, + "step": 32880 + }, + { + "epoch": 0.1328797617941394, + "grad_norm": 1346.0640869140625, + "learning_rate": 3.987875901909947e-05, + "loss": 149.1813, + "step": 32890 + }, + { + "epoch": 0.13292016305950702, + "grad_norm": 757.3881225585938, + "learning_rate": 3.9878451807995496e-05, + "loss": 106.9992, + "step": 32900 + }, + { + "epoch": 0.13296056432487466, + "grad_norm": 991.7479858398438, + "learning_rate": 3.987814420935088e-05, + "loss": 94.5428, + "step": 32910 + }, + { + "epoch": 0.1330009655902423, + "grad_norm": 795.5956420898438, + "learning_rate": 3.987783622317161e-05, + "loss": 106.2584, + "step": 32920 + }, + { + "epoch": 0.1330413668556099, + "grad_norm": 983.3010864257812, + "learning_rate": 3.987752784946368e-05, + "loss": 155.4717, + "step": 32930 + }, + { + "epoch": 0.13308176812097755, + "grad_norm": 586.8764038085938, + "learning_rate": 3.9877219088233115e-05, + "loss": 134.5889, + "step": 32940 + }, + { + "epoch": 0.1331221693863452, + "grad_norm": 951.673583984375, + "learning_rate": 3.987690993948594e-05, + "loss": 132.4143, + "step": 32950 + }, + { + "epoch": 0.1331625706517128, + "grad_norm": 511.3675842285156, + "learning_rate": 3.9876600403228154e-05, + "loss": 105.5843, + "step": 32960 + }, + { + "epoch": 0.13320297191708044, + "grad_norm": 591.7822265625, + "learning_rate": 3.987629047946582e-05, + "loss": 121.6479, + "step": 32970 + }, + { + "epoch": 0.13324337318244808, + "grad_norm": 851.306884765625, + "learning_rate": 3.9875980168204976e-05, + "loss": 126.3882, + "step": 32980 + }, + { + "epoch": 0.1332837744478157, + "grad_norm": 800.1739501953125, + "learning_rate": 3.987566946945166e-05, + "loss": 103.6379, + "step": 32990 + }, + { + "epoch": 0.13332417571318333, + "grad_norm": 1116.7933349609375, + "learning_rate": 
3.987535838321193e-05, + "loss": 125.1347, + "step": 33000 + }, + { + "epoch": 0.13336457697855097, + "grad_norm": 409.66534423828125, + "learning_rate": 3.987504690949186e-05, + "loss": 116.4005, + "step": 33010 + }, + { + "epoch": 0.1334049782439186, + "grad_norm": 556.071533203125, + "learning_rate": 3.987473504829752e-05, + "loss": 91.4894, + "step": 33020 + }, + { + "epoch": 0.13344537950928623, + "grad_norm": 1512.168701171875, + "learning_rate": 3.987442279963499e-05, + "loss": 94.1634, + "step": 33030 + }, + { + "epoch": 0.13348578077465387, + "grad_norm": 616.458984375, + "learning_rate": 3.9874110163510345e-05, + "loss": 183.157, + "step": 33040 + }, + { + "epoch": 0.1335261820400215, + "grad_norm": 1501.3927001953125, + "learning_rate": 3.98737971399297e-05, + "loss": 93.1757, + "step": 33050 + }, + { + "epoch": 0.13356658330538912, + "grad_norm": 937.7486572265625, + "learning_rate": 3.987348372889915e-05, + "loss": 138.0116, + "step": 33060 + }, + { + "epoch": 0.13360698457075676, + "grad_norm": 827.442138671875, + "learning_rate": 3.9873169930424796e-05, + "loss": 109.7301, + "step": 33070 + }, + { + "epoch": 0.1336473858361244, + "grad_norm": 711.6142578125, + "learning_rate": 3.987285574451276e-05, + "loss": 141.4624, + "step": 33080 + }, + { + "epoch": 0.133687787101492, + "grad_norm": 604.0659790039062, + "learning_rate": 3.987254117116918e-05, + "loss": 130.6602, + "step": 33090 + }, + { + "epoch": 0.13372818836685965, + "grad_norm": 756.3602905273438, + "learning_rate": 3.987222621040017e-05, + "loss": 164.283, + "step": 33100 + }, + { + "epoch": 0.1337685896322273, + "grad_norm": 537.4063720703125, + "learning_rate": 3.987191086221189e-05, + "loss": 126.9599, + "step": 33110 + }, + { + "epoch": 0.1338089908975949, + "grad_norm": 799.9998168945312, + "learning_rate": 3.9871595126610466e-05, + "loss": 111.4898, + "step": 33120 + }, + { + "epoch": 0.13384939216296254, + "grad_norm": 1364.9775390625, + "learning_rate": 3.987127900360207e-05, + "loss": 128.8734, + "step": 33130 + }, + { + "epoch": 0.13388979342833018, + "grad_norm": 1269.8323974609375, + "learning_rate": 3.9870962493192856e-05, + "loss": 123.7381, + "step": 33140 + }, + { + "epoch": 0.1339301946936978, + "grad_norm": 2066.228759765625, + "learning_rate": 3.9870645595389e-05, + "loss": 119.1901, + "step": 33150 + }, + { + "epoch": 0.13397059595906544, + "grad_norm": 506.2632141113281, + "learning_rate": 3.987032831019668e-05, + "loss": 150.6209, + "step": 33160 + }, + { + "epoch": 0.13401099722443308, + "grad_norm": 882.3668212890625, + "learning_rate": 3.987001063762208e-05, + "loss": 100.7261, + "step": 33170 + }, + { + "epoch": 0.1340513984898007, + "grad_norm": 692.6370239257812, + "learning_rate": 3.986969257767139e-05, + "loss": 98.7437, + "step": 33180 + }, + { + "epoch": 0.13409179975516833, + "grad_norm": 1298.2685546875, + "learning_rate": 3.9869374130350805e-05, + "loss": 101.4399, + "step": 33190 + }, + { + "epoch": 0.13413220102053597, + "grad_norm": 1227.4071044921875, + "learning_rate": 3.986905529566655e-05, + "loss": 104.891, + "step": 33200 + }, + { + "epoch": 0.1341726022859036, + "grad_norm": 780.17236328125, + "learning_rate": 3.986873607362484e-05, + "loss": 76.5751, + "step": 33210 + }, + { + "epoch": 0.13421300355127122, + "grad_norm": 1044.523193359375, + "learning_rate": 3.986841646423188e-05, + "loss": 76.5698, + "step": 33220 + }, + { + "epoch": 0.13425340481663886, + "grad_norm": 481.7081604003906, + "learning_rate": 3.9868096467493924e-05, + "loss": 132.5312, + "step": 33230 + 
}, + { + "epoch": 0.1342938060820065, + "grad_norm": 822.1024169921875, + "learning_rate": 3.9867776083417186e-05, + "loss": 103.4199, + "step": 33240 + }, + { + "epoch": 0.1343342073473741, + "grad_norm": 1468.8111572265625, + "learning_rate": 3.986745531200793e-05, + "loss": 121.0184, + "step": 33250 + }, + { + "epoch": 0.13437460861274175, + "grad_norm": 1686.716796875, + "learning_rate": 3.9867134153272404e-05, + "loss": 113.0985, + "step": 33260 + }, + { + "epoch": 0.1344150098781094, + "grad_norm": 688.4912109375, + "learning_rate": 3.9866812607216875e-05, + "loss": 117.663, + "step": 33270 + }, + { + "epoch": 0.134455411143477, + "grad_norm": 673.8190307617188, + "learning_rate": 3.9866490673847596e-05, + "loss": 161.9355, + "step": 33280 + }, + { + "epoch": 0.13449581240884464, + "grad_norm": 641.7383422851562, + "learning_rate": 3.9866168353170855e-05, + "loss": 103.2926, + "step": 33290 + }, + { + "epoch": 0.13453621367421228, + "grad_norm": 1211.460693359375, + "learning_rate": 3.986584564519294e-05, + "loss": 117.979, + "step": 33300 + }, + { + "epoch": 0.1345766149395799, + "grad_norm": 1202.5511474609375, + "learning_rate": 3.9865522549920135e-05, + "loss": 119.6262, + "step": 33310 + }, + { + "epoch": 0.13461701620494754, + "grad_norm": 651.3480224609375, + "learning_rate": 3.986519906735874e-05, + "loss": 170.3959, + "step": 33320 + }, + { + "epoch": 0.13465741747031518, + "grad_norm": 398.9108581542969, + "learning_rate": 3.986487519751506e-05, + "loss": 95.997, + "step": 33330 + }, + { + "epoch": 0.1346978187356828, + "grad_norm": 1876.97900390625, + "learning_rate": 3.9864550940395413e-05, + "loss": 194.9765, + "step": 33340 + }, + { + "epoch": 0.13473822000105043, + "grad_norm": 678.1907958984375, + "learning_rate": 3.9864226296006114e-05, + "loss": 128.3915, + "step": 33350 + }, + { + "epoch": 0.13477862126641807, + "grad_norm": 835.3662109375, + "learning_rate": 3.986390126435351e-05, + "loss": 112.9798, + "step": 33360 + }, + { + "epoch": 0.1348190225317857, + "grad_norm": 388.36834716796875, + "learning_rate": 3.986357584544391e-05, + "loss": 96.5176, + "step": 33370 + }, + { + "epoch": 0.13485942379715332, + "grad_norm": 773.1068115234375, + "learning_rate": 3.986325003928367e-05, + "loss": 93.5986, + "step": 33380 + }, + { + "epoch": 0.13489982506252096, + "grad_norm": 523.893310546875, + "learning_rate": 3.986292384587916e-05, + "loss": 93.4795, + "step": 33390 + }, + { + "epoch": 0.1349402263278886, + "grad_norm": 971.8486938476562, + "learning_rate": 3.986259726523671e-05, + "loss": 121.8176, + "step": 33400 + }, + { + "epoch": 0.1349806275932562, + "grad_norm": 441.7010192871094, + "learning_rate": 3.9862270297362704e-05, + "loss": 96.584, + "step": 33410 + }, + { + "epoch": 0.13502102885862385, + "grad_norm": 4621.88818359375, + "learning_rate": 3.986194294226351e-05, + "loss": 173.9111, + "step": 33420 + }, + { + "epoch": 0.1350614301239915, + "grad_norm": 908.2979736328125, + "learning_rate": 3.986161519994552e-05, + "loss": 190.957, + "step": 33430 + }, + { + "epoch": 0.1351018313893591, + "grad_norm": 983.3013916015625, + "learning_rate": 3.986128707041511e-05, + "loss": 123.9177, + "step": 33440 + }, + { + "epoch": 0.13514223265472675, + "grad_norm": 872.275146484375, + "learning_rate": 3.9860958553678694e-05, + "loss": 155.9396, + "step": 33450 + }, + { + "epoch": 0.13518263392009439, + "grad_norm": 1801.795166015625, + "learning_rate": 3.986062964974265e-05, + "loss": 111.1027, + "step": 33460 + }, + { + "epoch": 0.135223035185462, + "grad_norm": 
1770.11865234375, + "learning_rate": 3.9860300358613416e-05, + "loss": 124.9061, + "step": 33470 + }, + { + "epoch": 0.13526343645082964, + "grad_norm": 552.3416137695312, + "learning_rate": 3.98599706802974e-05, + "loss": 90.7228, + "step": 33480 + }, + { + "epoch": 0.13530383771619728, + "grad_norm": 1061.998291015625, + "learning_rate": 3.985964061480103e-05, + "loss": 101.6499, + "step": 33490 + }, + { + "epoch": 0.1353442389815649, + "grad_norm": 583.8433227539062, + "learning_rate": 3.985931016213074e-05, + "loss": 78.8717, + "step": 33500 + }, + { + "epoch": 0.13538464024693253, + "grad_norm": 539.7920532226562, + "learning_rate": 3.985897932229298e-05, + "loss": 157.5889, + "step": 33510 + }, + { + "epoch": 0.13542504151230017, + "grad_norm": 1149.037841796875, + "learning_rate": 3.985864809529419e-05, + "loss": 170.6571, + "step": 33520 + }, + { + "epoch": 0.13546544277766778, + "grad_norm": 642.4573974609375, + "learning_rate": 3.985831648114083e-05, + "loss": 138.4445, + "step": 33530 + }, + { + "epoch": 0.13550584404303542, + "grad_norm": 666.746826171875, + "learning_rate": 3.985798447983937e-05, + "loss": 115.0262, + "step": 33540 + }, + { + "epoch": 0.13554624530840306, + "grad_norm": 594.228515625, + "learning_rate": 3.9857652091396276e-05, + "loss": 91.7834, + "step": 33550 + }, + { + "epoch": 0.1355866465737707, + "grad_norm": 1604.103271484375, + "learning_rate": 3.985731931581804e-05, + "loss": 123.3796, + "step": 33560 + }, + { + "epoch": 0.13562704783913831, + "grad_norm": 1439.8311767578125, + "learning_rate": 3.985698615311113e-05, + "loss": 87.9761, + "step": 33570 + }, + { + "epoch": 0.13566744910450595, + "grad_norm": 1064.0653076171875, + "learning_rate": 3.985665260328205e-05, + "loss": 141.0082, + "step": 33580 + }, + { + "epoch": 0.1357078503698736, + "grad_norm": 934.2000122070312, + "learning_rate": 3.985631866633731e-05, + "loss": 121.5419, + "step": 33590 + }, + { + "epoch": 0.1357482516352412, + "grad_norm": 675.2017822265625, + "learning_rate": 3.9855984342283414e-05, + "loss": 131.2815, + "step": 33600 + }, + { + "epoch": 0.13578865290060885, + "grad_norm": 829.5675048828125, + "learning_rate": 3.9855649631126884e-05, + "loss": 142.7964, + "step": 33610 + }, + { + "epoch": 0.1358290541659765, + "grad_norm": 597.209716796875, + "learning_rate": 3.985531453287424e-05, + "loss": 111.9063, + "step": 33620 + }, + { + "epoch": 0.1358694554313441, + "grad_norm": 328.68450927734375, + "learning_rate": 3.985497904753201e-05, + "loss": 113.645, + "step": 33630 + }, + { + "epoch": 0.13590985669671174, + "grad_norm": 1595.978271484375, + "learning_rate": 3.9854643175106756e-05, + "loss": 136.7034, + "step": 33640 + }, + { + "epoch": 0.13595025796207938, + "grad_norm": 867.8397827148438, + "learning_rate": 3.9854306915605e-05, + "loss": 170.0007, + "step": 33650 + }, + { + "epoch": 0.135990659227447, + "grad_norm": 978.2813720703125, + "learning_rate": 3.985397026903331e-05, + "loss": 113.894, + "step": 33660 + }, + { + "epoch": 0.13603106049281463, + "grad_norm": 1143.8638916015625, + "learning_rate": 3.985363323539825e-05, + "loss": 133.1518, + "step": 33670 + }, + { + "epoch": 0.13607146175818227, + "grad_norm": 1719.9393310546875, + "learning_rate": 3.9853295814706395e-05, + "loss": 139.8665, + "step": 33680 + }, + { + "epoch": 0.13611186302354988, + "grad_norm": 684.0253295898438, + "learning_rate": 3.98529580069643e-05, + "loss": 149.6588, + "step": 33690 + }, + { + "epoch": 0.13615226428891752, + "grad_norm": 777.6710205078125, + "learning_rate": 
3.985261981217858e-05, + "loss": 100.7163, + "step": 33700 + }, + { + "epoch": 0.13619266555428516, + "grad_norm": 1301.423828125, + "learning_rate": 3.985228123035582e-05, + "loss": 94.2536, + "step": 33710 + }, + { + "epoch": 0.1362330668196528, + "grad_norm": 423.1181945800781, + "learning_rate": 3.98519422615026e-05, + "loss": 166.1481, + "step": 33720 + }, + { + "epoch": 0.13627346808502042, + "grad_norm": 933.4210815429688, + "learning_rate": 3.985160290562556e-05, + "loss": 168.0102, + "step": 33730 + }, + { + "epoch": 0.13631386935038806, + "grad_norm": 1161.91796875, + "learning_rate": 3.98512631627313e-05, + "loss": 114.1884, + "step": 33740 + }, + { + "epoch": 0.1363542706157557, + "grad_norm": 562.5159912109375, + "learning_rate": 3.985092303282645e-05, + "loss": 93.2983, + "step": 33750 + }, + { + "epoch": 0.1363946718811233, + "grad_norm": 996.3164672851562, + "learning_rate": 3.985058251591762e-05, + "loss": 111.7151, + "step": 33760 + }, + { + "epoch": 0.13643507314649095, + "grad_norm": 777.645263671875, + "learning_rate": 3.985024161201147e-05, + "loss": 115.3055, + "step": 33770 + }, + { + "epoch": 0.1364754744118586, + "grad_norm": 3154.5859375, + "learning_rate": 3.984990032111465e-05, + "loss": 166.999, + "step": 33780 + }, + { + "epoch": 0.1365158756772262, + "grad_norm": 2507.794921875, + "learning_rate": 3.984955864323379e-05, + "loss": 102.8698, + "step": 33790 + }, + { + "epoch": 0.13655627694259384, + "grad_norm": 942.3387451171875, + "learning_rate": 3.984921657837557e-05, + "loss": 136.6005, + "step": 33800 + }, + { + "epoch": 0.13659667820796148, + "grad_norm": 1606.1588134765625, + "learning_rate": 3.9848874126546654e-05, + "loss": 213.0493, + "step": 33810 + }, + { + "epoch": 0.1366370794733291, + "grad_norm": 789.1229858398438, + "learning_rate": 3.984853128775372e-05, + "loss": 125.0044, + "step": 33820 + }, + { + "epoch": 0.13667748073869673, + "grad_norm": 1186.13134765625, + "learning_rate": 3.9848188062003445e-05, + "loss": 159.3721, + "step": 33830 + }, + { + "epoch": 0.13671788200406437, + "grad_norm": 853.1769409179688, + "learning_rate": 3.984784444930253e-05, + "loss": 138.1079, + "step": 33840 + }, + { + "epoch": 0.13675828326943198, + "grad_norm": 654.4014282226562, + "learning_rate": 3.9847500449657666e-05, + "loss": 121.9892, + "step": 33850 + }, + { + "epoch": 0.13679868453479962, + "grad_norm": 1034.2353515625, + "learning_rate": 3.984715606307556e-05, + "loss": 105.5543, + "step": 33860 + }, + { + "epoch": 0.13683908580016726, + "grad_norm": 415.8559265136719, + "learning_rate": 3.984681128956294e-05, + "loss": 87.9513, + "step": 33870 + }, + { + "epoch": 0.1368794870655349, + "grad_norm": 561.7958984375, + "learning_rate": 3.9846466129126506e-05, + "loss": 197.0923, + "step": 33880 + }, + { + "epoch": 0.13691988833090252, + "grad_norm": 591.2171020507812, + "learning_rate": 3.9846120581773e-05, + "loss": 131.7922, + "step": 33890 + }, + { + "epoch": 0.13696028959627016, + "grad_norm": 419.35467529296875, + "learning_rate": 3.984577464750916e-05, + "loss": 110.4065, + "step": 33900 + }, + { + "epoch": 0.1370006908616378, + "grad_norm": 800.630126953125, + "learning_rate": 3.984542832634172e-05, + "loss": 166.8054, + "step": 33910 + }, + { + "epoch": 0.1370410921270054, + "grad_norm": 1054.1109619140625, + "learning_rate": 3.984508161827743e-05, + "loss": 157.2656, + "step": 33920 + }, + { + "epoch": 0.13708149339237305, + "grad_norm": 818.2747192382812, + "learning_rate": 3.984473452332307e-05, + "loss": 92.2328, + "step": 33930 + }, + { + 
"epoch": 0.1371218946577407, + "grad_norm": 1025.84765625, + "learning_rate": 3.9844387041485396e-05, + "loss": 144.3224, + "step": 33940 + }, + { + "epoch": 0.1371622959231083, + "grad_norm": 826.8859252929688, + "learning_rate": 3.9844039172771174e-05, + "loss": 100.5081, + "step": 33950 + }, + { + "epoch": 0.13720269718847594, + "grad_norm": 1066.3660888671875, + "learning_rate": 3.984369091718719e-05, + "loss": 136.5637, + "step": 33960 + }, + { + "epoch": 0.13724309845384358, + "grad_norm": 872.8981323242188, + "learning_rate": 3.984334227474023e-05, + "loss": 149.965, + "step": 33970 + }, + { + "epoch": 0.1372834997192112, + "grad_norm": 1562.130126953125, + "learning_rate": 3.984299324543711e-05, + "loss": 148.4507, + "step": 33980 + }, + { + "epoch": 0.13732390098457883, + "grad_norm": 1345.7823486328125, + "learning_rate": 3.984264382928461e-05, + "loss": 140.8162, + "step": 33990 + }, + { + "epoch": 0.13736430224994647, + "grad_norm": 1284.1568603515625, + "learning_rate": 3.9842294026289565e-05, + "loss": 161.3053, + "step": 34000 + }, + { + "epoch": 0.13740470351531409, + "grad_norm": 677.9437866210938, + "learning_rate": 3.984194383645877e-05, + "loss": 87.6147, + "step": 34010 + }, + { + "epoch": 0.13744510478068173, + "grad_norm": 852.3792114257812, + "learning_rate": 3.984159325979907e-05, + "loss": 144.1161, + "step": 34020 + }, + { + "epoch": 0.13748550604604937, + "grad_norm": 686.9984741210938, + "learning_rate": 3.9841242296317294e-05, + "loss": 102.9961, + "step": 34030 + }, + { + "epoch": 0.137525907311417, + "grad_norm": 1413.830322265625, + "learning_rate": 3.984089094602028e-05, + "loss": 72.2867, + "step": 34040 + }, + { + "epoch": 0.13756630857678462, + "grad_norm": 1061.6917724609375, + "learning_rate": 3.984053920891489e-05, + "loss": 139.7584, + "step": 34050 + }, + { + "epoch": 0.13760670984215226, + "grad_norm": 660.1600952148438, + "learning_rate": 3.9840187085007965e-05, + "loss": 137.5648, + "step": 34060 + }, + { + "epoch": 0.1376471111075199, + "grad_norm": 766.5801391601562, + "learning_rate": 3.983983457430639e-05, + "loss": 126.6397, + "step": 34070 + }, + { + "epoch": 0.1376875123728875, + "grad_norm": 631.565185546875, + "learning_rate": 3.983948167681701e-05, + "loss": 95.2355, + "step": 34080 + }, + { + "epoch": 0.13772791363825515, + "grad_norm": 1629.2388916015625, + "learning_rate": 3.983912839254673e-05, + "loss": 138.4958, + "step": 34090 + }, + { + "epoch": 0.1377683149036228, + "grad_norm": 906.3154296875, + "learning_rate": 3.983877472150243e-05, + "loss": 96.9015, + "step": 34100 + }, + { + "epoch": 0.1378087161689904, + "grad_norm": 1137.58642578125, + "learning_rate": 3.9838420663691e-05, + "loss": 131.9473, + "step": 34110 + }, + { + "epoch": 0.13784911743435804, + "grad_norm": 1249.020263671875, + "learning_rate": 3.9838066219119354e-05, + "loss": 123.9882, + "step": 34120 + }, + { + "epoch": 0.13788951869972568, + "grad_norm": 779.7269287109375, + "learning_rate": 3.983771138779438e-05, + "loss": 111.0432, + "step": 34130 + }, + { + "epoch": 0.1379299199650933, + "grad_norm": 6269.646484375, + "learning_rate": 3.983735616972301e-05, + "loss": 201.9346, + "step": 34140 + }, + { + "epoch": 0.13797032123046093, + "grad_norm": 672.8115234375, + "learning_rate": 3.983700056491218e-05, + "loss": 96.2786, + "step": 34150 + }, + { + "epoch": 0.13801072249582857, + "grad_norm": 825.7264404296875, + "learning_rate": 3.9836644573368804e-05, + "loss": 179.1345, + "step": 34160 + }, + { + "epoch": 0.1380511237611962, + "grad_norm": 
891.5259399414062, + "learning_rate": 3.9836288195099834e-05, + "loss": 151.6588, + "step": 34170 + }, + { + "epoch": 0.13809152502656383, + "grad_norm": 744.1069946289062, + "learning_rate": 3.983593143011221e-05, + "loss": 115.4225, + "step": 34180 + }, + { + "epoch": 0.13813192629193147, + "grad_norm": 1435.605224609375, + "learning_rate": 3.983557427841289e-05, + "loss": 161.962, + "step": 34190 + }, + { + "epoch": 0.1381723275572991, + "grad_norm": 1229.789306640625, + "learning_rate": 3.9835216740008835e-05, + "loss": 104.1514, + "step": 34200 + }, + { + "epoch": 0.13821272882266672, + "grad_norm": 1522.371826171875, + "learning_rate": 3.983485881490702e-05, + "loss": 156.9785, + "step": 34210 + }, + { + "epoch": 0.13825313008803436, + "grad_norm": 647.7590942382812, + "learning_rate": 3.983450050311442e-05, + "loss": 98.0584, + "step": 34220 + }, + { + "epoch": 0.138293531353402, + "grad_norm": 948.390625, + "learning_rate": 3.9834141804638024e-05, + "loss": 126.608, + "step": 34230 + }, + { + "epoch": 0.1383339326187696, + "grad_norm": 635.7077026367188, + "learning_rate": 3.983378271948482e-05, + "loss": 97.3255, + "step": 34240 + }, + { + "epoch": 0.13837433388413725, + "grad_norm": 1193.080810546875, + "learning_rate": 3.983342324766181e-05, + "loss": 118.5187, + "step": 34250 + }, + { + "epoch": 0.1384147351495049, + "grad_norm": 617.1396484375, + "learning_rate": 3.9833063389176005e-05, + "loss": 171.116, + "step": 34260 + }, + { + "epoch": 0.1384551364148725, + "grad_norm": 583.3624267578125, + "learning_rate": 3.983270314403442e-05, + "loss": 112.251, + "step": 34270 + }, + { + "epoch": 0.13849553768024014, + "grad_norm": 728.6265258789062, + "learning_rate": 3.983234251224407e-05, + "loss": 116.511, + "step": 34280 + }, + { + "epoch": 0.13853593894560778, + "grad_norm": 2527.92236328125, + "learning_rate": 3.9831981493812e-05, + "loss": 119.5826, + "step": 34290 + }, + { + "epoch": 0.1385763402109754, + "grad_norm": 682.3001098632812, + "learning_rate": 3.9831620088745236e-05, + "loss": 144.4016, + "step": 34300 + }, + { + "epoch": 0.13861674147634304, + "grad_norm": 1025.076171875, + "learning_rate": 3.983125829705083e-05, + "loss": 182.236, + "step": 34310 + }, + { + "epoch": 0.13865714274171068, + "grad_norm": 929.4486694335938, + "learning_rate": 3.983089611873583e-05, + "loss": 92.4219, + "step": 34320 + }, + { + "epoch": 0.1386975440070783, + "grad_norm": 3130.80224609375, + "learning_rate": 3.9830533553807306e-05, + "loss": 153.2803, + "step": 34330 + }, + { + "epoch": 0.13873794527244593, + "grad_norm": 405.67132568359375, + "learning_rate": 3.9830170602272316e-05, + "loss": 124.5212, + "step": 34340 + }, + { + "epoch": 0.13877834653781357, + "grad_norm": 656.3815307617188, + "learning_rate": 3.982980726413795e-05, + "loss": 94.5574, + "step": 34350 + }, + { + "epoch": 0.1388187478031812, + "grad_norm": 413.3248596191406, + "learning_rate": 3.9829443539411275e-05, + "loss": 126.9493, + "step": 34360 + }, + { + "epoch": 0.13885914906854882, + "grad_norm": 776.604248046875, + "learning_rate": 3.9829079428099386e-05, + "loss": 126.4711, + "step": 34370 + }, + { + "epoch": 0.13889955033391646, + "grad_norm": 870.8510131835938, + "learning_rate": 3.982871493020939e-05, + "loss": 108.9551, + "step": 34380 + }, + { + "epoch": 0.1389399515992841, + "grad_norm": 900.002197265625, + "learning_rate": 3.9828350045748395e-05, + "loss": 126.8261, + "step": 34390 + }, + { + "epoch": 0.1389803528646517, + "grad_norm": 671.0736694335938, + "learning_rate": 3.9827984774723495e-05, + 
"loss": 104.5464, + "step": 34400 + }, + { + "epoch": 0.13902075413001935, + "grad_norm": 1552.7880859375, + "learning_rate": 3.982761911714183e-05, + "loss": 99.5087, + "step": 34410 + }, + { + "epoch": 0.139061155395387, + "grad_norm": 1086.0291748046875, + "learning_rate": 3.982725307301052e-05, + "loss": 133.8073, + "step": 34420 + }, + { + "epoch": 0.1391015566607546, + "grad_norm": 580.0475463867188, + "learning_rate": 3.98268866423367e-05, + "loss": 175.7734, + "step": 34430 + }, + { + "epoch": 0.13914195792612225, + "grad_norm": 1236.258056640625, + "learning_rate": 3.9826519825127524e-05, + "loss": 145.9607, + "step": 34440 + }, + { + "epoch": 0.13918235919148988, + "grad_norm": 1061.1773681640625, + "learning_rate": 3.9826152621390137e-05, + "loss": 81.0122, + "step": 34450 + }, + { + "epoch": 0.1392227604568575, + "grad_norm": 1066.55615234375, + "learning_rate": 3.982578503113169e-05, + "loss": 108.2622, + "step": 34460 + }, + { + "epoch": 0.13926316172222514, + "grad_norm": 517.601318359375, + "learning_rate": 3.982541705435936e-05, + "loss": 174.4923, + "step": 34470 + }, + { + "epoch": 0.13930356298759278, + "grad_norm": 829.1947021484375, + "learning_rate": 3.982504869108033e-05, + "loss": 125.6301, + "step": 34480 + }, + { + "epoch": 0.1393439642529604, + "grad_norm": 927.6253051757812, + "learning_rate": 3.982467994130175e-05, + "loss": 119.1037, + "step": 34490 + }, + { + "epoch": 0.13938436551832803, + "grad_norm": 551.0982666015625, + "learning_rate": 3.982431080503084e-05, + "loss": 114.9758, + "step": 34500 + }, + { + "epoch": 0.13942476678369567, + "grad_norm": 472.5829772949219, + "learning_rate": 3.982394128227478e-05, + "loss": 120.2955, + "step": 34510 + }, + { + "epoch": 0.1394651680490633, + "grad_norm": 685.9826049804688, + "learning_rate": 3.982357137304078e-05, + "loss": 150.0716, + "step": 34520 + }, + { + "epoch": 0.13950556931443092, + "grad_norm": 2470.068603515625, + "learning_rate": 3.982320107733605e-05, + "loss": 115.9516, + "step": 34530 + }, + { + "epoch": 0.13954597057979856, + "grad_norm": 1066.0662841796875, + "learning_rate": 3.982283039516781e-05, + "loss": 110.9118, + "step": 34540 + }, + { + "epoch": 0.1395863718451662, + "grad_norm": 664.183837890625, + "learning_rate": 3.982245932654328e-05, + "loss": 172.6846, + "step": 34550 + }, + { + "epoch": 0.13962677311053381, + "grad_norm": 1006.4192504882812, + "learning_rate": 3.982208787146971e-05, + "loss": 154.3167, + "step": 34560 + }, + { + "epoch": 0.13966717437590145, + "grad_norm": 460.2478332519531, + "learning_rate": 3.9821716029954326e-05, + "loss": 87.4316, + "step": 34570 + }, + { + "epoch": 0.1397075756412691, + "grad_norm": 506.2437438964844, + "learning_rate": 3.9821343802004386e-05, + "loss": 97.9688, + "step": 34580 + }, + { + "epoch": 0.1397479769066367, + "grad_norm": 763.325439453125, + "learning_rate": 3.982097118762714e-05, + "loss": 110.6297, + "step": 34590 + }, + { + "epoch": 0.13978837817200435, + "grad_norm": 893.7081298828125, + "learning_rate": 3.982059818682986e-05, + "loss": 135.5909, + "step": 34600 + }, + { + "epoch": 0.13982877943737199, + "grad_norm": 1345.8963623046875, + "learning_rate": 3.982022479961981e-05, + "loss": 126.0485, + "step": 34610 + }, + { + "epoch": 0.1398691807027396, + "grad_norm": 1227.2159423828125, + "learning_rate": 3.981985102600428e-05, + "loss": 137.3648, + "step": 34620 + }, + { + "epoch": 0.13990958196810724, + "grad_norm": 1111.8953857421875, + "learning_rate": 3.981947686599054e-05, + "loss": 80.5232, + "step": 34630 + }, + { + 
"epoch": 0.13994998323347488, + "grad_norm": 1554.022216796875, + "learning_rate": 3.98191023195859e-05, + "loss": 138.9997, + "step": 34640 + }, + { + "epoch": 0.1399903844988425, + "grad_norm": 570.2767333984375, + "learning_rate": 3.9818727386797656e-05, + "loss": 76.4406, + "step": 34650 + }, + { + "epoch": 0.14003078576421013, + "grad_norm": 978.8657836914062, + "learning_rate": 3.981835206763312e-05, + "loss": 147.5171, + "step": 34660 + }, + { + "epoch": 0.14007118702957777, + "grad_norm": 598.7446899414062, + "learning_rate": 3.981797636209961e-05, + "loss": 121.7542, + "step": 34670 + }, + { + "epoch": 0.1401115882949454, + "grad_norm": 1228.340087890625, + "learning_rate": 3.981760027020444e-05, + "loss": 132.6995, + "step": 34680 + }, + { + "epoch": 0.14015198956031302, + "grad_norm": 3053.6123046875, + "learning_rate": 3.981722379195496e-05, + "loss": 180.6254, + "step": 34690 + }, + { + "epoch": 0.14019239082568066, + "grad_norm": 1573.31201171875, + "learning_rate": 3.981684692735849e-05, + "loss": 174.4991, + "step": 34700 + }, + { + "epoch": 0.1402327920910483, + "grad_norm": 1382.7957763671875, + "learning_rate": 3.9816469676422395e-05, + "loss": 133.4625, + "step": 34710 + }, + { + "epoch": 0.14027319335641592, + "grad_norm": 524.8442993164062, + "learning_rate": 3.981609203915402e-05, + "loss": 133.2045, + "step": 34720 + }, + { + "epoch": 0.14031359462178356, + "grad_norm": 1513.3636474609375, + "learning_rate": 3.9815714015560724e-05, + "loss": 141.6531, + "step": 34730 + }, + { + "epoch": 0.1403539958871512, + "grad_norm": 820.1348876953125, + "learning_rate": 3.981533560564988e-05, + "loss": 127.8892, + "step": 34740 + }, + { + "epoch": 0.1403943971525188, + "grad_norm": 1076.8193359375, + "learning_rate": 3.9814956809428876e-05, + "loss": 141.541, + "step": 34750 + }, + { + "epoch": 0.14043479841788645, + "grad_norm": 1694.561279296875, + "learning_rate": 3.981457762690508e-05, + "loss": 179.4938, + "step": 34760 + }, + { + "epoch": 0.1404751996832541, + "grad_norm": 803.285888671875, + "learning_rate": 3.98141980580859e-05, + "loss": 115.5285, + "step": 34770 + }, + { + "epoch": 0.1405156009486217, + "grad_norm": 988.6072998046875, + "learning_rate": 3.981381810297872e-05, + "loss": 126.5839, + "step": 34780 + }, + { + "epoch": 0.14055600221398934, + "grad_norm": 1008.846435546875, + "learning_rate": 3.9813437761590954e-05, + "loss": 122.7778, + "step": 34790 + }, + { + "epoch": 0.14059640347935698, + "grad_norm": 477.3916931152344, + "learning_rate": 3.981305703393002e-05, + "loss": 100.6049, + "step": 34800 + }, + { + "epoch": 0.1406368047447246, + "grad_norm": 890.0157470703125, + "learning_rate": 3.981267592000334e-05, + "loss": 103.7307, + "step": 34810 + }, + { + "epoch": 0.14067720601009223, + "grad_norm": 832.10888671875, + "learning_rate": 3.981229441981834e-05, + "loss": 139.9157, + "step": 34820 + }, + { + "epoch": 0.14071760727545987, + "grad_norm": 951.2162475585938, + "learning_rate": 3.9811912533382465e-05, + "loss": 109.9128, + "step": 34830 + }, + { + "epoch": 0.1407580085408275, + "grad_norm": 618.0935668945312, + "learning_rate": 3.981153026070315e-05, + "loss": 140.1183, + "step": 34840 + }, + { + "epoch": 0.14079840980619512, + "grad_norm": 6555.82861328125, + "learning_rate": 3.981114760178786e-05, + "loss": 130.9935, + "step": 34850 + }, + { + "epoch": 0.14083881107156276, + "grad_norm": 1075.351806640625, + "learning_rate": 3.981076455664405e-05, + "loss": 109.4626, + "step": 34860 + }, + { + "epoch": 0.1408792123369304, + "grad_norm": 
538.8637084960938, + "learning_rate": 3.981038112527918e-05, + "loss": 90.3007, + "step": 34870 + }, + { + "epoch": 0.14091961360229802, + "grad_norm": 1118.859619140625, + "learning_rate": 3.980999730770072e-05, + "loss": 98.7062, + "step": 34880 + }, + { + "epoch": 0.14096001486766566, + "grad_norm": 567.6984252929688, + "learning_rate": 3.9809613103916185e-05, + "loss": 185.1207, + "step": 34890 + }, + { + "epoch": 0.1410004161330333, + "grad_norm": 577.3124389648438, + "learning_rate": 3.980922851393303e-05, + "loss": 112.2362, + "step": 34900 + }, + { + "epoch": 0.1410408173984009, + "grad_norm": 888.9127197265625, + "learning_rate": 3.980884353775877e-05, + "loss": 140.6681, + "step": 34910 + }, + { + "epoch": 0.14108121866376855, + "grad_norm": 1655.10400390625, + "learning_rate": 3.980845817540091e-05, + "loss": 100.6395, + "step": 34920 + }, + { + "epoch": 0.1411216199291362, + "grad_norm": 512.0317993164062, + "learning_rate": 3.980807242686695e-05, + "loss": 73.9656, + "step": 34930 + }, + { + "epoch": 0.1411620211945038, + "grad_norm": 462.8865661621094, + "learning_rate": 3.980768629216443e-05, + "loss": 182.0546, + "step": 34940 + }, + { + "epoch": 0.14120242245987144, + "grad_norm": 1377.5966796875, + "learning_rate": 3.980729977130086e-05, + "loss": 149.634, + "step": 34950 + }, + { + "epoch": 0.14124282372523908, + "grad_norm": 6475.51025390625, + "learning_rate": 3.9806912864283796e-05, + "loss": 132.8484, + "step": 34960 + }, + { + "epoch": 0.1412832249906067, + "grad_norm": 615.7251586914062, + "learning_rate": 3.9806525571120754e-05, + "loss": 112.6822, + "step": 34970 + }, + { + "epoch": 0.14132362625597433, + "grad_norm": 1491.3458251953125, + "learning_rate": 3.9806137891819306e-05, + "loss": 145.9468, + "step": 34980 + }, + { + "epoch": 0.14136402752134197, + "grad_norm": 643.1716918945312, + "learning_rate": 3.9805749826387005e-05, + "loss": 116.8686, + "step": 34990 + }, + { + "epoch": 0.1414044287867096, + "grad_norm": 1224.2078857421875, + "learning_rate": 3.980536137483141e-05, + "loss": 267.4613, + "step": 35000 + }, + { + "epoch": 0.14144483005207723, + "grad_norm": 372.0586853027344, + "learning_rate": 3.98049725371601e-05, + "loss": 97.3618, + "step": 35010 + }, + { + "epoch": 0.14148523131744487, + "grad_norm": 1639.76220703125, + "learning_rate": 3.980458331338065e-05, + "loss": 127.2346, + "step": 35020 + }, + { + "epoch": 0.1415256325828125, + "grad_norm": 934.42333984375, + "learning_rate": 3.980419370350066e-05, + "loss": 140.7911, + "step": 35030 + }, + { + "epoch": 0.14156603384818012, + "grad_norm": 1280.28759765625, + "learning_rate": 3.980380370752771e-05, + "loss": 126.6859, + "step": 35040 + }, + { + "epoch": 0.14160643511354776, + "grad_norm": 1090.4830322265625, + "learning_rate": 3.980341332546942e-05, + "loss": 120.1273, + "step": 35050 + }, + { + "epoch": 0.1416468363789154, + "grad_norm": 2702.21826171875, + "learning_rate": 3.9803022557333387e-05, + "loss": 141.7671, + "step": 35060 + }, + { + "epoch": 0.141687237644283, + "grad_norm": 588.3932495117188, + "learning_rate": 3.9802631403127234e-05, + "loss": 97.5273, + "step": 35070 + }, + { + "epoch": 0.14172763890965065, + "grad_norm": 1268.1868896484375, + "learning_rate": 3.980223986285859e-05, + "loss": 169.1878, + "step": 35080 + }, + { + "epoch": 0.1417680401750183, + "grad_norm": 920.8447875976562, + "learning_rate": 3.980184793653508e-05, + "loss": 99.2415, + "step": 35090 + }, + { + "epoch": 0.1418084414403859, + "grad_norm": 1807.7882080078125, + "learning_rate": 
3.9801455624164354e-05, + "loss": 154.3956, + "step": 35100 + }, + { + "epoch": 0.14184884270575354, + "grad_norm": 1301.133544921875, + "learning_rate": 3.980106292575405e-05, + "loss": 115.501, + "step": 35110 + }, + { + "epoch": 0.14188924397112118, + "grad_norm": 1520.3946533203125, + "learning_rate": 3.980066984131184e-05, + "loss": 204.2552, + "step": 35120 + }, + { + "epoch": 0.1419296452364888, + "grad_norm": 882.9544067382812, + "learning_rate": 3.980027637084537e-05, + "loss": 152.2425, + "step": 35130 + }, + { + "epoch": 0.14197004650185643, + "grad_norm": 305.9420471191406, + "learning_rate": 3.979988251436232e-05, + "loss": 129.3553, + "step": 35140 + }, + { + "epoch": 0.14201044776722407, + "grad_norm": 360.93157958984375, + "learning_rate": 3.979948827187036e-05, + "loss": 112.3378, + "step": 35150 + }, + { + "epoch": 0.14205084903259171, + "grad_norm": 792.1229248046875, + "learning_rate": 3.9799093643377195e-05, + "loss": 126.1161, + "step": 35160 + }, + { + "epoch": 0.14209125029795933, + "grad_norm": 855.732421875, + "learning_rate": 3.97986986288905e-05, + "loss": 140.2881, + "step": 35170 + }, + { + "epoch": 0.14213165156332697, + "grad_norm": 1778.4735107421875, + "learning_rate": 3.9798303228417975e-05, + "loss": 115.8388, + "step": 35180 + }, + { + "epoch": 0.1421720528286946, + "grad_norm": 620.6005859375, + "learning_rate": 3.9797907441967345e-05, + "loss": 216.7416, + "step": 35190 + }, + { + "epoch": 0.14221245409406222, + "grad_norm": 533.9338989257812, + "learning_rate": 3.979751126954632e-05, + "loss": 102.089, + "step": 35200 + }, + { + "epoch": 0.14225285535942986, + "grad_norm": 455.0955505371094, + "learning_rate": 3.979711471116261e-05, + "loss": 81.4994, + "step": 35210 + }, + { + "epoch": 0.1422932566247975, + "grad_norm": 1457.409423828125, + "learning_rate": 3.9796717766823966e-05, + "loss": 133.5144, + "step": 35220 + }, + { + "epoch": 0.1423336578901651, + "grad_norm": 906.4509887695312, + "learning_rate": 3.979632043653811e-05, + "loss": 104.2994, + "step": 35230 + }, + { + "epoch": 0.14237405915553275, + "grad_norm": 1376.327392578125, + "learning_rate": 3.9795922720312796e-05, + "loss": 178.9437, + "step": 35240 + }, + { + "epoch": 0.1424144604209004, + "grad_norm": 1815.6072998046875, + "learning_rate": 3.979552461815578e-05, + "loss": 118.9261, + "step": 35250 + }, + { + "epoch": 0.142454861686268, + "grad_norm": 911.7725830078125, + "learning_rate": 3.9795126130074824e-05, + "loss": 152.1194, + "step": 35260 + }, + { + "epoch": 0.14249526295163564, + "grad_norm": 642.30126953125, + "learning_rate": 3.9794727256077685e-05, + "loss": 139.1325, + "step": 35270 + }, + { + "epoch": 0.14253566421700328, + "grad_norm": 2802.644775390625, + "learning_rate": 3.979432799617215e-05, + "loss": 143.4861, + "step": 35280 + }, + { + "epoch": 0.1425760654823709, + "grad_norm": 1364.61328125, + "learning_rate": 3.9793928350366e-05, + "loss": 134.8358, + "step": 35290 + }, + { + "epoch": 0.14261646674773854, + "grad_norm": 776.8970947265625, + "learning_rate": 3.979352831866702e-05, + "loss": 77.6002, + "step": 35300 + }, + { + "epoch": 0.14265686801310618, + "grad_norm": 757.1177368164062, + "learning_rate": 3.979312790108303e-05, + "loss": 136.3352, + "step": 35310 + }, + { + "epoch": 0.14269726927847382, + "grad_norm": 1445.809326171875, + "learning_rate": 3.979272709762181e-05, + "loss": 115.6219, + "step": 35320 + }, + { + "epoch": 0.14273767054384143, + "grad_norm": 934.6282958984375, + "learning_rate": 3.979232590829119e-05, + "loss": 113.465, + "step": 
35330 + }, + { + "epoch": 0.14277807180920907, + "grad_norm": 885.79052734375, + "learning_rate": 3.9791924333098986e-05, + "loss": 145.6691, + "step": 35340 + }, + { + "epoch": 0.1428184730745767, + "grad_norm": 1022.7625122070312, + "learning_rate": 3.979152237205302e-05, + "loss": 151.6276, + "step": 35350 + }, + { + "epoch": 0.14285887433994432, + "grad_norm": 978.7564697265625, + "learning_rate": 3.979112002516115e-05, + "loss": 119.5776, + "step": 35360 + }, + { + "epoch": 0.14289927560531196, + "grad_norm": 1293.5147705078125, + "learning_rate": 3.979071729243119e-05, + "loss": 120.0677, + "step": 35370 + }, + { + "epoch": 0.1429396768706796, + "grad_norm": 1080.4659423828125, + "learning_rate": 3.979031417387102e-05, + "loss": 120.5073, + "step": 35380 + }, + { + "epoch": 0.1429800781360472, + "grad_norm": 1714.921875, + "learning_rate": 3.978991066948848e-05, + "loss": 156.6357, + "step": 35390 + }, + { + "epoch": 0.14302047940141485, + "grad_norm": 744.2547607421875, + "learning_rate": 3.9789506779291445e-05, + "loss": 135.405, + "step": 35400 + }, + { + "epoch": 0.1430608806667825, + "grad_norm": 1246.230224609375, + "learning_rate": 3.978910250328779e-05, + "loss": 133.5551, + "step": 35410 + }, + { + "epoch": 0.1431012819321501, + "grad_norm": 1541.115234375, + "learning_rate": 3.978869784148539e-05, + "loss": 118.4711, + "step": 35420 + }, + { + "epoch": 0.14314168319751774, + "grad_norm": 748.83544921875, + "learning_rate": 3.978829279389214e-05, + "loss": 143.0336, + "step": 35430 + }, + { + "epoch": 0.14318208446288538, + "grad_norm": 1258.0997314453125, + "learning_rate": 3.978788736051593e-05, + "loss": 134.2704, + "step": 35440 + }, + { + "epoch": 0.143222485728253, + "grad_norm": 1493.4451904296875, + "learning_rate": 3.978748154136467e-05, + "loss": 122.0006, + "step": 35450 + }, + { + "epoch": 0.14326288699362064, + "grad_norm": 2789.663330078125, + "learning_rate": 3.978707533644627e-05, + "loss": 128.7108, + "step": 35460 + }, + { + "epoch": 0.14330328825898828, + "grad_norm": 1062.217041015625, + "learning_rate": 3.978666874576865e-05, + "loss": 110.9679, + "step": 35470 + }, + { + "epoch": 0.14334368952435592, + "grad_norm": 1046.535400390625, + "learning_rate": 3.978626176933974e-05, + "loss": 112.938, + "step": 35480 + }, + { + "epoch": 0.14338409078972353, + "grad_norm": 1007.4627075195312, + "learning_rate": 3.978585440716746e-05, + "loss": 129.1709, + "step": 35490 + }, + { + "epoch": 0.14342449205509117, + "grad_norm": 1156.8944091796875, + "learning_rate": 3.978544665925977e-05, + "loss": 119.5131, + "step": 35500 + }, + { + "epoch": 0.1434648933204588, + "grad_norm": 1109.68798828125, + "learning_rate": 3.978503852562461e-05, + "loss": 72.4097, + "step": 35510 + }, + { + "epoch": 0.14350529458582642, + "grad_norm": 1165.9935302734375, + "learning_rate": 3.9784630006269935e-05, + "loss": 120.3949, + "step": 35520 + }, + { + "epoch": 0.14354569585119406, + "grad_norm": 651.5706787109375, + "learning_rate": 3.9784221101203715e-05, + "loss": 112.3331, + "step": 35530 + }, + { + "epoch": 0.1435860971165617, + "grad_norm": 1412.05126953125, + "learning_rate": 3.978381181043392e-05, + "loss": 130.7323, + "step": 35540 + }, + { + "epoch": 0.1436264983819293, + "grad_norm": 682.440185546875, + "learning_rate": 3.978340213396853e-05, + "loss": 153.211, + "step": 35550 + }, + { + "epoch": 0.14366689964729695, + "grad_norm": 702.2469482421875, + "learning_rate": 3.978299207181552e-05, + "loss": 173.6467, + "step": 35560 + }, + { + "epoch": 0.1437073009126646, + 
"grad_norm": 750.87890625, + "learning_rate": 3.97825816239829e-05, + "loss": 102.6199, + "step": 35570 + }, + { + "epoch": 0.1437477021780322, + "grad_norm": 1289.4617919921875, + "learning_rate": 3.978217079047867e-05, + "loss": 106.9358, + "step": 35580 + }, + { + "epoch": 0.14378810344339985, + "grad_norm": 885.3905639648438, + "learning_rate": 3.9781759571310835e-05, + "loss": 111.3248, + "step": 35590 + }, + { + "epoch": 0.14382850470876749, + "grad_norm": 1187.9512939453125, + "learning_rate": 3.9781347966487415e-05, + "loss": 142.3547, + "step": 35600 + }, + { + "epoch": 0.1438689059741351, + "grad_norm": 771.9111938476562, + "learning_rate": 3.978093597601643e-05, + "loss": 102.5182, + "step": 35610 + }, + { + "epoch": 0.14390930723950274, + "grad_norm": 1321.20361328125, + "learning_rate": 3.978052359990591e-05, + "loss": 118.1426, + "step": 35620 + }, + { + "epoch": 0.14394970850487038, + "grad_norm": 536.6057739257812, + "learning_rate": 3.97801108381639e-05, + "loss": 101.3915, + "step": 35630 + }, + { + "epoch": 0.14399010977023802, + "grad_norm": 1025.593017578125, + "learning_rate": 3.977969769079845e-05, + "loss": 124.9002, + "step": 35640 + }, + { + "epoch": 0.14403051103560563, + "grad_norm": 1874.1297607421875, + "learning_rate": 3.977928415781761e-05, + "loss": 112.6598, + "step": 35650 + }, + { + "epoch": 0.14407091230097327, + "grad_norm": 829.4088134765625, + "learning_rate": 3.977887023922944e-05, + "loss": 112.2254, + "step": 35660 + }, + { + "epoch": 0.1441113135663409, + "grad_norm": 1004.2279052734375, + "learning_rate": 3.977845593504201e-05, + "loss": 162.6172, + "step": 35670 + }, + { + "epoch": 0.14415171483170852, + "grad_norm": 375.84222412109375, + "learning_rate": 3.97780412452634e-05, + "loss": 103.3971, + "step": 35680 + }, + { + "epoch": 0.14419211609707616, + "grad_norm": 695.1307983398438, + "learning_rate": 3.97776261699017e-05, + "loss": 106.9054, + "step": 35690 + }, + { + "epoch": 0.1442325173624438, + "grad_norm": 580.1751098632812, + "learning_rate": 3.977721070896499e-05, + "loss": 98.0604, + "step": 35700 + }, + { + "epoch": 0.14427291862781141, + "grad_norm": 1370.015380859375, + "learning_rate": 3.977679486246137e-05, + "loss": 176.7546, + "step": 35710 + }, + { + "epoch": 0.14431331989317905, + "grad_norm": 728.2544555664062, + "learning_rate": 3.977637863039896e-05, + "loss": 107.627, + "step": 35720 + }, + { + "epoch": 0.1443537211585467, + "grad_norm": 1004.0790405273438, + "learning_rate": 3.9775962012785863e-05, + "loss": 97.4113, + "step": 35730 + }, + { + "epoch": 0.1443941224239143, + "grad_norm": 609.0371704101562, + "learning_rate": 3.9775545009630204e-05, + "loss": 136.6018, + "step": 35740 + }, + { + "epoch": 0.14443452368928195, + "grad_norm": 521.3064575195312, + "learning_rate": 3.977512762094012e-05, + "loss": 129.6157, + "step": 35750 + }, + { + "epoch": 0.1444749249546496, + "grad_norm": 2058.58251953125, + "learning_rate": 3.977470984672374e-05, + "loss": 150.7478, + "step": 35760 + }, + { + "epoch": 0.1445153262200172, + "grad_norm": 494.3297424316406, + "learning_rate": 3.977429168698921e-05, + "loss": 139.8725, + "step": 35770 + }, + { + "epoch": 0.14455572748538484, + "grad_norm": 717.91748046875, + "learning_rate": 3.9773873141744684e-05, + "loss": 148.8352, + "step": 35780 + }, + { + "epoch": 0.14459612875075248, + "grad_norm": 462.2386474609375, + "learning_rate": 3.9773454210998315e-05, + "loss": 142.6202, + "step": 35790 + }, + { + "epoch": 0.14463653001612012, + "grad_norm": 899.5321655273438, + 
"learning_rate": 3.977303489475828e-05, + "loss": 137.9766, + "step": 35800 + }, + { + "epoch": 0.14467693128148773, + "grad_norm": 699.9091186523438, + "learning_rate": 3.977261519303275e-05, + "loss": 157.035, + "step": 35810 + }, + { + "epoch": 0.14471733254685537, + "grad_norm": 494.4333801269531, + "learning_rate": 3.9772195105829914e-05, + "loss": 113.3979, + "step": 35820 + }, + { + "epoch": 0.144757733812223, + "grad_norm": 1088.2659912109375, + "learning_rate": 3.977177463315795e-05, + "loss": 97.1192, + "step": 35830 + }, + { + "epoch": 0.14479813507759062, + "grad_norm": 841.1034545898438, + "learning_rate": 3.977135377502506e-05, + "loss": 105.6075, + "step": 35840 + }, + { + "epoch": 0.14483853634295826, + "grad_norm": 1257.27587890625, + "learning_rate": 3.9770932531439445e-05, + "loss": 131.2648, + "step": 35850 + }, + { + "epoch": 0.1448789376083259, + "grad_norm": 1978.4200439453125, + "learning_rate": 3.977051090240933e-05, + "loss": 101.1551, + "step": 35860 + }, + { + "epoch": 0.14491933887369352, + "grad_norm": 583.8585205078125, + "learning_rate": 3.977008888794291e-05, + "loss": 109.5632, + "step": 35870 + }, + { + "epoch": 0.14495974013906116, + "grad_norm": 614.433349609375, + "learning_rate": 3.976966648804845e-05, + "loss": 189.702, + "step": 35880 + }, + { + "epoch": 0.1450001414044288, + "grad_norm": 1222.4158935546875, + "learning_rate": 3.976924370273415e-05, + "loss": 128.2718, + "step": 35890 + }, + { + "epoch": 0.1450405426697964, + "grad_norm": 1477.7098388671875, + "learning_rate": 3.976882053200827e-05, + "loss": 94.4627, + "step": 35900 + }, + { + "epoch": 0.14508094393516405, + "grad_norm": 762.8455810546875, + "learning_rate": 3.976839697587906e-05, + "loss": 87.8193, + "step": 35910 + }, + { + "epoch": 0.1451213452005317, + "grad_norm": 630.220458984375, + "learning_rate": 3.9767973034354764e-05, + "loss": 105.5952, + "step": 35920 + }, + { + "epoch": 0.1451617464658993, + "grad_norm": 2364.343994140625, + "learning_rate": 3.9767548707443665e-05, + "loss": 164.3319, + "step": 35930 + }, + { + "epoch": 0.14520214773126694, + "grad_norm": 1054.977783203125, + "learning_rate": 3.976712399515402e-05, + "loss": 147.3589, + "step": 35940 + }, + { + "epoch": 0.14524254899663458, + "grad_norm": 978.6192626953125, + "learning_rate": 3.976669889749412e-05, + "loss": 98.5679, + "step": 35950 + }, + { + "epoch": 0.14528295026200222, + "grad_norm": 1282.5118408203125, + "learning_rate": 3.9766273414472254e-05, + "loss": 96.1684, + "step": 35960 + }, + { + "epoch": 0.14532335152736983, + "grad_norm": 1375.177978515625, + "learning_rate": 3.97658475460967e-05, + "loss": 118.4015, + "step": 35970 + }, + { + "epoch": 0.14536375279273747, + "grad_norm": 1553.1148681640625, + "learning_rate": 3.976542129237578e-05, + "loss": 98.8437, + "step": 35980 + }, + { + "epoch": 0.1454041540581051, + "grad_norm": 466.7421875, + "learning_rate": 3.976499465331779e-05, + "loss": 158.148, + "step": 35990 + }, + { + "epoch": 0.14544455532347272, + "grad_norm": 1377.8485107421875, + "learning_rate": 3.976456762893106e-05, + "loss": 178.9227, + "step": 36000 + }, + { + "epoch": 0.14548495658884036, + "grad_norm": 615.134521484375, + "learning_rate": 3.976414021922391e-05, + "loss": 76.745, + "step": 36010 + }, + { + "epoch": 0.145525357854208, + "grad_norm": 697.2374267578125, + "learning_rate": 3.9763712424204664e-05, + "loss": 111.7916, + "step": 36020 + }, + { + "epoch": 0.14556575911957562, + "grad_norm": 622.368408203125, + "learning_rate": 3.976328424388167e-05, + "loss": 
162.9095, + "step": 36030 + }, + { + "epoch": 0.14560616038494326, + "grad_norm": 524.0721435546875, + "learning_rate": 3.9762855678263274e-05, + "loss": 156.6125, + "step": 36040 + }, + { + "epoch": 0.1456465616503109, + "grad_norm": 701.468994140625, + "learning_rate": 3.976242672735784e-05, + "loss": 126.2353, + "step": 36050 + }, + { + "epoch": 0.1456869629156785, + "grad_norm": 1414.1336669921875, + "learning_rate": 3.9761997391173715e-05, + "loss": 153.7609, + "step": 36060 + }, + { + "epoch": 0.14572736418104615, + "grad_norm": 528.4535522460938, + "learning_rate": 3.976156766971928e-05, + "loss": 131.2976, + "step": 36070 + }, + { + "epoch": 0.1457677654464138, + "grad_norm": 666.9011840820312, + "learning_rate": 3.976113756300291e-05, + "loss": 101.8794, + "step": 36080 + }, + { + "epoch": 0.1458081667117814, + "grad_norm": 1051.8175048828125, + "learning_rate": 3.976070707103299e-05, + "loss": 132.463, + "step": 36090 + }, + { + "epoch": 0.14584856797714904, + "grad_norm": 728.2026977539062, + "learning_rate": 3.976027619381791e-05, + "loss": 132.3403, + "step": 36100 + }, + { + "epoch": 0.14588896924251668, + "grad_norm": 1082.7073974609375, + "learning_rate": 3.975984493136607e-05, + "loss": 87.3729, + "step": 36110 + }, + { + "epoch": 0.14592937050788432, + "grad_norm": 672.6443481445312, + "learning_rate": 3.975941328368588e-05, + "loss": 206.4426, + "step": 36120 + }, + { + "epoch": 0.14596977177325193, + "grad_norm": 2723.217041015625, + "learning_rate": 3.975898125078576e-05, + "loss": 180.7279, + "step": 36130 + }, + { + "epoch": 0.14601017303861957, + "grad_norm": 1148.548828125, + "learning_rate": 3.9758548832674126e-05, + "loss": 112.6116, + "step": 36140 + }, + { + "epoch": 0.1460505743039872, + "grad_norm": 1006.9478759765625, + "learning_rate": 3.975811602935941e-05, + "loss": 87.9651, + "step": 36150 + }, + { + "epoch": 0.14609097556935483, + "grad_norm": 802.0869140625, + "learning_rate": 3.9757682840850045e-05, + "loss": 96.7793, + "step": 36160 + }, + { + "epoch": 0.14613137683472247, + "grad_norm": 1405.8912353515625, + "learning_rate": 3.975724926715449e-05, + "loss": 132.7144, + "step": 36170 + }, + { + "epoch": 0.1461717781000901, + "grad_norm": 1191.3577880859375, + "learning_rate": 3.9756815308281185e-05, + "loss": 134.2358, + "step": 36180 + }, + { + "epoch": 0.14621217936545772, + "grad_norm": 452.72442626953125, + "learning_rate": 3.975638096423859e-05, + "loss": 182.4834, + "step": 36190 + }, + { + "epoch": 0.14625258063082536, + "grad_norm": 1631.275146484375, + "learning_rate": 3.975594623503518e-05, + "loss": 100.7288, + "step": 36200 + }, + { + "epoch": 0.146292981896193, + "grad_norm": 906.3159790039062, + "learning_rate": 3.975551112067943e-05, + "loss": 96.9079, + "step": 36210 + }, + { + "epoch": 0.1463333831615606, + "grad_norm": 7756.78857421875, + "learning_rate": 3.975507562117982e-05, + "loss": 165.5746, + "step": 36220 + }, + { + "epoch": 0.14637378442692825, + "grad_norm": 1002.5360107421875, + "learning_rate": 3.975463973654483e-05, + "loss": 122.1186, + "step": 36230 + }, + { + "epoch": 0.1464141856922959, + "grad_norm": 3894.195068359375, + "learning_rate": 3.975420346678298e-05, + "loss": 106.5052, + "step": 36240 + }, + { + "epoch": 0.1464545869576635, + "grad_norm": 569.20166015625, + "learning_rate": 3.9753766811902756e-05, + "loss": 105.523, + "step": 36250 + }, + { + "epoch": 0.14649498822303114, + "grad_norm": 1008.329833984375, + "learning_rate": 3.975332977191268e-05, + "loss": 130.0896, + "step": 36260 + }, + { + "epoch": 
0.14653538948839878, + "grad_norm": 2308.722900390625, + "learning_rate": 3.9752892346821274e-05, + "loss": 276.0386, + "step": 36270 + }, + { + "epoch": 0.14657579075376642, + "grad_norm": 1136.069091796875, + "learning_rate": 3.975245453663706e-05, + "loss": 122.4406, + "step": 36280 + }, + { + "epoch": 0.14661619201913403, + "grad_norm": 796.208740234375, + "learning_rate": 3.9752016341368574e-05, + "loss": 113.3347, + "step": 36290 + }, + { + "epoch": 0.14665659328450167, + "grad_norm": 951.2258911132812, + "learning_rate": 3.9751577761024366e-05, + "loss": 91.179, + "step": 36300 + }, + { + "epoch": 0.14669699454986931, + "grad_norm": 883.3008422851562, + "learning_rate": 3.9751138795612976e-05, + "loss": 133.8784, + "step": 36310 + }, + { + "epoch": 0.14673739581523693, + "grad_norm": 347.888916015625, + "learning_rate": 3.975069944514296e-05, + "loss": 136.3518, + "step": 36320 + }, + { + "epoch": 0.14677779708060457, + "grad_norm": 1272.9796142578125, + "learning_rate": 3.97502597096229e-05, + "loss": 133.9726, + "step": 36330 + }, + { + "epoch": 0.1468181983459722, + "grad_norm": 728.307373046875, + "learning_rate": 3.9749819589061354e-05, + "loss": 93.972, + "step": 36340 + }, + { + "epoch": 0.14685859961133982, + "grad_norm": 625.9700317382812, + "learning_rate": 3.974937908346691e-05, + "loss": 103.6204, + "step": 36350 + }, + { + "epoch": 0.14689900087670746, + "grad_norm": 1135.3525390625, + "learning_rate": 3.974893819284815e-05, + "loss": 144.6614, + "step": 36360 + }, + { + "epoch": 0.1469394021420751, + "grad_norm": 1339.2152099609375, + "learning_rate": 3.9748496917213675e-05, + "loss": 150.5841, + "step": 36370 + }, + { + "epoch": 0.1469798034074427, + "grad_norm": 696.838623046875, + "learning_rate": 3.9748055256572084e-05, + "loss": 96.8959, + "step": 36380 + }, + { + "epoch": 0.14702020467281035, + "grad_norm": 453.3526611328125, + "learning_rate": 3.974761321093199e-05, + "loss": 112.8567, + "step": 36390 + }, + { + "epoch": 0.147060605938178, + "grad_norm": 757.9246215820312, + "learning_rate": 3.974717078030201e-05, + "loss": 91.3925, + "step": 36400 + }, + { + "epoch": 0.1471010072035456, + "grad_norm": 721.97021484375, + "learning_rate": 3.974672796469077e-05, + "loss": 127.452, + "step": 36410 + }, + { + "epoch": 0.14714140846891324, + "grad_norm": 636.8889770507812, + "learning_rate": 3.97462847641069e-05, + "loss": 85.0422, + "step": 36420 + }, + { + "epoch": 0.14718180973428088, + "grad_norm": 1690.491455078125, + "learning_rate": 3.974584117855904e-05, + "loss": 155.7471, + "step": 36430 + }, + { + "epoch": 0.14722221099964852, + "grad_norm": 1085.442626953125, + "learning_rate": 3.974539720805585e-05, + "loss": 105.8353, + "step": 36440 + }, + { + "epoch": 0.14726261226501614, + "grad_norm": 1030.6629638671875, + "learning_rate": 3.9744952852605965e-05, + "loss": 111.4878, + "step": 36450 + }, + { + "epoch": 0.14730301353038378, + "grad_norm": 1591.381103515625, + "learning_rate": 3.974450811221806e-05, + "loss": 125.641, + "step": 36460 + }, + { + "epoch": 0.14734341479575142, + "grad_norm": 814.4616088867188, + "learning_rate": 3.974406298690081e-05, + "loss": 97.8853, + "step": 36470 + }, + { + "epoch": 0.14738381606111903, + "grad_norm": 575.5509033203125, + "learning_rate": 3.974361747666288e-05, + "loss": 97.2614, + "step": 36480 + }, + { + "epoch": 0.14742421732648667, + "grad_norm": 1403.9656982421875, + "learning_rate": 3.974317158151297e-05, + "loss": 110.5862, + "step": 36490 + }, + { + "epoch": 0.1474646185918543, + "grad_norm": 
411.0549011230469, + "learning_rate": 3.974272530145976e-05, + "loss": 84.1075, + "step": 36500 + }, + { + "epoch": 0.14750501985722192, + "grad_norm": 553.505859375, + "learning_rate": 3.974227863651196e-05, + "loss": 116.0123, + "step": 36510 + }, + { + "epoch": 0.14754542112258956, + "grad_norm": 1367.4871826171875, + "learning_rate": 3.974183158667827e-05, + "loss": 123.3555, + "step": 36520 + }, + { + "epoch": 0.1475858223879572, + "grad_norm": 1020.2413330078125, + "learning_rate": 3.9741384151967416e-05, + "loss": 101.3395, + "step": 36530 + }, + { + "epoch": 0.1476262236533248, + "grad_norm": 620.882568359375, + "learning_rate": 3.974093633238811e-05, + "loss": 113.0084, + "step": 36540 + }, + { + "epoch": 0.14766662491869245, + "grad_norm": 698.5300903320312, + "learning_rate": 3.974048812794908e-05, + "loss": 111.7038, + "step": 36550 + }, + { + "epoch": 0.1477070261840601, + "grad_norm": 694.37451171875, + "learning_rate": 3.974003953865908e-05, + "loss": 110.638, + "step": 36560 + }, + { + "epoch": 0.1477474274494277, + "grad_norm": 768.0260009765625, + "learning_rate": 3.973959056452685e-05, + "loss": 143.9316, + "step": 36570 + }, + { + "epoch": 0.14778782871479534, + "grad_norm": 627.1959228515625, + "learning_rate": 3.9739141205561126e-05, + "loss": 144.8442, + "step": 36580 + }, + { + "epoch": 0.14782822998016298, + "grad_norm": 581.3162841796875, + "learning_rate": 3.973869146177069e-05, + "loss": 105.8955, + "step": 36590 + }, + { + "epoch": 0.1478686312455306, + "grad_norm": 1262.4273681640625, + "learning_rate": 3.973824133316431e-05, + "loss": 124.114, + "step": 36600 + }, + { + "epoch": 0.14790903251089824, + "grad_norm": 1194.6455078125, + "learning_rate": 3.973779081975074e-05, + "loss": 132.0601, + "step": 36610 + }, + { + "epoch": 0.14794943377626588, + "grad_norm": 426.4359130859375, + "learning_rate": 3.9737339921538775e-05, + "loss": 153.6922, + "step": 36620 + }, + { + "epoch": 0.14798983504163352, + "grad_norm": 1140.21435546875, + "learning_rate": 3.973688863853721e-05, + "loss": 123.7343, + "step": 36630 + }, + { + "epoch": 0.14803023630700113, + "grad_norm": 1143.5390625, + "learning_rate": 3.973643697075485e-05, + "loss": 114.4892, + "step": 36640 + }, + { + "epoch": 0.14807063757236877, + "grad_norm": 1819.6142578125, + "learning_rate": 3.973598491820048e-05, + "loss": 150.4484, + "step": 36650 + }, + { + "epoch": 0.1481110388377364, + "grad_norm": 3741.560546875, + "learning_rate": 3.9735532480882925e-05, + "loss": 159.0105, + "step": 36660 + }, + { + "epoch": 0.14815144010310402, + "grad_norm": 1006.7191162109375, + "learning_rate": 3.9735079658811006e-05, + "loss": 126.89, + "step": 36670 + }, + { + "epoch": 0.14819184136847166, + "grad_norm": 647.126708984375, + "learning_rate": 3.973462645199355e-05, + "loss": 114.8247, + "step": 36680 + }, + { + "epoch": 0.1482322426338393, + "grad_norm": 906.17236328125, + "learning_rate": 3.9734172860439385e-05, + "loss": 86.9737, + "step": 36690 + }, + { + "epoch": 0.1482726438992069, + "grad_norm": 493.9269714355469, + "learning_rate": 3.973371888415736e-05, + "loss": 129.1179, + "step": 36700 + }, + { + "epoch": 0.14831304516457455, + "grad_norm": 1312.663330078125, + "learning_rate": 3.9733264523156337e-05, + "loss": 170.4874, + "step": 36710 + }, + { + "epoch": 0.1483534464299422, + "grad_norm": 477.23724365234375, + "learning_rate": 3.973280977744515e-05, + "loss": 134.6251, + "step": 36720 + }, + { + "epoch": 0.1483938476953098, + "grad_norm": 1025.680908203125, + "learning_rate": 3.9732354647032687e-05, 
+ "loss": 126.3483, + "step": 36730 + }, + { + "epoch": 0.14843424896067745, + "grad_norm": 376.9269714355469, + "learning_rate": 3.97318991319278e-05, + "loss": 103.3832, + "step": 36740 + }, + { + "epoch": 0.14847465022604509, + "grad_norm": 717.6051635742188, + "learning_rate": 3.973144323213939e-05, + "loss": 91.86, + "step": 36750 + }, + { + "epoch": 0.1485150514914127, + "grad_norm": 273.0193176269531, + "learning_rate": 3.9730986947676336e-05, + "loss": 169.6993, + "step": 36760 + }, + { + "epoch": 0.14855545275678034, + "grad_norm": 583.5360717773438, + "learning_rate": 3.973053027854753e-05, + "loss": 105.5548, + "step": 36770 + }, + { + "epoch": 0.14859585402214798, + "grad_norm": 3020.519775390625, + "learning_rate": 3.973007322476188e-05, + "loss": 126.1504, + "step": 36780 + }, + { + "epoch": 0.14863625528751562, + "grad_norm": 1493.3358154296875, + "learning_rate": 3.9729615786328286e-05, + "loss": 143.5123, + "step": 36790 + }, + { + "epoch": 0.14867665655288323, + "grad_norm": 562.4163818359375, + "learning_rate": 3.972915796325569e-05, + "loss": 87.1232, + "step": 36800 + }, + { + "epoch": 0.14871705781825087, + "grad_norm": 559.3504028320312, + "learning_rate": 3.972869975555299e-05, + "loss": 103.2926, + "step": 36810 + }, + { + "epoch": 0.1487574590836185, + "grad_norm": 1353.0863037109375, + "learning_rate": 3.972824116322913e-05, + "loss": 131.5428, + "step": 36820 + }, + { + "epoch": 0.14879786034898612, + "grad_norm": 2248.179931640625, + "learning_rate": 3.9727782186293066e-05, + "loss": 125.3711, + "step": 36830 + }, + { + "epoch": 0.14883826161435376, + "grad_norm": 747.2725219726562, + "learning_rate": 3.972732282475372e-05, + "loss": 121.071, + "step": 36840 + }, + { + "epoch": 0.1488786628797214, + "grad_norm": 576.9606323242188, + "learning_rate": 3.9726863078620066e-05, + "loss": 92.6187, + "step": 36850 + }, + { + "epoch": 0.14891906414508901, + "grad_norm": 444.50628662109375, + "learning_rate": 3.972640294790106e-05, + "loss": 150.1146, + "step": 36860 + }, + { + "epoch": 0.14895946541045665, + "grad_norm": 635.323974609375, + "learning_rate": 3.9725942432605666e-05, + "loss": 131.2632, + "step": 36870 + }, + { + "epoch": 0.1489998666758243, + "grad_norm": 1099.8741455078125, + "learning_rate": 3.972548153274287e-05, + "loss": 106.9759, + "step": 36880 + }, + { + "epoch": 0.1490402679411919, + "grad_norm": 866.4686889648438, + "learning_rate": 3.972502024832166e-05, + "loss": 141.728, + "step": 36890 + }, + { + "epoch": 0.14908066920655955, + "grad_norm": 1762.1820068359375, + "learning_rate": 3.972455857935102e-05, + "loss": 82.3566, + "step": 36900 + }, + { + "epoch": 0.1491210704719272, + "grad_norm": 644.1922607421875, + "learning_rate": 3.972409652583996e-05, + "loss": 112.4355, + "step": 36910 + }, + { + "epoch": 0.1491614717372948, + "grad_norm": 750.6581420898438, + "learning_rate": 3.9723634087797486e-05, + "loss": 107.3791, + "step": 36920 + }, + { + "epoch": 0.14920187300266244, + "grad_norm": 439.01007080078125, + "learning_rate": 3.9723171265232606e-05, + "loss": 156.9657, + "step": 36930 + }, + { + "epoch": 0.14924227426803008, + "grad_norm": 950.5374145507812, + "learning_rate": 3.972270805815435e-05, + "loss": 119.7774, + "step": 36940 + }, + { + "epoch": 0.14928267553339772, + "grad_norm": 755.5181274414062, + "learning_rate": 3.9722244466571745e-05, + "loss": 127.8649, + "step": 36950 + }, + { + "epoch": 0.14932307679876533, + "grad_norm": 1069.1998291015625, + "learning_rate": 3.972178049049383e-05, + "loss": 216.4067, + "step": 36960 + 
}, + { + "epoch": 0.14936347806413297, + "grad_norm": 1267.168701171875, + "learning_rate": 3.972131612992965e-05, + "loss": 161.1643, + "step": 36970 + }, + { + "epoch": 0.1494038793295006, + "grad_norm": 613.9271240234375, + "learning_rate": 3.972085138488826e-05, + "loss": 134.9328, + "step": 36980 + }, + { + "epoch": 0.14944428059486822, + "grad_norm": 2859.52001953125, + "learning_rate": 3.972038625537873e-05, + "loss": 139.243, + "step": 36990 + }, + { + "epoch": 0.14948468186023586, + "grad_norm": 541.2049560546875, + "learning_rate": 3.97199207414101e-05, + "loss": 118.2911, + "step": 37000 + }, + { + "epoch": 0.1495250831256035, + "grad_norm": 590.9548950195312, + "learning_rate": 3.9719454842991474e-05, + "loss": 103.3111, + "step": 37010 + }, + { + "epoch": 0.14956548439097112, + "grad_norm": 1368.8582763671875, + "learning_rate": 3.9718988560131915e-05, + "loss": 124.2333, + "step": 37020 + }, + { + "epoch": 0.14960588565633876, + "grad_norm": 1176.093994140625, + "learning_rate": 3.971852189284053e-05, + "loss": 120.1317, + "step": 37030 + }, + { + "epoch": 0.1496462869217064, + "grad_norm": 757.4658203125, + "learning_rate": 3.9718054841126406e-05, + "loss": 88.7326, + "step": 37040 + }, + { + "epoch": 0.149686688187074, + "grad_norm": 621.4666137695312, + "learning_rate": 3.9717587404998645e-05, + "loss": 100.7808, + "step": 37050 + }, + { + "epoch": 0.14972708945244165, + "grad_norm": 1107.5560302734375, + "learning_rate": 3.971711958446638e-05, + "loss": 148.3183, + "step": 37060 + }, + { + "epoch": 0.1497674907178093, + "grad_norm": 497.7505798339844, + "learning_rate": 3.9716651379538704e-05, + "loss": 113.9419, + "step": 37070 + }, + { + "epoch": 0.1498078919831769, + "grad_norm": 441.3013916015625, + "learning_rate": 3.971618279022477e-05, + "loss": 90.6009, + "step": 37080 + }, + { + "epoch": 0.14984829324854454, + "grad_norm": 476.7674865722656, + "learning_rate": 3.9715713816533695e-05, + "loss": 72.6046, + "step": 37090 + }, + { + "epoch": 0.14988869451391218, + "grad_norm": 1351.2138671875, + "learning_rate": 3.971524445847463e-05, + "loss": 151.4959, + "step": 37100 + }, + { + "epoch": 0.14992909577927982, + "grad_norm": 841.2885131835938, + "learning_rate": 3.971477471605672e-05, + "loss": 125.0131, + "step": 37110 + }, + { + "epoch": 0.14996949704464743, + "grad_norm": 3425.522705078125, + "learning_rate": 3.9714304589289134e-05, + "loss": 131.5076, + "step": 37120 + }, + { + "epoch": 0.15000989831001507, + "grad_norm": 983.3382568359375, + "learning_rate": 3.971383407818103e-05, + "loss": 179.2958, + "step": 37130 + }, + { + "epoch": 0.1500502995753827, + "grad_norm": 1824.768310546875, + "learning_rate": 3.971336318274158e-05, + "loss": 172.1847, + "step": 37140 + }, + { + "epoch": 0.15009070084075032, + "grad_norm": 389.3907470703125, + "learning_rate": 3.971289190297997e-05, + "loss": 113.6344, + "step": 37150 + }, + { + "epoch": 0.15013110210611796, + "grad_norm": 1252.8125, + "learning_rate": 3.971242023890537e-05, + "loss": 133.9993, + "step": 37160 + }, + { + "epoch": 0.1501715033714856, + "grad_norm": 945.156982421875, + "learning_rate": 3.9711948190526997e-05, + "loss": 146.0206, + "step": 37170 + }, + { + "epoch": 0.15021190463685322, + "grad_norm": 867.122802734375, + "learning_rate": 3.9711475757854045e-05, + "loss": 98.2192, + "step": 37180 + }, + { + "epoch": 0.15025230590222086, + "grad_norm": 1160.4625244140625, + "learning_rate": 3.971100294089573e-05, + "loss": 116.1852, + "step": 37190 + }, + { + "epoch": 0.1502927071675885, + "grad_norm": 
833.1032104492188, + "learning_rate": 3.971052973966126e-05, + "loss": 150.177, + "step": 37200 + }, + { + "epoch": 0.1503331084329561, + "grad_norm": 613.8431396484375, + "learning_rate": 3.9710056154159865e-05, + "loss": 147.9923, + "step": 37210 + }, + { + "epoch": 0.15037350969832375, + "grad_norm": 750.964111328125, + "learning_rate": 3.970958218440078e-05, + "loss": 142.328, + "step": 37220 + }, + { + "epoch": 0.1504139109636914, + "grad_norm": 825.981689453125, + "learning_rate": 3.970910783039324e-05, + "loss": 82.9531, + "step": 37230 + }, + { + "epoch": 0.150454312229059, + "grad_norm": 359.0674743652344, + "learning_rate": 3.97086330921465e-05, + "loss": 82.3714, + "step": 37240 + }, + { + "epoch": 0.15049471349442664, + "grad_norm": 751.1007080078125, + "learning_rate": 3.970815796966981e-05, + "loss": 149.2768, + "step": 37250 + }, + { + "epoch": 0.15053511475979428, + "grad_norm": 687.0623779296875, + "learning_rate": 3.970768246297243e-05, + "loss": 125.8564, + "step": 37260 + }, + { + "epoch": 0.15057551602516192, + "grad_norm": 865.3146362304688, + "learning_rate": 3.970720657206363e-05, + "loss": 131.9514, + "step": 37270 + }, + { + "epoch": 0.15061591729052953, + "grad_norm": 971.5889892578125, + "learning_rate": 3.97067302969527e-05, + "loss": 120.4343, + "step": 37280 + }, + { + "epoch": 0.15065631855589717, + "grad_norm": 1014.9862670898438, + "learning_rate": 3.970625363764892e-05, + "loss": 155.3358, + "step": 37290 + }, + { + "epoch": 0.1506967198212648, + "grad_norm": 909.3902587890625, + "learning_rate": 3.970577659416158e-05, + "loss": 82.1753, + "step": 37300 + }, + { + "epoch": 0.15073712108663243, + "grad_norm": 773.7681884765625, + "learning_rate": 3.970529916649997e-05, + "loss": 85.6293, + "step": 37310 + }, + { + "epoch": 0.15077752235200007, + "grad_norm": 586.37890625, + "learning_rate": 3.9704821354673414e-05, + "loss": 60.3773, + "step": 37320 + }, + { + "epoch": 0.1508179236173677, + "grad_norm": 1252.96142578125, + "learning_rate": 3.9704343158691226e-05, + "loss": 140.7164, + "step": 37330 + }, + { + "epoch": 0.15085832488273532, + "grad_norm": 701.724609375, + "learning_rate": 3.970386457856271e-05, + "loss": 73.8738, + "step": 37340 + }, + { + "epoch": 0.15089872614810296, + "grad_norm": 508.29498291015625, + "learning_rate": 3.9703385614297224e-05, + "loss": 141.6626, + "step": 37350 + }, + { + "epoch": 0.1509391274134706, + "grad_norm": 828.1369018554688, + "learning_rate": 3.9702906265904085e-05, + "loss": 125.9715, + "step": 37360 + }, + { + "epoch": 0.1509795286788382, + "grad_norm": 585.5205688476562, + "learning_rate": 3.9702426533392645e-05, + "loss": 132.5426, + "step": 37370 + }, + { + "epoch": 0.15101992994420585, + "grad_norm": 2642.8525390625, + "learning_rate": 3.9701946416772254e-05, + "loss": 121.5589, + "step": 37380 + }, + { + "epoch": 0.1510603312095735, + "grad_norm": 976.0630493164062, + "learning_rate": 3.970146591605228e-05, + "loss": 94.591, + "step": 37390 + }, + { + "epoch": 0.1511007324749411, + "grad_norm": 970.7144165039062, + "learning_rate": 3.9700985031242076e-05, + "loss": 105.7089, + "step": 37400 + }, + { + "epoch": 0.15114113374030874, + "grad_norm": 535.1576538085938, + "learning_rate": 3.970050376235104e-05, + "loss": 134.483, + "step": 37410 + }, + { + "epoch": 0.15118153500567638, + "grad_norm": 959.9027099609375, + "learning_rate": 3.970002210938853e-05, + "loss": 139.8605, + "step": 37420 + }, + { + "epoch": 0.15122193627104402, + "grad_norm": 664.1026611328125, + "learning_rate": 3.969954007236395e-05, + 
"loss": 102.1939, + "step": 37430 + }, + { + "epoch": 0.15126233753641163, + "grad_norm": 1314.028564453125, + "learning_rate": 3.969905765128669e-05, + "loss": 93.3572, + "step": 37440 + }, + { + "epoch": 0.15130273880177927, + "grad_norm": 775.252685546875, + "learning_rate": 3.969857484616616e-05, + "loss": 97.3264, + "step": 37450 + }, + { + "epoch": 0.15134314006714691, + "grad_norm": 1208.257080078125, + "learning_rate": 3.969809165701177e-05, + "loss": 150.1268, + "step": 37460 + }, + { + "epoch": 0.15138354133251453, + "grad_norm": 703.0592041015625, + "learning_rate": 3.969760808383295e-05, + "loss": 108.2553, + "step": 37470 + }, + { + "epoch": 0.15142394259788217, + "grad_norm": 1119.12744140625, + "learning_rate": 3.969712412663912e-05, + "loss": 77.3374, + "step": 37480 + }, + { + "epoch": 0.1514643438632498, + "grad_norm": 539.2511596679688, + "learning_rate": 3.969663978543971e-05, + "loss": 116.8411, + "step": 37490 + }, + { + "epoch": 0.15150474512861742, + "grad_norm": 491.09552001953125, + "learning_rate": 3.9696155060244166e-05, + "loss": 135.4651, + "step": 37500 + }, + { + "epoch": 0.15154514639398506, + "grad_norm": 711.6948852539062, + "learning_rate": 3.969566995106194e-05, + "loss": 108.7477, + "step": 37510 + }, + { + "epoch": 0.1515855476593527, + "grad_norm": 756.9710693359375, + "learning_rate": 3.969518445790248e-05, + "loss": 100.8946, + "step": 37520 + }, + { + "epoch": 0.1516259489247203, + "grad_norm": 960.282958984375, + "learning_rate": 3.9694698580775275e-05, + "loss": 123.249, + "step": 37530 + }, + { + "epoch": 0.15166635019008795, + "grad_norm": 748.89453125, + "learning_rate": 3.969421231968977e-05, + "loss": 88.6761, + "step": 37540 + }, + { + "epoch": 0.1517067514554556, + "grad_norm": 1657.08251953125, + "learning_rate": 3.9693725674655465e-05, + "loss": 112.9514, + "step": 37550 + }, + { + "epoch": 0.1517471527208232, + "grad_norm": 985.081298828125, + "learning_rate": 3.969323864568183e-05, + "loss": 114.0746, + "step": 37560 + }, + { + "epoch": 0.15178755398619084, + "grad_norm": 441.412109375, + "learning_rate": 3.9692751232778376e-05, + "loss": 124.7877, + "step": 37570 + }, + { + "epoch": 0.15182795525155848, + "grad_norm": 871.7003784179688, + "learning_rate": 3.96922634359546e-05, + "loss": 94.3804, + "step": 37580 + }, + { + "epoch": 0.15186835651692612, + "grad_norm": 1066.1202392578125, + "learning_rate": 3.969177525522e-05, + "loss": 80.1215, + "step": 37590 + }, + { + "epoch": 0.15190875778229374, + "grad_norm": 1130.20751953125, + "learning_rate": 3.969128669058411e-05, + "loss": 149.7896, + "step": 37600 + }, + { + "epoch": 0.15194915904766138, + "grad_norm": 774.4163818359375, + "learning_rate": 3.969079774205645e-05, + "loss": 103.4926, + "step": 37610 + }, + { + "epoch": 0.15198956031302902, + "grad_norm": 346.12835693359375, + "learning_rate": 3.969030840964655e-05, + "loss": 114.814, + "step": 37620 + }, + { + "epoch": 0.15202996157839663, + "grad_norm": 953.380126953125, + "learning_rate": 3.968981869336395e-05, + "loss": 148.6479, + "step": 37630 + }, + { + "epoch": 0.15207036284376427, + "grad_norm": 845.9201049804688, + "learning_rate": 3.968932859321819e-05, + "loss": 140.08, + "step": 37640 + }, + { + "epoch": 0.1521107641091319, + "grad_norm": 1350.3818359375, + "learning_rate": 3.9688838109218836e-05, + "loss": 127.5974, + "step": 37650 + }, + { + "epoch": 0.15215116537449952, + "grad_norm": 922.68701171875, + "learning_rate": 3.968834724137545e-05, + "loss": 78.5251, + "step": 37660 + }, + { + "epoch": 
0.15219156663986716, + "grad_norm": 883.6046142578125, + "learning_rate": 3.96878559896976e-05, + "loss": 128.7304, + "step": 37670 + }, + { + "epoch": 0.1522319679052348, + "grad_norm": 938.151123046875, + "learning_rate": 3.9687364354194854e-05, + "loss": 113.7263, + "step": 37680 + }, + { + "epoch": 0.1522723691706024, + "grad_norm": 738.2139892578125, + "learning_rate": 3.968687233487681e-05, + "loss": 73.6774, + "step": 37690 + }, + { + "epoch": 0.15231277043597005, + "grad_norm": 1101.00732421875, + "learning_rate": 3.968637993175305e-05, + "loss": 144.5813, + "step": 37700 + }, + { + "epoch": 0.1523531717013377, + "grad_norm": 641.3162231445312, + "learning_rate": 3.9685887144833185e-05, + "loss": 103.3363, + "step": 37710 + }, + { + "epoch": 0.1523935729667053, + "grad_norm": 1423.9307861328125, + "learning_rate": 3.9685393974126804e-05, + "loss": 112.4142, + "step": 37720 + }, + { + "epoch": 0.15243397423207294, + "grad_norm": 545.1290283203125, + "learning_rate": 3.9684900419643544e-05, + "loss": 55.9876, + "step": 37730 + }, + { + "epoch": 0.15247437549744058, + "grad_norm": 1194.0087890625, + "learning_rate": 3.968440648139301e-05, + "loss": 121.0328, + "step": 37740 + }, + { + "epoch": 0.15251477676280822, + "grad_norm": 723.3049926757812, + "learning_rate": 3.968391215938484e-05, + "loss": 90.9158, + "step": 37750 + }, + { + "epoch": 0.15255517802817584, + "grad_norm": 1336.587158203125, + "learning_rate": 3.968341745362867e-05, + "loss": 149.7606, + "step": 37760 + }, + { + "epoch": 0.15259557929354348, + "grad_norm": 621.4027099609375, + "learning_rate": 3.968292236413414e-05, + "loss": 167.5228, + "step": 37770 + }, + { + "epoch": 0.15263598055891112, + "grad_norm": 775.5252075195312, + "learning_rate": 3.96824268909109e-05, + "loss": 107.8637, + "step": 37780 + }, + { + "epoch": 0.15267638182427873, + "grad_norm": 636.5077514648438, + "learning_rate": 3.9681931033968625e-05, + "loss": 103.2506, + "step": 37790 + }, + { + "epoch": 0.15271678308964637, + "grad_norm": 732.7721557617188, + "learning_rate": 3.9681434793316966e-05, + "loss": 77.8052, + "step": 37800 + }, + { + "epoch": 0.152757184355014, + "grad_norm": 829.94970703125, + "learning_rate": 3.96809381689656e-05, + "loss": 150.8947, + "step": 37810 + }, + { + "epoch": 0.15279758562038162, + "grad_norm": 1071.7393798828125, + "learning_rate": 3.9680441160924225e-05, + "loss": 140.2972, + "step": 37820 + }, + { + "epoch": 0.15283798688574926, + "grad_norm": 1150.0419921875, + "learning_rate": 3.96799437692025e-05, + "loss": 168.181, + "step": 37830 + }, + { + "epoch": 0.1528783881511169, + "grad_norm": 1009.7350463867188, + "learning_rate": 3.967944599381015e-05, + "loss": 173.8135, + "step": 37840 + }, + { + "epoch": 0.1529187894164845, + "grad_norm": 1642.1202392578125, + "learning_rate": 3.9678947834756864e-05, + "loss": 139.2285, + "step": 37850 + }, + { + "epoch": 0.15295919068185215, + "grad_norm": 616.789306640625, + "learning_rate": 3.967844929205236e-05, + "loss": 105.676, + "step": 37860 + }, + { + "epoch": 0.1529995919472198, + "grad_norm": 721.6766967773438, + "learning_rate": 3.9677950365706365e-05, + "loss": 98.8448, + "step": 37870 + }, + { + "epoch": 0.1530399932125874, + "grad_norm": 944.2981567382812, + "learning_rate": 3.967745105572858e-05, + "loss": 163.7818, + "step": 37880 + }, + { + "epoch": 0.15308039447795505, + "grad_norm": 577.9256591796875, + "learning_rate": 3.967695136212877e-05, + "loss": 101.1067, + "step": 37890 + }, + { + "epoch": 0.15312079574332269, + "grad_norm": 
655.0137939453125, + "learning_rate": 3.967645128491666e-05, + "loss": 64.8816, + "step": 37900 + }, + { + "epoch": 0.15316119700869033, + "grad_norm": 715.8300170898438, + "learning_rate": 3.967595082410199e-05, + "loss": 100.8904, + "step": 37910 + }, + { + "epoch": 0.15320159827405794, + "grad_norm": 456.46612548828125, + "learning_rate": 3.967544997969454e-05, + "loss": 86.7138, + "step": 37920 + }, + { + "epoch": 0.15324199953942558, + "grad_norm": 678.5048217773438, + "learning_rate": 3.967494875170406e-05, + "loss": 156.0822, + "step": 37930 + }, + { + "epoch": 0.15328240080479322, + "grad_norm": 558.9214477539062, + "learning_rate": 3.967444714014032e-05, + "loss": 119.2672, + "step": 37940 + }, + { + "epoch": 0.15332280207016083, + "grad_norm": 816.8359985351562, + "learning_rate": 3.9673945145013114e-05, + "loss": 109.1232, + "step": 37950 + }, + { + "epoch": 0.15336320333552847, + "grad_norm": 400.4376525878906, + "learning_rate": 3.967344276633222e-05, + "loss": 97.8945, + "step": 37960 + }, + { + "epoch": 0.1534036046008961, + "grad_norm": 676.3570556640625, + "learning_rate": 3.9672940004107426e-05, + "loss": 134.8128, + "step": 37970 + }, + { + "epoch": 0.15344400586626372, + "grad_norm": 790.1891479492188, + "learning_rate": 3.967243685834854e-05, + "loss": 135.2298, + "step": 37980 + }, + { + "epoch": 0.15348440713163136, + "grad_norm": 683.4686889648438, + "learning_rate": 3.967193332906537e-05, + "loss": 158.2055, + "step": 37990 + }, + { + "epoch": 0.153524808396999, + "grad_norm": 1344.2230224609375, + "learning_rate": 3.967142941626772e-05, + "loss": 95.2672, + "step": 38000 + }, + { + "epoch": 0.15356520966236661, + "grad_norm": 698.2613525390625, + "learning_rate": 3.9670925119965434e-05, + "loss": 112.5602, + "step": 38010 + }, + { + "epoch": 0.15360561092773425, + "grad_norm": 567.7960815429688, + "learning_rate": 3.9670420440168335e-05, + "loss": 119.5437, + "step": 38020 + }, + { + "epoch": 0.1536460121931019, + "grad_norm": 1655.9705810546875, + "learning_rate": 3.9669915376886265e-05, + "loss": 109.5402, + "step": 38030 + }, + { + "epoch": 0.1536864134584695, + "grad_norm": 939.1187133789062, + "learning_rate": 3.966940993012907e-05, + "loss": 108.8111, + "step": 38040 + }, + { + "epoch": 0.15372681472383715, + "grad_norm": 968.599609375, + "learning_rate": 3.96689040999066e-05, + "loss": 97.9176, + "step": 38050 + }, + { + "epoch": 0.1537672159892048, + "grad_norm": 1038.843017578125, + "learning_rate": 3.966839788622872e-05, + "loss": 131.0141, + "step": 38060 + }, + { + "epoch": 0.15380761725457243, + "grad_norm": 2315.626953125, + "learning_rate": 3.966789128910529e-05, + "loss": 81.3225, + "step": 38070 + }, + { + "epoch": 0.15384801851994004, + "grad_norm": 626.8364868164062, + "learning_rate": 3.966738430854619e-05, + "loss": 122.9847, + "step": 38080 + }, + { + "epoch": 0.15388841978530768, + "grad_norm": 331.8294982910156, + "learning_rate": 3.966687694456132e-05, + "loss": 116.036, + "step": 38090 + }, + { + "epoch": 0.15392882105067532, + "grad_norm": 708.722412109375, + "learning_rate": 3.966636919716056e-05, + "loss": 115.2567, + "step": 38100 + }, + { + "epoch": 0.15396922231604293, + "grad_norm": 1237.1561279296875, + "learning_rate": 3.966586106635379e-05, + "loss": 100.6749, + "step": 38110 + }, + { + "epoch": 0.15400962358141057, + "grad_norm": 589.6749877929688, + "learning_rate": 3.966535255215095e-05, + "loss": 123.7184, + "step": 38120 + }, + { + "epoch": 0.1540500248467782, + "grad_norm": 1178.8912353515625, + "learning_rate": 
3.966484365456193e-05, + "loss": 106.2758, + "step": 38130 + }, + { + "epoch": 0.15409042611214582, + "grad_norm": 827.1260375976562, + "learning_rate": 3.966433437359667e-05, + "loss": 120.8724, + "step": 38140 + }, + { + "epoch": 0.15413082737751346, + "grad_norm": 689.5697021484375, + "learning_rate": 3.9663824709265075e-05, + "loss": 104.2138, + "step": 38150 + }, + { + "epoch": 0.1541712286428811, + "grad_norm": 792.0753173828125, + "learning_rate": 3.96633146615771e-05, + "loss": 111.7994, + "step": 38160 + }, + { + "epoch": 0.15421162990824872, + "grad_norm": 810.0701904296875, + "learning_rate": 3.9662804230542676e-05, + "loss": 104.9478, + "step": 38170 + }, + { + "epoch": 0.15425203117361636, + "grad_norm": 913.0693969726562, + "learning_rate": 3.966229341617177e-05, + "loss": 109.3062, + "step": 38180 + }, + { + "epoch": 0.154292432438984, + "grad_norm": 739.4224243164062, + "learning_rate": 3.9661782218474325e-05, + "loss": 88.5594, + "step": 38190 + }, + { + "epoch": 0.1543328337043516, + "grad_norm": 1987.8468017578125, + "learning_rate": 3.966127063746031e-05, + "loss": 93.7035, + "step": 38200 + }, + { + "epoch": 0.15437323496971925, + "grad_norm": 800.012451171875, + "learning_rate": 3.966075867313971e-05, + "loss": 113.1601, + "step": 38210 + }, + { + "epoch": 0.1544136362350869, + "grad_norm": 535.9425048828125, + "learning_rate": 3.966024632552249e-05, + "loss": 116.3206, + "step": 38220 + }, + { + "epoch": 0.15445403750045453, + "grad_norm": 789.9892578125, + "learning_rate": 3.965973359461865e-05, + "loss": 133.4441, + "step": 38230 + }, + { + "epoch": 0.15449443876582214, + "grad_norm": 2969.79296875, + "learning_rate": 3.965922048043818e-05, + "loss": 210.43, + "step": 38240 + }, + { + "epoch": 0.15453484003118978, + "grad_norm": 2127.048828125, + "learning_rate": 3.965870698299109e-05, + "loss": 159.5202, + "step": 38250 + }, + { + "epoch": 0.15457524129655742, + "grad_norm": 645.4761352539062, + "learning_rate": 3.965819310228738e-05, + "loss": 120.3436, + "step": 38260 + }, + { + "epoch": 0.15461564256192503, + "grad_norm": 982.2044067382812, + "learning_rate": 3.965767883833708e-05, + "loss": 151.5558, + "step": 38270 + }, + { + "epoch": 0.15465604382729267, + "grad_norm": 1186.6568603515625, + "learning_rate": 3.965716419115021e-05, + "loss": 125.2752, + "step": 38280 + }, + { + "epoch": 0.1546964450926603, + "grad_norm": 727.8316040039062, + "learning_rate": 3.96566491607368e-05, + "loss": 122.3011, + "step": 38290 + }, + { + "epoch": 0.15473684635802792, + "grad_norm": 421.57379150390625, + "learning_rate": 3.96561337471069e-05, + "loss": 121.1543, + "step": 38300 + }, + { + "epoch": 0.15477724762339556, + "grad_norm": 1260.02587890625, + "learning_rate": 3.965561795027054e-05, + "loss": 105.9478, + "step": 38310 + }, + { + "epoch": 0.1548176488887632, + "grad_norm": 418.11444091796875, + "learning_rate": 3.9655101770237805e-05, + "loss": 159.4605, + "step": 38320 + }, + { + "epoch": 0.15485805015413082, + "grad_norm": 725.11669921875, + "learning_rate": 3.965458520701874e-05, + "loss": 101.5268, + "step": 38330 + }, + { + "epoch": 0.15489845141949846, + "grad_norm": 468.5240478515625, + "learning_rate": 3.965406826062341e-05, + "loss": 100.1309, + "step": 38340 + }, + { + "epoch": 0.1549388526848661, + "grad_norm": 1170.9200439453125, + "learning_rate": 3.96535509310619e-05, + "loss": 85.7728, + "step": 38350 + }, + { + "epoch": 0.1549792539502337, + "grad_norm": 901.2523803710938, + "learning_rate": 3.96530332183443e-05, + "loss": 95.4473, + "step": 38360 + 
}, + { + "epoch": 0.15501965521560135, + "grad_norm": 660.00244140625, + "learning_rate": 3.96525151224807e-05, + "loss": 88.8342, + "step": 38370 + }, + { + "epoch": 0.155060056480969, + "grad_norm": 893.1664428710938, + "learning_rate": 3.96519966434812e-05, + "loss": 111.5988, + "step": 38380 + }, + { + "epoch": 0.15510045774633663, + "grad_norm": 976.8187255859375, + "learning_rate": 3.965147778135591e-05, + "loss": 99.1879, + "step": 38390 + }, + { + "epoch": 0.15514085901170424, + "grad_norm": 608.680419921875, + "learning_rate": 3.965095853611494e-05, + "loss": 130.9181, + "step": 38400 + }, + { + "epoch": 0.15518126027707188, + "grad_norm": 693.6250610351562, + "learning_rate": 3.9650438907768414e-05, + "loss": 187.105, + "step": 38410 + }, + { + "epoch": 0.15522166154243952, + "grad_norm": 804.358154296875, + "learning_rate": 3.964991889632647e-05, + "loss": 131.4975, + "step": 38420 + }, + { + "epoch": 0.15526206280780713, + "grad_norm": 1707.03759765625, + "learning_rate": 3.964939850179923e-05, + "loss": 119.8642, + "step": 38430 + }, + { + "epoch": 0.15530246407317477, + "grad_norm": 280.5298156738281, + "learning_rate": 3.964887772419687e-05, + "loss": 131.3166, + "step": 38440 + }, + { + "epoch": 0.1553428653385424, + "grad_norm": 591.8248901367188, + "learning_rate": 3.9648356563529506e-05, + "loss": 134.2794, + "step": 38450 + }, + { + "epoch": 0.15538326660391003, + "grad_norm": 776.2308959960938, + "learning_rate": 3.964783501980732e-05, + "loss": 86.9068, + "step": 38460 + }, + { + "epoch": 0.15542366786927767, + "grad_norm": 471.4769592285156, + "learning_rate": 3.9647313093040475e-05, + "loss": 120.0004, + "step": 38470 + }, + { + "epoch": 0.1554640691346453, + "grad_norm": 1563.588134765625, + "learning_rate": 3.964679078323915e-05, + "loss": 147.3916, + "step": 38480 + }, + { + "epoch": 0.15550447040001292, + "grad_norm": 748.78515625, + "learning_rate": 3.9646268090413516e-05, + "loss": 104.8629, + "step": 38490 + }, + { + "epoch": 0.15554487166538056, + "grad_norm": 1032.9627685546875, + "learning_rate": 3.964574501457378e-05, + "loss": 164.7784, + "step": 38500 + }, + { + "epoch": 0.1555852729307482, + "grad_norm": 4509.62841796875, + "learning_rate": 3.964522155573012e-05, + "loss": 132.7616, + "step": 38510 + }, + { + "epoch": 0.1556256741961158, + "grad_norm": 602.1223754882812, + "learning_rate": 3.964469771389276e-05, + "loss": 126.7141, + "step": 38520 + }, + { + "epoch": 0.15566607546148345, + "grad_norm": 1489.5487060546875, + "learning_rate": 3.96441734890719e-05, + "loss": 95.271, + "step": 38530 + }, + { + "epoch": 0.1557064767268511, + "grad_norm": 525.9141845703125, + "learning_rate": 3.964364888127777e-05, + "loss": 116.2935, + "step": 38540 + }, + { + "epoch": 0.15574687799221873, + "grad_norm": 367.6606750488281, + "learning_rate": 3.9643123890520584e-05, + "loss": 97.7154, + "step": 38550 + }, + { + "epoch": 0.15578727925758634, + "grad_norm": 1196.6495361328125, + "learning_rate": 3.9642598516810593e-05, + "loss": 102.2097, + "step": 38560 + }, + { + "epoch": 0.15582768052295398, + "grad_norm": 548.9644165039062, + "learning_rate": 3.9642072760158024e-05, + "loss": 118.3774, + "step": 38570 + }, + { + "epoch": 0.15586808178832162, + "grad_norm": 717.7500610351562, + "learning_rate": 3.964154662057314e-05, + "loss": 110.347, + "step": 38580 + }, + { + "epoch": 0.15590848305368923, + "grad_norm": 1207.1292724609375, + "learning_rate": 3.9641020098066185e-05, + "loss": 120.7089, + "step": 38590 + }, + { + "epoch": 0.15594888431905687, + 
"grad_norm": 1328.58544921875, + "learning_rate": 3.964049319264744e-05, + "loss": 140.2518, + "step": 38600 + }, + { + "epoch": 0.15598928558442451, + "grad_norm": 993.9035034179688, + "learning_rate": 3.963996590432716e-05, + "loss": 136.1533, + "step": 38610 + }, + { + "epoch": 0.15602968684979213, + "grad_norm": 880.5426025390625, + "learning_rate": 3.963943823311564e-05, + "loss": 105.9826, + "step": 38620 + }, + { + "epoch": 0.15607008811515977, + "grad_norm": 481.2868347167969, + "learning_rate": 3.9638910179023156e-05, + "loss": 152.0335, + "step": 38630 + }, + { + "epoch": 0.1561104893805274, + "grad_norm": 654.109130859375, + "learning_rate": 3.963838174206001e-05, + "loss": 125.8354, + "step": 38640 + }, + { + "epoch": 0.15615089064589502, + "grad_norm": 788.1646118164062, + "learning_rate": 3.963785292223651e-05, + "loss": 127.4936, + "step": 38650 + }, + { + "epoch": 0.15619129191126266, + "grad_norm": 376.84765625, + "learning_rate": 3.9637323719562936e-05, + "loss": 79.31, + "step": 38660 + }, + { + "epoch": 0.1562316931766303, + "grad_norm": 1535.444580078125, + "learning_rate": 3.963679413404964e-05, + "loss": 162.5413, + "step": 38670 + }, + { + "epoch": 0.1562720944419979, + "grad_norm": 972.6801147460938, + "learning_rate": 3.963626416570693e-05, + "loss": 128.6161, + "step": 38680 + }, + { + "epoch": 0.15631249570736555, + "grad_norm": 1085.9542236328125, + "learning_rate": 3.963573381454515e-05, + "loss": 128.6144, + "step": 38690 + }, + { + "epoch": 0.1563528969727332, + "grad_norm": 799.5704345703125, + "learning_rate": 3.963520308057462e-05, + "loss": 147.8636, + "step": 38700 + }, + { + "epoch": 0.15639329823810083, + "grad_norm": 959.645263671875, + "learning_rate": 3.96346719638057e-05, + "loss": 77.0807, + "step": 38710 + }, + { + "epoch": 0.15643369950346844, + "grad_norm": 3532.341064453125, + "learning_rate": 3.963414046424874e-05, + "loss": 84.5433, + "step": 38720 + }, + { + "epoch": 0.15647410076883608, + "grad_norm": 1351.4681396484375, + "learning_rate": 3.96336085819141e-05, + "loss": 115.201, + "step": 38730 + }, + { + "epoch": 0.15651450203420372, + "grad_norm": 1061.2412109375, + "learning_rate": 3.9633076316812155e-05, + "loss": 109.8041, + "step": 38740 + }, + { + "epoch": 0.15655490329957134, + "grad_norm": 462.3342590332031, + "learning_rate": 3.9632543668953284e-05, + "loss": 120.1956, + "step": 38750 + }, + { + "epoch": 0.15659530456493898, + "grad_norm": 775.7161254882812, + "learning_rate": 3.9632010638347865e-05, + "loss": 87.7154, + "step": 38760 + }, + { + "epoch": 0.15663570583030662, + "grad_norm": 877.742431640625, + "learning_rate": 3.9631477225006285e-05, + "loss": 148.5345, + "step": 38770 + }, + { + "epoch": 0.15667610709567423, + "grad_norm": 3709.60107421875, + "learning_rate": 3.963094342893896e-05, + "loss": 83.1346, + "step": 38780 + }, + { + "epoch": 0.15671650836104187, + "grad_norm": 851.2977905273438, + "learning_rate": 3.963040925015628e-05, + "loss": 118.0755, + "step": 38790 + }, + { + "epoch": 0.1567569096264095, + "grad_norm": 484.7431335449219, + "learning_rate": 3.962987468866866e-05, + "loss": 99.6096, + "step": 38800 + }, + { + "epoch": 0.15679731089177712, + "grad_norm": 933.8782958984375, + "learning_rate": 3.9629339744486534e-05, + "loss": 107.5517, + "step": 38810 + }, + { + "epoch": 0.15683771215714476, + "grad_norm": 1589.8094482421875, + "learning_rate": 3.962880441762032e-05, + "loss": 155.7309, + "step": 38820 + }, + { + "epoch": 0.1568781134225124, + "grad_norm": 679.9189453125, + "learning_rate": 
3.962826870808046e-05, + "loss": 135.3476, + "step": 38830 + }, + { + "epoch": 0.15691851468788, + "grad_norm": 514.5511474609375, + "learning_rate": 3.96277326158774e-05, + "loss": 98.1747, + "step": 38840 + }, + { + "epoch": 0.15695891595324765, + "grad_norm": 521.84326171875, + "learning_rate": 3.962719614102158e-05, + "loss": 118.8433, + "step": 38850 + }, + { + "epoch": 0.1569993172186153, + "grad_norm": 2646.55712890625, + "learning_rate": 3.9626659283523475e-05, + "loss": 98.1827, + "step": 38860 + }, + { + "epoch": 0.15703971848398293, + "grad_norm": 628.1710205078125, + "learning_rate": 3.9626122043393535e-05, + "loss": 111.3554, + "step": 38870 + }, + { + "epoch": 0.15708011974935054, + "grad_norm": 319.53131103515625, + "learning_rate": 3.9625584420642245e-05, + "loss": 114.2782, + "step": 38880 + }, + { + "epoch": 0.15712052101471818, + "grad_norm": 803.2861938476562, + "learning_rate": 3.962504641528009e-05, + "loss": 82.3757, + "step": 38890 + }, + { + "epoch": 0.15716092228008582, + "grad_norm": 457.76617431640625, + "learning_rate": 3.962450802731754e-05, + "loss": 100.112, + "step": 38900 + }, + { + "epoch": 0.15720132354545344, + "grad_norm": 1044.833984375, + "learning_rate": 3.962396925676511e-05, + "loss": 115.967, + "step": 38910 + }, + { + "epoch": 0.15724172481082108, + "grad_norm": 734.3097534179688, + "learning_rate": 3.962343010363329e-05, + "loss": 123.0896, + "step": 38920 + }, + { + "epoch": 0.15728212607618872, + "grad_norm": 1450.293212890625, + "learning_rate": 3.9622890567932605e-05, + "loss": 118.8325, + "step": 38930 + }, + { + "epoch": 0.15732252734155633, + "grad_norm": 1147.03515625, + "learning_rate": 3.962235064967356e-05, + "loss": 139.3275, + "step": 38940 + }, + { + "epoch": 0.15736292860692397, + "grad_norm": 822.2531127929688, + "learning_rate": 3.962181034886668e-05, + "loss": 136.8151, + "step": 38950 + }, + { + "epoch": 0.1574033298722916, + "grad_norm": 914.1550903320312, + "learning_rate": 3.9621269665522516e-05, + "loss": 100.7557, + "step": 38960 + }, + { + "epoch": 0.15744373113765922, + "grad_norm": 844.9164428710938, + "learning_rate": 3.9620728599651596e-05, + "loss": 142.2148, + "step": 38970 + }, + { + "epoch": 0.15748413240302686, + "grad_norm": 486.3018798828125, + "learning_rate": 3.9620187151264474e-05, + "loss": 123.6527, + "step": 38980 + }, + { + "epoch": 0.1575245336683945, + "grad_norm": 1604.903564453125, + "learning_rate": 3.961964532037169e-05, + "loss": 130.6107, + "step": 38990 + }, + { + "epoch": 0.1575649349337621, + "grad_norm": 525.8357543945312, + "learning_rate": 3.9619103106983835e-05, + "loss": 153.1113, + "step": 39000 + }, + { + "epoch": 0.15760533619912975, + "grad_norm": 1270.010009765625, + "learning_rate": 3.961856051111146e-05, + "loss": 119.7096, + "step": 39010 + }, + { + "epoch": 0.1576457374644974, + "grad_norm": 528.0458374023438, + "learning_rate": 3.961801753276514e-05, + "loss": 52.9746, + "step": 39020 + }, + { + "epoch": 0.15768613872986503, + "grad_norm": 881.9655151367188, + "learning_rate": 3.9617474171955475e-05, + "loss": 120.4968, + "step": 39030 + }, + { + "epoch": 0.15772653999523265, + "grad_norm": 406.15484619140625, + "learning_rate": 3.961693042869305e-05, + "loss": 108.1339, + "step": 39040 + }, + { + "epoch": 0.15776694126060029, + "grad_norm": 1219.187744140625, + "learning_rate": 3.961638630298847e-05, + "loss": 131.1615, + "step": 39050 + }, + { + "epoch": 0.15780734252596793, + "grad_norm": 853.8887329101562, + "learning_rate": 3.9615841794852336e-05, + "loss": 119.6812, + 
"step": 39060 + }, + { + "epoch": 0.15784774379133554, + "grad_norm": 389.75750732421875, + "learning_rate": 3.9615296904295264e-05, + "loss": 154.2032, + "step": 39070 + }, + { + "epoch": 0.15788814505670318, + "grad_norm": 526.4818115234375, + "learning_rate": 3.961475163132789e-05, + "loss": 68.8153, + "step": 39080 + }, + { + "epoch": 0.15792854632207082, + "grad_norm": 1525.848876953125, + "learning_rate": 3.961420597596082e-05, + "loss": 169.0312, + "step": 39090 + }, + { + "epoch": 0.15796894758743843, + "grad_norm": 718.8117065429688, + "learning_rate": 3.961365993820471e-05, + "loss": 81.0678, + "step": 39100 + }, + { + "epoch": 0.15800934885280607, + "grad_norm": 2585.33349609375, + "learning_rate": 3.961311351807022e-05, + "loss": 104.9615, + "step": 39110 + }, + { + "epoch": 0.1580497501181737, + "grad_norm": 682.7783203125, + "learning_rate": 3.961256671556796e-05, + "loss": 109.1929, + "step": 39120 + }, + { + "epoch": 0.15809015138354132, + "grad_norm": 1699.18896484375, + "learning_rate": 3.961201953070863e-05, + "loss": 90.2374, + "step": 39130 + }, + { + "epoch": 0.15813055264890896, + "grad_norm": 429.6229553222656, + "learning_rate": 3.961147196350288e-05, + "loss": 123.2978, + "step": 39140 + }, + { + "epoch": 0.1581709539142766, + "grad_norm": 659.725341796875, + "learning_rate": 3.9610924013961376e-05, + "loss": 85.841, + "step": 39150 + }, + { + "epoch": 0.15821135517964421, + "grad_norm": 1210.6920166015625, + "learning_rate": 3.9610375682094824e-05, + "loss": 122.0328, + "step": 39160 + }, + { + "epoch": 0.15825175644501185, + "grad_norm": 1622.5048828125, + "learning_rate": 3.96098269679139e-05, + "loss": 103.6573, + "step": 39170 + }, + { + "epoch": 0.1582921577103795, + "grad_norm": 460.536376953125, + "learning_rate": 3.96092778714293e-05, + "loss": 154.8165, + "step": 39180 + }, + { + "epoch": 0.15833255897574713, + "grad_norm": 519.8292236328125, + "learning_rate": 3.9608728392651734e-05, + "loss": 71.9869, + "step": 39190 + }, + { + "epoch": 0.15837296024111475, + "grad_norm": 1789.3155517578125, + "learning_rate": 3.960817853159192e-05, + "loss": 112.8768, + "step": 39200 + }, + { + "epoch": 0.1584133615064824, + "grad_norm": 667.8405151367188, + "learning_rate": 3.960762828826056e-05, + "loss": 112.1022, + "step": 39210 + }, + { + "epoch": 0.15845376277185003, + "grad_norm": 951.6414794921875, + "learning_rate": 3.96070776626684e-05, + "loss": 103.3248, + "step": 39220 + }, + { + "epoch": 0.15849416403721764, + "grad_norm": 407.44793701171875, + "learning_rate": 3.9606526654826154e-05, + "loss": 105.8338, + "step": 39230 + }, + { + "epoch": 0.15853456530258528, + "grad_norm": 606.3262329101562, + "learning_rate": 3.960597526474459e-05, + "loss": 132.6254, + "step": 39240 + }, + { + "epoch": 0.15857496656795292, + "grad_norm": 528.1349487304688, + "learning_rate": 3.9605423492434444e-05, + "loss": 66.8778, + "step": 39250 + }, + { + "epoch": 0.15861536783332053, + "grad_norm": 637.8035278320312, + "learning_rate": 3.9604871337906466e-05, + "loss": 96.8847, + "step": 39260 + }, + { + "epoch": 0.15865576909868817, + "grad_norm": 1047.2392578125, + "learning_rate": 3.960431880117143e-05, + "loss": 120.0049, + "step": 39270 + }, + { + "epoch": 0.1586961703640558, + "grad_norm": 621.6198120117188, + "learning_rate": 3.96037658822401e-05, + "loss": 99.7914, + "step": 39280 + }, + { + "epoch": 0.15873657162942342, + "grad_norm": 953.017333984375, + "learning_rate": 3.960321258112328e-05, + "loss": 74.6426, + "step": 39290 + }, + { + "epoch": 0.15877697289479106, 
+ "grad_norm": 1853.073486328125, + "learning_rate": 3.960265889783173e-05, + "loss": 112.9975, + "step": 39300 + }, + { + "epoch": 0.1588173741601587, + "grad_norm": 1306.50830078125, + "learning_rate": 3.9602104832376244e-05, + "loss": 88.762, + "step": 39310 + }, + { + "epoch": 0.15885777542552632, + "grad_norm": 1058.32373046875, + "learning_rate": 3.960155038476764e-05, + "loss": 94.9559, + "step": 39320 + }, + { + "epoch": 0.15889817669089396, + "grad_norm": 749.2457885742188, + "learning_rate": 3.9600995555016715e-05, + "loss": 133.8143, + "step": 39330 + }, + { + "epoch": 0.1589385779562616, + "grad_norm": 1269.5340576171875, + "learning_rate": 3.960044034313429e-05, + "loss": 106.596, + "step": 39340 + }, + { + "epoch": 0.15897897922162924, + "grad_norm": 1215.6334228515625, + "learning_rate": 3.9599884749131195e-05, + "loss": 178.2509, + "step": 39350 + }, + { + "epoch": 0.15901938048699685, + "grad_norm": 1227.5489501953125, + "learning_rate": 3.9599328773018255e-05, + "loss": 137.344, + "step": 39360 + }, + { + "epoch": 0.1590597817523645, + "grad_norm": 913.0560913085938, + "learning_rate": 3.9598772414806306e-05, + "loss": 144.3639, + "step": 39370 + }, + { + "epoch": 0.15910018301773213, + "grad_norm": 1552.5155029296875, + "learning_rate": 3.95982156745062e-05, + "loss": 96.6038, + "step": 39380 + }, + { + "epoch": 0.15914058428309974, + "grad_norm": 577.2279663085938, + "learning_rate": 3.95976585521288e-05, + "loss": 129.0721, + "step": 39390 + }, + { + "epoch": 0.15918098554846738, + "grad_norm": 641.37109375, + "learning_rate": 3.959710104768494e-05, + "loss": 108.1936, + "step": 39400 + }, + { + "epoch": 0.15922138681383502, + "grad_norm": 292.9571533203125, + "learning_rate": 3.9596543161185515e-05, + "loss": 115.9226, + "step": 39410 + }, + { + "epoch": 0.15926178807920263, + "grad_norm": 1436.6839599609375, + "learning_rate": 3.959598489264139e-05, + "loss": 101.0594, + "step": 39420 + }, + { + "epoch": 0.15930218934457027, + "grad_norm": 1198.4189453125, + "learning_rate": 3.959542624206346e-05, + "loss": 160.2221, + "step": 39430 + }, + { + "epoch": 0.1593425906099379, + "grad_norm": 347.4733581542969, + "learning_rate": 3.9594867209462594e-05, + "loss": 105.2, + "step": 39440 + }, + { + "epoch": 0.15938299187530552, + "grad_norm": 615.9927368164062, + "learning_rate": 3.959430779484971e-05, + "loss": 125.8379, + "step": 39450 + }, + { + "epoch": 0.15942339314067316, + "grad_norm": 764.7908935546875, + "learning_rate": 3.9593747998235696e-05, + "loss": 182.7966, + "step": 39460 + }, + { + "epoch": 0.1594637944060408, + "grad_norm": 1084.840087890625, + "learning_rate": 3.9593187819631496e-05, + "loss": 128.6358, + "step": 39470 + }, + { + "epoch": 0.15950419567140842, + "grad_norm": 1884.3834228515625, + "learning_rate": 3.9592627259048e-05, + "loss": 119.4425, + "step": 39480 + }, + { + "epoch": 0.15954459693677606, + "grad_norm": 1295.892578125, + "learning_rate": 3.9592066316496155e-05, + "loss": 101.9265, + "step": 39490 + }, + { + "epoch": 0.1595849982021437, + "grad_norm": 550.74560546875, + "learning_rate": 3.959150499198688e-05, + "loss": 94.6747, + "step": 39500 + }, + { + "epoch": 0.1596253994675113, + "grad_norm": 957.4488525390625, + "learning_rate": 3.9590943285531146e-05, + "loss": 105.3008, + "step": 39510 + }, + { + "epoch": 0.15966580073287895, + "grad_norm": 799.0433959960938, + "learning_rate": 3.959038119713987e-05, + "loss": 102.7812, + "step": 39520 + }, + { + "epoch": 0.1597062019982466, + "grad_norm": 739.2639770507812, + "learning_rate": 
3.958981872682404e-05, + "loss": 133.8768, + "step": 39530 + }, + { + "epoch": 0.15974660326361423, + "grad_norm": 770.6935424804688, + "learning_rate": 3.95892558745946e-05, + "loss": 102.3301, + "step": 39540 + }, + { + "epoch": 0.15978700452898184, + "grad_norm": 559.9129028320312, + "learning_rate": 3.958869264046253e-05, + "loss": 164.0527, + "step": 39550 + }, + { + "epoch": 0.15982740579434948, + "grad_norm": 834.214111328125, + "learning_rate": 3.958812902443882e-05, + "loss": 111.2683, + "step": 39560 + }, + { + "epoch": 0.15986780705971712, + "grad_norm": 434.8836669921875, + "learning_rate": 3.958756502653444e-05, + "loss": 111.8171, + "step": 39570 + }, + { + "epoch": 0.15990820832508473, + "grad_norm": 1380.3211669921875, + "learning_rate": 3.95870006467604e-05, + "loss": 134.5199, + "step": 39580 + }, + { + "epoch": 0.15994860959045237, + "grad_norm": 980.2847290039062, + "learning_rate": 3.9586435885127705e-05, + "loss": 127.9131, + "step": 39590 + }, + { + "epoch": 0.15998901085582, + "grad_norm": 2288.56005859375, + "learning_rate": 3.958587074164735e-05, + "loss": 144.4222, + "step": 39600 + }, + { + "epoch": 0.16002941212118763, + "grad_norm": 811.4932861328125, + "learning_rate": 3.958530521633036e-05, + "loss": 156.4945, + "step": 39610 + }, + { + "epoch": 0.16006981338655527, + "grad_norm": 383.296142578125, + "learning_rate": 3.958473930918777e-05, + "loss": 115.3877, + "step": 39620 + }, + { + "epoch": 0.1601102146519229, + "grad_norm": 595.71044921875, + "learning_rate": 3.95841730202306e-05, + "loss": 153.1669, + "step": 39630 + }, + { + "epoch": 0.16015061591729052, + "grad_norm": 1930.6666259765625, + "learning_rate": 3.958360634946989e-05, + "loss": 154.5227, + "step": 39640 + }, + { + "epoch": 0.16019101718265816, + "grad_norm": 772.9099731445312, + "learning_rate": 3.9583039296916704e-05, + "loss": 109.5913, + "step": 39650 + }, + { + "epoch": 0.1602314184480258, + "grad_norm": 2795.6240234375, + "learning_rate": 3.958247186258208e-05, + "loss": 134.5731, + "step": 39660 + }, + { + "epoch": 0.1602718197133934, + "grad_norm": 782.784423828125, + "learning_rate": 3.9581904046477076e-05, + "loss": 148.3746, + "step": 39670 + }, + { + "epoch": 0.16031222097876105, + "grad_norm": 743.2928466796875, + "learning_rate": 3.958133584861278e-05, + "loss": 102.9782, + "step": 39680 + }, + { + "epoch": 0.1603526222441287, + "grad_norm": 984.4341430664062, + "learning_rate": 3.958076726900026e-05, + "loss": 96.8104, + "step": 39690 + }, + { + "epoch": 0.16039302350949633, + "grad_norm": 850.392822265625, + "learning_rate": 3.95801983076506e-05, + "loss": 67.5797, + "step": 39700 + }, + { + "epoch": 0.16043342477486394, + "grad_norm": 1113.4615478515625, + "learning_rate": 3.957962896457489e-05, + "loss": 93.0962, + "step": 39710 + }, + { + "epoch": 0.16047382604023158, + "grad_norm": 985.4330444335938, + "learning_rate": 3.957905923978424e-05, + "loss": 127.3651, + "step": 39720 + }, + { + "epoch": 0.16051422730559922, + "grad_norm": 993.6034545898438, + "learning_rate": 3.9578489133289745e-05, + "loss": 113.9104, + "step": 39730 + }, + { + "epoch": 0.16055462857096683, + "grad_norm": 1621.978515625, + "learning_rate": 3.9577918645102524e-05, + "loss": 106.7451, + "step": 39740 + }, + { + "epoch": 0.16059502983633447, + "grad_norm": 1079.9647216796875, + "learning_rate": 3.9577347775233705e-05, + "loss": 129.6938, + "step": 39750 + }, + { + "epoch": 0.16063543110170211, + "grad_norm": 748.7120361328125, + "learning_rate": 3.957677652369441e-05, + "loss": 98.8223, + "step": 
39760 + }, + { + "epoch": 0.16067583236706973, + "grad_norm": 603.3798828125, + "learning_rate": 3.957620489049577e-05, + "loss": 153.4769, + "step": 39770 + }, + { + "epoch": 0.16071623363243737, + "grad_norm": 1355.56201171875, + "learning_rate": 3.957563287564895e-05, + "loss": 138.715, + "step": 39780 + }, + { + "epoch": 0.160756634897805, + "grad_norm": 1335.77490234375, + "learning_rate": 3.957506047916508e-05, + "loss": 69.2249, + "step": 39790 + }, + { + "epoch": 0.16079703616317262, + "grad_norm": 1391.6043701171875, + "learning_rate": 3.9574487701055326e-05, + "loss": 106.4196, + "step": 39800 + }, + { + "epoch": 0.16083743742854026, + "grad_norm": 1081.728515625, + "learning_rate": 3.9573914541330865e-05, + "loss": 127.9643, + "step": 39810 + }, + { + "epoch": 0.1608778386939079, + "grad_norm": 919.1405639648438, + "learning_rate": 3.957334100000286e-05, + "loss": 107.9311, + "step": 39820 + }, + { + "epoch": 0.1609182399592755, + "grad_norm": 780.0993041992188, + "learning_rate": 3.95727670770825e-05, + "loss": 109.0406, + "step": 39830 + }, + { + "epoch": 0.16095864122464315, + "grad_norm": 533.7987670898438, + "learning_rate": 3.957219277258096e-05, + "loss": 141.7974, + "step": 39840 + }, + { + "epoch": 0.1609990424900108, + "grad_norm": 1860.90771484375, + "learning_rate": 3.957161808650944e-05, + "loss": 82.9148, + "step": 39850 + }, + { + "epoch": 0.16103944375537843, + "grad_norm": 700.6025390625, + "learning_rate": 3.957104301887916e-05, + "loss": 79.6347, + "step": 39860 + }, + { + "epoch": 0.16107984502074604, + "grad_norm": 938.4268798828125, + "learning_rate": 3.957046756970132e-05, + "loss": 123.7255, + "step": 39870 + }, + { + "epoch": 0.16112024628611368, + "grad_norm": 510.0345458984375, + "learning_rate": 3.9569891738987136e-05, + "loss": 105.5226, + "step": 39880 + }, + { + "epoch": 0.16116064755148132, + "grad_norm": 622.7450561523438, + "learning_rate": 3.9569315526747843e-05, + "loss": 112.0368, + "step": 39890 + }, + { + "epoch": 0.16120104881684894, + "grad_norm": 590.03955078125, + "learning_rate": 3.9568738932994665e-05, + "loss": 118.1405, + "step": 39900 + }, + { + "epoch": 0.16124145008221658, + "grad_norm": 730.0455322265625, + "learning_rate": 3.9568161957738844e-05, + "loss": 138.5785, + "step": 39910 + }, + { + "epoch": 0.16128185134758422, + "grad_norm": 419.82708740234375, + "learning_rate": 3.9567584600991635e-05, + "loss": 106.7396, + "step": 39920 + }, + { + "epoch": 0.16132225261295183, + "grad_norm": 2164.44287109375, + "learning_rate": 3.9567006862764286e-05, + "loss": 171.0003, + "step": 39930 + }, + { + "epoch": 0.16136265387831947, + "grad_norm": 465.9510498046875, + "learning_rate": 3.9566428743068074e-05, + "loss": 95.2755, + "step": 39940 + }, + { + "epoch": 0.1614030551436871, + "grad_norm": 889.5313110351562, + "learning_rate": 3.9565850241914246e-05, + "loss": 147.8692, + "step": 39950 + }, + { + "epoch": 0.16144345640905472, + "grad_norm": 1541.0682373046875, + "learning_rate": 3.9565271359314107e-05, + "loss": 134.8627, + "step": 39960 + }, + { + "epoch": 0.16148385767442236, + "grad_norm": 1322.4500732421875, + "learning_rate": 3.9564692095278924e-05, + "loss": 182.163, + "step": 39970 + }, + { + "epoch": 0.16152425893979, + "grad_norm": 1148.746826171875, + "learning_rate": 3.956411244981999e-05, + "loss": 144.0579, + "step": 39980 + }, + { + "epoch": 0.1615646602051576, + "grad_norm": 519.0781860351562, + "learning_rate": 3.9563532422948625e-05, + "loss": 149.7329, + "step": 39990 + }, + { + "epoch": 0.16160506147052525, + 
"grad_norm": 934.0352783203125, + "learning_rate": 3.9562952014676116e-05, + "loss": 121.8173, + "step": 40000 + }, + { + "epoch": 0.1616454627358929, + "grad_norm": 468.0342102050781, + "learning_rate": 3.956237122501379e-05, + "loss": 95.349, + "step": 40010 + }, + { + "epoch": 0.16168586400126053, + "grad_norm": 885.7557983398438, + "learning_rate": 3.956179005397296e-05, + "loss": 115.4182, + "step": 40020 + }, + { + "epoch": 0.16172626526662814, + "grad_norm": 933.9559936523438, + "learning_rate": 3.956120850156496e-05, + "loss": 100.2219, + "step": 40030 + }, + { + "epoch": 0.16176666653199578, + "grad_norm": 815.7552490234375, + "learning_rate": 3.9560626567801136e-05, + "loss": 103.844, + "step": 40040 + }, + { + "epoch": 0.16180706779736342, + "grad_norm": 1302.731201171875, + "learning_rate": 3.9560044252692826e-05, + "loss": 142.5144, + "step": 40050 + }, + { + "epoch": 0.16184746906273104, + "grad_norm": 3717.3759765625, + "learning_rate": 3.955946155625138e-05, + "loss": 122.3537, + "step": 40060 + }, + { + "epoch": 0.16188787032809868, + "grad_norm": 803.7733764648438, + "learning_rate": 3.955887847848816e-05, + "loss": 119.7936, + "step": 40070 + }, + { + "epoch": 0.16192827159346632, + "grad_norm": 1676.2071533203125, + "learning_rate": 3.9558295019414534e-05, + "loss": 99.6754, + "step": 40080 + }, + { + "epoch": 0.16196867285883393, + "grad_norm": 924.6236572265625, + "learning_rate": 3.9557711179041887e-05, + "loss": 91.1093, + "step": 40090 + }, + { + "epoch": 0.16200907412420157, + "grad_norm": 1932.157470703125, + "learning_rate": 3.955712695738158e-05, + "loss": 105.0394, + "step": 40100 + }, + { + "epoch": 0.1620494753895692, + "grad_norm": 570.0369262695312, + "learning_rate": 3.955654235444502e-05, + "loss": 83.9832, + "step": 40110 + }, + { + "epoch": 0.16208987665493682, + "grad_norm": 559.774169921875, + "learning_rate": 3.95559573702436e-05, + "loss": 122.374, + "step": 40120 + }, + { + "epoch": 0.16213027792030446, + "grad_norm": 1708.4337158203125, + "learning_rate": 3.955537200478872e-05, + "loss": 128.1753, + "step": 40130 + }, + { + "epoch": 0.1621706791856721, + "grad_norm": 618.35400390625, + "learning_rate": 3.955478625809179e-05, + "loss": 179.6723, + "step": 40140 + }, + { + "epoch": 0.1622110804510397, + "grad_norm": 842.423828125, + "learning_rate": 3.955420013016424e-05, + "loss": 147.5065, + "step": 40150 + }, + { + "epoch": 0.16225148171640735, + "grad_norm": 1515.052978515625, + "learning_rate": 3.9553613621017495e-05, + "loss": 142.6038, + "step": 40160 + }, + { + "epoch": 0.162291882981775, + "grad_norm": 650.4257202148438, + "learning_rate": 3.955302673066298e-05, + "loss": 131.1292, + "step": 40170 + }, + { + "epoch": 0.16233228424714263, + "grad_norm": 667.0464477539062, + "learning_rate": 3.955243945911214e-05, + "loss": 108.1784, + "step": 40180 + }, + { + "epoch": 0.16237268551251025, + "grad_norm": 1059.4212646484375, + "learning_rate": 3.955185180637643e-05, + "loss": 132.2586, + "step": 40190 + }, + { + "epoch": 0.16241308677787789, + "grad_norm": 762.611572265625, + "learning_rate": 3.955126377246731e-05, + "loss": 137.3403, + "step": 40200 + }, + { + "epoch": 0.16245348804324553, + "grad_norm": 3228.942138671875, + "learning_rate": 3.955067535739623e-05, + "loss": 121.1844, + "step": 40210 + }, + { + "epoch": 0.16249388930861314, + "grad_norm": 461.04986572265625, + "learning_rate": 3.955008656117467e-05, + "loss": 106.7706, + "step": 40220 + }, + { + "epoch": 0.16253429057398078, + "grad_norm": 513.1512451171875, + 
"learning_rate": 3.9549497383814105e-05, + "loss": 82.3656, + "step": 40230 + }, + { + "epoch": 0.16257469183934842, + "grad_norm": 725.5595703125, + "learning_rate": 3.954890782532602e-05, + "loss": 105.488, + "step": 40240 + }, + { + "epoch": 0.16261509310471603, + "grad_norm": 715.2576904296875, + "learning_rate": 3.9548317885721925e-05, + "loss": 95.5932, + "step": 40250 + }, + { + "epoch": 0.16265549437008367, + "grad_norm": 422.13104248046875, + "learning_rate": 3.9547727565013295e-05, + "loss": 98.9668, + "step": 40260 + }, + { + "epoch": 0.1626958956354513, + "grad_norm": 833.56201171875, + "learning_rate": 3.954713686321166e-05, + "loss": 212.9737, + "step": 40270 + }, + { + "epoch": 0.16273629690081892, + "grad_norm": 564.8218994140625, + "learning_rate": 3.954654578032853e-05, + "loss": 170.1227, + "step": 40280 + }, + { + "epoch": 0.16277669816618656, + "grad_norm": 284.9054260253906, + "learning_rate": 3.954595431637542e-05, + "loss": 91.1573, + "step": 40290 + }, + { + "epoch": 0.1628170994315542, + "grad_norm": 903.050537109375, + "learning_rate": 3.954536247136387e-05, + "loss": 89.311, + "step": 40300 + }, + { + "epoch": 0.16285750069692181, + "grad_norm": 2041.6787109375, + "learning_rate": 3.954477024530542e-05, + "loss": 154.8774, + "step": 40310 + }, + { + "epoch": 0.16289790196228945, + "grad_norm": 1567.304931640625, + "learning_rate": 3.954417763821161e-05, + "loss": 114.0096, + "step": 40320 + }, + { + "epoch": 0.1629383032276571, + "grad_norm": 790.0082397460938, + "learning_rate": 3.9543584650093994e-05, + "loss": 175.5607, + "step": 40330 + }, + { + "epoch": 0.16297870449302473, + "grad_norm": 1118.681884765625, + "learning_rate": 3.954299128096413e-05, + "loss": 133.9488, + "step": 40340 + }, + { + "epoch": 0.16301910575839235, + "grad_norm": 796.1229248046875, + "learning_rate": 3.95423975308336e-05, + "loss": 80.8322, + "step": 40350 + }, + { + "epoch": 0.16305950702376, + "grad_norm": 597.4182739257812, + "learning_rate": 3.9541803399713956e-05, + "loss": 115.0141, + "step": 40360 + }, + { + "epoch": 0.16309990828912763, + "grad_norm": 1058.8887939453125, + "learning_rate": 3.9541208887616805e-05, + "loss": 97.3586, + "step": 40370 + }, + { + "epoch": 0.16314030955449524, + "grad_norm": 552.2665405273438, + "learning_rate": 3.954061399455372e-05, + "loss": 125.3299, + "step": 40380 + }, + { + "epoch": 0.16318071081986288, + "grad_norm": 429.4337158203125, + "learning_rate": 3.95400187205363e-05, + "loss": 77.1835, + "step": 40390 + }, + { + "epoch": 0.16322111208523052, + "grad_norm": 849.1442260742188, + "learning_rate": 3.9539423065576165e-05, + "loss": 165.0286, + "step": 40400 + }, + { + "epoch": 0.16326151335059813, + "grad_norm": 760.0634155273438, + "learning_rate": 3.9538827029684916e-05, + "loss": 71.2514, + "step": 40410 + }, + { + "epoch": 0.16330191461596577, + "grad_norm": 828.3287353515625, + "learning_rate": 3.9538230612874174e-05, + "loss": 139.8567, + "step": 40420 + }, + { + "epoch": 0.1633423158813334, + "grad_norm": 1147.2078857421875, + "learning_rate": 3.953763381515556e-05, + "loss": 130.123, + "step": 40430 + }, + { + "epoch": 0.16338271714670102, + "grad_norm": 968.625, + "learning_rate": 3.953703663654072e-05, + "loss": 136.5216, + "step": 40440 + }, + { + "epoch": 0.16342311841206866, + "grad_norm": 733.6450805664062, + "learning_rate": 3.95364390770413e-05, + "loss": 103.3633, + "step": 40450 + }, + { + "epoch": 0.1634635196774363, + "grad_norm": 858.1682739257812, + "learning_rate": 3.9535841136668936e-05, + "loss": 92.0788, + 
"step": 40460 + }, + { + "epoch": 0.16350392094280392, + "grad_norm": 1335.85888671875, + "learning_rate": 3.953524281543529e-05, + "loss": 104.7228, + "step": 40470 + }, + { + "epoch": 0.16354432220817156, + "grad_norm": 1130.6636962890625, + "learning_rate": 3.9534644113352036e-05, + "loss": 106.9868, + "step": 40480 + }, + { + "epoch": 0.1635847234735392, + "grad_norm": 833.35595703125, + "learning_rate": 3.953404503043083e-05, + "loss": 102.9079, + "step": 40490 + }, + { + "epoch": 0.16362512473890684, + "grad_norm": 1680.3211669921875, + "learning_rate": 3.9533445566683364e-05, + "loss": 171.5896, + "step": 40500 + }, + { + "epoch": 0.16366552600427445, + "grad_norm": 689.1195678710938, + "learning_rate": 3.9532845722121315e-05, + "loss": 55.7235, + "step": 40510 + }, + { + "epoch": 0.1637059272696421, + "grad_norm": 680.0838623046875, + "learning_rate": 3.953224549675638e-05, + "loss": 113.6331, + "step": 40520 + }, + { + "epoch": 0.16374632853500973, + "grad_norm": 586.0123901367188, + "learning_rate": 3.9531644890600276e-05, + "loss": 150.5315, + "step": 40530 + }, + { + "epoch": 0.16378672980037734, + "grad_norm": 581.7367553710938, + "learning_rate": 3.953104390366469e-05, + "loss": 86.762, + "step": 40540 + }, + { + "epoch": 0.16382713106574498, + "grad_norm": 850.872314453125, + "learning_rate": 3.953044253596135e-05, + "loss": 148.3216, + "step": 40550 + }, + { + "epoch": 0.16386753233111262, + "grad_norm": 495.7790832519531, + "learning_rate": 3.952984078750198e-05, + "loss": 141.7199, + "step": 40560 + }, + { + "epoch": 0.16390793359648023, + "grad_norm": 1054.132568359375, + "learning_rate": 3.9529238658298304e-05, + "loss": 115.1323, + "step": 40570 + }, + { + "epoch": 0.16394833486184787, + "grad_norm": 1156.91259765625, + "learning_rate": 3.952863614836207e-05, + "loss": 86.3228, + "step": 40580 + }, + { + "epoch": 0.1639887361272155, + "grad_norm": 5759.1015625, + "learning_rate": 3.952803325770501e-05, + "loss": 140.2169, + "step": 40590 + }, + { + "epoch": 0.16402913739258312, + "grad_norm": 1147.9371337890625, + "learning_rate": 3.95274299863389e-05, + "loss": 103.7103, + "step": 40600 + }, + { + "epoch": 0.16406953865795076, + "grad_norm": 484.32110595703125, + "learning_rate": 3.952682633427548e-05, + "loss": 109.6233, + "step": 40610 + }, + { + "epoch": 0.1641099399233184, + "grad_norm": 1567.8480224609375, + "learning_rate": 3.952622230152654e-05, + "loss": 134.9177, + "step": 40620 + }, + { + "epoch": 0.16415034118868602, + "grad_norm": 1466.4073486328125, + "learning_rate": 3.952561788810384e-05, + "loss": 138.8492, + "step": 40630 + }, + { + "epoch": 0.16419074245405366, + "grad_norm": 428.9383239746094, + "learning_rate": 3.952501309401916e-05, + "loss": 106.2617, + "step": 40640 + }, + { + "epoch": 0.1642311437194213, + "grad_norm": 1053.27294921875, + "learning_rate": 3.95244079192843e-05, + "loss": 165.314, + "step": 40650 + }, + { + "epoch": 0.16427154498478894, + "grad_norm": 946.93701171875, + "learning_rate": 3.952380236391106e-05, + "loss": 123.6535, + "step": 40660 + }, + { + "epoch": 0.16431194625015655, + "grad_norm": 988.1174926757812, + "learning_rate": 3.952319642791124e-05, + "loss": 78.0531, + "step": 40670 + }, + { + "epoch": 0.1643523475155242, + "grad_norm": 655.7237548828125, + "learning_rate": 3.9522590111296646e-05, + "loss": 114.9386, + "step": 40680 + }, + { + "epoch": 0.16439274878089183, + "grad_norm": 828.2750244140625, + "learning_rate": 3.952198341407911e-05, + "loss": 118.8359, + "step": 40690 + }, + { + "epoch": 
0.16443315004625944, + "grad_norm": 878.3519897460938, + "learning_rate": 3.9521376336270466e-05, + "loss": 132.4717, + "step": 40700 + }, + { + "epoch": 0.16447355131162708, + "grad_norm": 4821.99462890625, + "learning_rate": 3.952076887788253e-05, + "loss": 143.6081, + "step": 40710 + }, + { + "epoch": 0.16451395257699472, + "grad_norm": 766.0470581054688, + "learning_rate": 3.952016103892716e-05, + "loss": 89.6328, + "step": 40720 + }, + { + "epoch": 0.16455435384236233, + "grad_norm": 525.7019653320312, + "learning_rate": 3.95195528194162e-05, + "loss": 143.0163, + "step": 40730 + }, + { + "epoch": 0.16459475510772997, + "grad_norm": 278.3527526855469, + "learning_rate": 3.951894421936151e-05, + "loss": 99.2914, + "step": 40740 + }, + { + "epoch": 0.1646351563730976, + "grad_norm": 1530.2376708984375, + "learning_rate": 3.951833523877495e-05, + "loss": 115.4705, + "step": 40750 + }, + { + "epoch": 0.16467555763846523, + "grad_norm": 773.5904541015625, + "learning_rate": 3.95177258776684e-05, + "loss": 97.1055, + "step": 40760 + }, + { + "epoch": 0.16471595890383287, + "grad_norm": 998.42138671875, + "learning_rate": 3.951711613605374e-05, + "loss": 82.2179, + "step": 40770 + }, + { + "epoch": 0.1647563601692005, + "grad_norm": 679.403076171875, + "learning_rate": 3.9516506013942836e-05, + "loss": 111.0334, + "step": 40780 + }, + { + "epoch": 0.16479676143456812, + "grad_norm": 805.0313720703125, + "learning_rate": 3.951589551134761e-05, + "loss": 130.0479, + "step": 40790 + }, + { + "epoch": 0.16483716269993576, + "grad_norm": 988.4754028320312, + "learning_rate": 3.9515284628279954e-05, + "loss": 65.4172, + "step": 40800 + }, + { + "epoch": 0.1648775639653034, + "grad_norm": 744.9801025390625, + "learning_rate": 3.9514673364751776e-05, + "loss": 138.3552, + "step": 40810 + }, + { + "epoch": 0.16491796523067104, + "grad_norm": 829.218505859375, + "learning_rate": 3.9514061720775e-05, + "loss": 149.9847, + "step": 40820 + }, + { + "epoch": 0.16495836649603865, + "grad_norm": 1154.8477783203125, + "learning_rate": 3.9513449696361535e-05, + "loss": 87.1555, + "step": 40830 + }, + { + "epoch": 0.1649987677614063, + "grad_norm": 1157.313232421875, + "learning_rate": 3.951283729152332e-05, + "loss": 95.1053, + "step": 40840 + }, + { + "epoch": 0.16503916902677393, + "grad_norm": 932.96435546875, + "learning_rate": 3.951222450627231e-05, + "loss": 76.9024, + "step": 40850 + }, + { + "epoch": 0.16507957029214154, + "grad_norm": 671.228515625, + "learning_rate": 3.951161134062042e-05, + "loss": 119.4201, + "step": 40860 + }, + { + "epoch": 0.16511997155750918, + "grad_norm": 1190.280517578125, + "learning_rate": 3.951099779457963e-05, + "loss": 83.5422, + "step": 40870 + }, + { + "epoch": 0.16516037282287682, + "grad_norm": 801.9124145507812, + "learning_rate": 3.95103838681619e-05, + "loss": 118.7613, + "step": 40880 + }, + { + "epoch": 0.16520077408824443, + "grad_norm": 891.947998046875, + "learning_rate": 3.9509769561379184e-05, + "loss": 169.8893, + "step": 40890 + }, + { + "epoch": 0.16524117535361207, + "grad_norm": 790.9534301757812, + "learning_rate": 3.9509154874243466e-05, + "loss": 75.8404, + "step": 40900 + }, + { + "epoch": 0.16528157661897971, + "grad_norm": 596.9677734375, + "learning_rate": 3.950853980676673e-05, + "loss": 141.7207, + "step": 40910 + }, + { + "epoch": 0.16532197788434733, + "grad_norm": 1046.884765625, + "learning_rate": 3.950792435896097e-05, + "loss": 138.6463, + "step": 40920 + }, + { + "epoch": 0.16536237914971497, + "grad_norm": 1965.2464599609375, + 
"learning_rate": 3.950730853083818e-05, + "loss": 188.2334, + "step": 40930 + }, + { + "epoch": 0.1654027804150826, + "grad_norm": 865.046142578125, + "learning_rate": 3.950669232241036e-05, + "loss": 172.2823, + "step": 40940 + }, + { + "epoch": 0.16544318168045022, + "grad_norm": 1367.5283203125, + "learning_rate": 3.950607573368954e-05, + "loss": 122.3632, + "step": 40950 + }, + { + "epoch": 0.16548358294581786, + "grad_norm": 1082.30810546875, + "learning_rate": 3.950545876468773e-05, + "loss": 115.009, + "step": 40960 + }, + { + "epoch": 0.1655239842111855, + "grad_norm": 1976.847900390625, + "learning_rate": 3.9504841415416955e-05, + "loss": 106.9823, + "step": 40970 + }, + { + "epoch": 0.16556438547655314, + "grad_norm": 1007.4364013671875, + "learning_rate": 3.950422368588926e-05, + "loss": 88.378, + "step": 40980 + }, + { + "epoch": 0.16560478674192075, + "grad_norm": 1910.2664794921875, + "learning_rate": 3.950360557611668e-05, + "loss": 105.6011, + "step": 40990 + }, + { + "epoch": 0.1656451880072884, + "grad_norm": 439.5602111816406, + "learning_rate": 3.950298708611127e-05, + "loss": 96.6951, + "step": 41000 + }, + { + "epoch": 0.16568558927265603, + "grad_norm": 878.0592651367188, + "learning_rate": 3.950236821588508e-05, + "loss": 98.5497, + "step": 41010 + }, + { + "epoch": 0.16572599053802364, + "grad_norm": 804.0274658203125, + "learning_rate": 3.9501748965450186e-05, + "loss": 94.8638, + "step": 41020 + }, + { + "epoch": 0.16576639180339128, + "grad_norm": 1002.6051025390625, + "learning_rate": 3.950112933481866e-05, + "loss": 100.6395, + "step": 41030 + }, + { + "epoch": 0.16580679306875892, + "grad_norm": 446.0777282714844, + "learning_rate": 3.950050932400257e-05, + "loss": 111.4529, + "step": 41040 + }, + { + "epoch": 0.16584719433412654, + "grad_norm": 723.0811157226562, + "learning_rate": 3.949988893301401e-05, + "loss": 150.1717, + "step": 41050 + }, + { + "epoch": 0.16588759559949418, + "grad_norm": 1703.6566162109375, + "learning_rate": 3.9499268161865085e-05, + "loss": 94.4115, + "step": 41060 + }, + { + "epoch": 0.16592799686486182, + "grad_norm": 517.4072265625, + "learning_rate": 3.949864701056788e-05, + "loss": 116.2724, + "step": 41070 + }, + { + "epoch": 0.16596839813022943, + "grad_norm": 639.8158569335938, + "learning_rate": 3.9498025479134516e-05, + "loss": 105.3175, + "step": 41080 + }, + { + "epoch": 0.16600879939559707, + "grad_norm": 583.4629516601562, + "learning_rate": 3.9497403567577114e-05, + "loss": 148.0952, + "step": 41090 + }, + { + "epoch": 0.1660492006609647, + "grad_norm": 479.5802001953125, + "learning_rate": 3.949678127590778e-05, + "loss": 97.9721, + "step": 41100 + }, + { + "epoch": 0.16608960192633232, + "grad_norm": 700.5800170898438, + "learning_rate": 3.949615860413866e-05, + "loss": 81.5057, + "step": 41110 + }, + { + "epoch": 0.16613000319169996, + "grad_norm": 768.9335327148438, + "learning_rate": 3.94955355522819e-05, + "loss": 195.5286, + "step": 41120 + }, + { + "epoch": 0.1661704044570676, + "grad_norm": 717.7261352539062, + "learning_rate": 3.9494912120349626e-05, + "loss": 70.028, + "step": 41130 + }, + { + "epoch": 0.16621080572243524, + "grad_norm": 511.00213623046875, + "learning_rate": 3.949428830835401e-05, + "loss": 75.638, + "step": 41140 + }, + { + "epoch": 0.16625120698780285, + "grad_norm": 533.3799438476562, + "learning_rate": 3.9493664116307204e-05, + "loss": 123.071, + "step": 41150 + }, + { + "epoch": 0.1662916082531705, + "grad_norm": 438.3563537597656, + "learning_rate": 3.9493039544221375e-05, + "loss": 
120.464, + "step": 41160 + }, + { + "epoch": 0.16633200951853813, + "grad_norm": 658.2781372070312, + "learning_rate": 3.949241459210871e-05, + "loss": 81.1189, + "step": 41170 + }, + { + "epoch": 0.16637241078390574, + "grad_norm": 716.3282470703125, + "learning_rate": 3.949178925998139e-05, + "loss": 146.465, + "step": 41180 + }, + { + "epoch": 0.16641281204927338, + "grad_norm": 418.78228759765625, + "learning_rate": 3.9491163547851604e-05, + "loss": 152.23, + "step": 41190 + }, + { + "epoch": 0.16645321331464102, + "grad_norm": 2858.0849609375, + "learning_rate": 3.949053745573155e-05, + "loss": 151.1828, + "step": 41200 + }, + { + "epoch": 0.16649361458000864, + "grad_norm": 823.4974365234375, + "learning_rate": 3.9489910983633426e-05, + "loss": 142.1795, + "step": 41210 + }, + { + "epoch": 0.16653401584537628, + "grad_norm": 511.9835205078125, + "learning_rate": 3.9489284131569456e-05, + "loss": 111.6528, + "step": 41220 + }, + { + "epoch": 0.16657441711074392, + "grad_norm": 563.0213623046875, + "learning_rate": 3.948865689955186e-05, + "loss": 96.6502, + "step": 41230 + }, + { + "epoch": 0.16661481837611153, + "grad_norm": 1379.977783203125, + "learning_rate": 3.948802928759287e-05, + "loss": 106.4517, + "step": 41240 + }, + { + "epoch": 0.16665521964147917, + "grad_norm": 1753.0281982421875, + "learning_rate": 3.948740129570471e-05, + "loss": 97.4072, + "step": 41250 + }, + { + "epoch": 0.1666956209068468, + "grad_norm": 660.6708984375, + "learning_rate": 3.948677292389963e-05, + "loss": 93.4757, + "step": 41260 + }, + { + "epoch": 0.16673602217221442, + "grad_norm": 480.96356201171875, + "learning_rate": 3.948614417218988e-05, + "loss": 104.2728, + "step": 41270 + }, + { + "epoch": 0.16677642343758206, + "grad_norm": 2432.844482421875, + "learning_rate": 3.948551504058771e-05, + "loss": 267.0415, + "step": 41280 + }, + { + "epoch": 0.1668168247029497, + "grad_norm": 560.226318359375, + "learning_rate": 3.94848855291054e-05, + "loss": 114.4159, + "step": 41290 + }, + { + "epoch": 0.16685722596831734, + "grad_norm": 889.0921630859375, + "learning_rate": 3.948425563775521e-05, + "loss": 143.6283, + "step": 41300 + }, + { + "epoch": 0.16689762723368495, + "grad_norm": 848.4343872070312, + "learning_rate": 3.948362536654943e-05, + "loss": 101.7539, + "step": 41310 + }, + { + "epoch": 0.1669380284990526, + "grad_norm": 717.8780517578125, + "learning_rate": 3.948299471550034e-05, + "loss": 104.8227, + "step": 41320 + }, + { + "epoch": 0.16697842976442023, + "grad_norm": 1096.0289306640625, + "learning_rate": 3.9482363684620247e-05, + "loss": 146.6659, + "step": 41330 + }, + { + "epoch": 0.16701883102978785, + "grad_norm": 1031.4234619140625, + "learning_rate": 3.9481732273921435e-05, + "loss": 137.1286, + "step": 41340 + }, + { + "epoch": 0.16705923229515549, + "grad_norm": 1168.3775634765625, + "learning_rate": 3.948110048341622e-05, + "loss": 119.015, + "step": 41350 + }, + { + "epoch": 0.16709963356052313, + "grad_norm": 1371.406982421875, + "learning_rate": 3.9480468313116925e-05, + "loss": 155.8904, + "step": 41360 + }, + { + "epoch": 0.16714003482589074, + "grad_norm": 626.3816528320312, + "learning_rate": 3.947983576303587e-05, + "loss": 92.0411, + "step": 41370 + }, + { + "epoch": 0.16718043609125838, + "grad_norm": 861.3816528320312, + "learning_rate": 3.947920283318539e-05, + "loss": 97.724, + "step": 41380 + }, + { + "epoch": 0.16722083735662602, + "grad_norm": 1282.038330078125, + "learning_rate": 3.947856952357782e-05, + "loss": 142.2227, + "step": 41390 + }, + { + "epoch": 
0.16726123862199363, + "grad_norm": 516.6495971679688, + "learning_rate": 3.9477935834225503e-05, + "loss": 117.7902, + "step": 41400 + }, + { + "epoch": 0.16730163988736127, + "grad_norm": 890.553466796875, + "learning_rate": 3.947730176514081e-05, + "loss": 108.9449, + "step": 41410 + }, + { + "epoch": 0.1673420411527289, + "grad_norm": 558.4175415039062, + "learning_rate": 3.947666731633609e-05, + "loss": 111.3973, + "step": 41420 + }, + { + "epoch": 0.16738244241809652, + "grad_norm": 640.7515869140625, + "learning_rate": 3.947603248782371e-05, + "loss": 110.9352, + "step": 41430 + }, + { + "epoch": 0.16742284368346416, + "grad_norm": 1068.6756591796875, + "learning_rate": 3.947539727961605e-05, + "loss": 120.4783, + "step": 41440 + }, + { + "epoch": 0.1674632449488318, + "grad_norm": 1086.560302734375, + "learning_rate": 3.947476169172549e-05, + "loss": 117.4876, + "step": 41450 + }, + { + "epoch": 0.16750364621419944, + "grad_norm": 651.2415161132812, + "learning_rate": 3.947412572416443e-05, + "loss": 82.3842, + "step": 41460 + }, + { + "epoch": 0.16754404747956705, + "grad_norm": 4211.45068359375, + "learning_rate": 3.947348937694526e-05, + "loss": 140.8529, + "step": 41470 + }, + { + "epoch": 0.1675844487449347, + "grad_norm": 2848.469482421875, + "learning_rate": 3.947285265008039e-05, + "loss": 125.1804, + "step": 41480 + }, + { + "epoch": 0.16762485001030233, + "grad_norm": 655.3458251953125, + "learning_rate": 3.9472215543582234e-05, + "loss": 86.31, + "step": 41490 + }, + { + "epoch": 0.16766525127566995, + "grad_norm": 957.9923095703125, + "learning_rate": 3.9471578057463206e-05, + "loss": 106.9451, + "step": 41500 + }, + { + "epoch": 0.1677056525410376, + "grad_norm": 1261.2860107421875, + "learning_rate": 3.9470940191735745e-05, + "loss": 86.2562, + "step": 41510 + }, + { + "epoch": 0.16774605380640523, + "grad_norm": 1045.08154296875, + "learning_rate": 3.947030194641228e-05, + "loss": 102.7567, + "step": 41520 + }, + { + "epoch": 0.16778645507177284, + "grad_norm": 985.9114990234375, + "learning_rate": 3.946966332150525e-05, + "loss": 135.3336, + "step": 41530 + }, + { + "epoch": 0.16782685633714048, + "grad_norm": 1004.5711669921875, + "learning_rate": 3.9469024317027115e-05, + "loss": 102.1287, + "step": 41540 + }, + { + "epoch": 0.16786725760250812, + "grad_norm": 472.5003356933594, + "learning_rate": 3.9468384932990324e-05, + "loss": 122.8802, + "step": 41550 + }, + { + "epoch": 0.16790765886787573, + "grad_norm": 1034.25537109375, + "learning_rate": 3.9467745169407346e-05, + "loss": 130.1389, + "step": 41560 + }, + { + "epoch": 0.16794806013324337, + "grad_norm": 1978.5595703125, + "learning_rate": 3.946710502629065e-05, + "loss": 142.1907, + "step": 41570 + }, + { + "epoch": 0.167988461398611, + "grad_norm": 995.4104614257812, + "learning_rate": 3.946646450365273e-05, + "loss": 132.5986, + "step": 41580 + }, + { + "epoch": 0.16802886266397862, + "grad_norm": 1032.72412109375, + "learning_rate": 3.9465823601506055e-05, + "loss": 109.7837, + "step": 41590 + }, + { + "epoch": 0.16806926392934626, + "grad_norm": 1005.884765625, + "learning_rate": 3.946518231986313e-05, + "loss": 99.3994, + "step": 41600 + }, + { + "epoch": 0.1681096651947139, + "grad_norm": 722.04443359375, + "learning_rate": 3.946454065873645e-05, + "loss": 139.0672, + "step": 41610 + }, + { + "epoch": 0.16815006646008154, + "grad_norm": 825.5150146484375, + "learning_rate": 3.946389861813854e-05, + "loss": 106.9603, + "step": 41620 + }, + { + "epoch": 0.16819046772544916, + "grad_norm": 
358.41937255859375, + "learning_rate": 3.946325619808189e-05, + "loss": 121.2124, + "step": 41630 + }, + { + "epoch": 0.1682308689908168, + "grad_norm": 640.3995971679688, + "learning_rate": 3.9462613398579044e-05, + "loss": 92.8995, + "step": 41640 + }, + { + "epoch": 0.16827127025618444, + "grad_norm": 463.2493896484375, + "learning_rate": 3.9461970219642535e-05, + "loss": 123.4215, + "step": 41650 + }, + { + "epoch": 0.16831167152155205, + "grad_norm": 332.972412109375, + "learning_rate": 3.946132666128489e-05, + "loss": 100.7317, + "step": 41660 + }, + { + "epoch": 0.1683520727869197, + "grad_norm": 864.1085815429688, + "learning_rate": 3.946068272351867e-05, + "loss": 144.739, + "step": 41670 + }, + { + "epoch": 0.16839247405228733, + "grad_norm": 901.1964721679688, + "learning_rate": 3.946003840635642e-05, + "loss": 131.7153, + "step": 41680 + }, + { + "epoch": 0.16843287531765494, + "grad_norm": 1305.6922607421875, + "learning_rate": 3.94593937098107e-05, + "loss": 113.1183, + "step": 41690 + }, + { + "epoch": 0.16847327658302258, + "grad_norm": 660.06689453125, + "learning_rate": 3.945874863389408e-05, + "loss": 88.4348, + "step": 41700 + }, + { + "epoch": 0.16851367784839022, + "grad_norm": 638.3485107421875, + "learning_rate": 3.9458103178619146e-05, + "loss": 116.1925, + "step": 41710 + }, + { + "epoch": 0.16855407911375783, + "grad_norm": 577.2969360351562, + "learning_rate": 3.945745734399846e-05, + "loss": 149.497, + "step": 41720 + }, + { + "epoch": 0.16859448037912547, + "grad_norm": 785.0895385742188, + "learning_rate": 3.945681113004463e-05, + "loss": 157.5109, + "step": 41730 + }, + { + "epoch": 0.1686348816444931, + "grad_norm": 297.44744873046875, + "learning_rate": 3.945616453677025e-05, + "loss": 82.8016, + "step": 41740 + }, + { + "epoch": 0.16867528290986072, + "grad_norm": 1657.5028076171875, + "learning_rate": 3.945551756418794e-05, + "loss": 131.907, + "step": 41750 + }, + { + "epoch": 0.16871568417522836, + "grad_norm": 821.028076171875, + "learning_rate": 3.945487021231028e-05, + "loss": 111.2273, + "step": 41760 + }, + { + "epoch": 0.168756085440596, + "grad_norm": 862.4304809570312, + "learning_rate": 3.9454222481149916e-05, + "loss": 90.5899, + "step": 41770 + }, + { + "epoch": 0.16879648670596364, + "grad_norm": 781.4091186523438, + "learning_rate": 3.945357437071947e-05, + "loss": 112.6845, + "step": 41780 + }, + { + "epoch": 0.16883688797133126, + "grad_norm": 2174.80517578125, + "learning_rate": 3.9452925881031574e-05, + "loss": 90.2783, + "step": 41790 + }, + { + "epoch": 0.1688772892366989, + "grad_norm": 539.6593017578125, + "learning_rate": 3.9452277012098875e-05, + "loss": 108.9328, + "step": 41800 + }, + { + "epoch": 0.16891769050206654, + "grad_norm": 866.9989013671875, + "learning_rate": 3.945162776393402e-05, + "loss": 107.8307, + "step": 41810 + }, + { + "epoch": 0.16895809176743415, + "grad_norm": 776.9735717773438, + "learning_rate": 3.9450978136549665e-05, + "loss": 78.6728, + "step": 41820 + }, + { + "epoch": 0.1689984930328018, + "grad_norm": 561.6749877929688, + "learning_rate": 3.9450328129958484e-05, + "loss": 111.9968, + "step": 41830 + }, + { + "epoch": 0.16903889429816943, + "grad_norm": 592.6694946289062, + "learning_rate": 3.9449677744173135e-05, + "loss": 112.9213, + "step": 41840 + }, + { + "epoch": 0.16907929556353704, + "grad_norm": 473.5004577636719, + "learning_rate": 3.9449026979206305e-05, + "loss": 95.2653, + "step": 41850 + }, + { + "epoch": 0.16911969682890468, + "grad_norm": 6222.8759765625, + "learning_rate": 
3.9448375835070685e-05, + "loss": 147.4869, + "step": 41860 + }, + { + "epoch": 0.16916009809427232, + "grad_norm": 995.9276123046875, + "learning_rate": 3.944772431177896e-05, + "loss": 105.4912, + "step": 41870 + }, + { + "epoch": 0.16920049935963993, + "grad_norm": 924.9529418945312, + "learning_rate": 3.9447072409343844e-05, + "loss": 107.817, + "step": 41880 + }, + { + "epoch": 0.16924090062500757, + "grad_norm": 679.0335083007812, + "learning_rate": 3.944642012777804e-05, + "loss": 108.5123, + "step": 41890 + }, + { + "epoch": 0.1692813018903752, + "grad_norm": 934.6605834960938, + "learning_rate": 3.9445767467094256e-05, + "loss": 98.4993, + "step": 41900 + }, + { + "epoch": 0.16932170315574283, + "grad_norm": 1562.09619140625, + "learning_rate": 3.944511442730523e-05, + "loss": 167.9836, + "step": 41910 + }, + { + "epoch": 0.16936210442111047, + "grad_norm": 554.9857788085938, + "learning_rate": 3.9444461008423687e-05, + "loss": 76.6755, + "step": 41920 + }, + { + "epoch": 0.1694025056864781, + "grad_norm": 871.3563232421875, + "learning_rate": 3.944380721046236e-05, + "loss": 104.406, + "step": 41930 + }, + { + "epoch": 0.16944290695184575, + "grad_norm": 792.3182373046875, + "learning_rate": 3.944315303343401e-05, + "loss": 94.9315, + "step": 41940 + }, + { + "epoch": 0.16948330821721336, + "grad_norm": 569.3258056640625, + "learning_rate": 3.9442498477351376e-05, + "loss": 136.9381, + "step": 41950 + }, + { + "epoch": 0.169523709482581, + "grad_norm": 354.1698303222656, + "learning_rate": 3.944184354222722e-05, + "loss": 92.9349, + "step": 41960 + }, + { + "epoch": 0.16956411074794864, + "grad_norm": 2419.380615234375, + "learning_rate": 3.9441188228074326e-05, + "loss": 136.1749, + "step": 41970 + }, + { + "epoch": 0.16960451201331625, + "grad_norm": 697.9307861328125, + "learning_rate": 3.944053253490546e-05, + "loss": 135.0591, + "step": 41980 + }, + { + "epoch": 0.1696449132786839, + "grad_norm": 855.5518188476562, + "learning_rate": 3.943987646273339e-05, + "loss": 158.1706, + "step": 41990 + }, + { + "epoch": 0.16968531454405153, + "grad_norm": 417.7906799316406, + "learning_rate": 3.943922001157093e-05, + "loss": 82.8401, + "step": 42000 + }, + { + "epoch": 0.16972571580941914, + "grad_norm": 625.1957397460938, + "learning_rate": 3.9438563181430863e-05, + "loss": 84.6596, + "step": 42010 + }, + { + "epoch": 0.16976611707478678, + "grad_norm": 487.8559875488281, + "learning_rate": 3.9437905972326e-05, + "loss": 103.0718, + "step": 42020 + }, + { + "epoch": 0.16980651834015442, + "grad_norm": 1154.2357177734375, + "learning_rate": 3.9437248384269155e-05, + "loss": 91.8372, + "step": 42030 + }, + { + "epoch": 0.16984691960552203, + "grad_norm": 3242.97802734375, + "learning_rate": 3.943659041727314e-05, + "loss": 95.5124, + "step": 42040 + }, + { + "epoch": 0.16988732087088967, + "grad_norm": 588.6522827148438, + "learning_rate": 3.94359320713508e-05, + "loss": 97.7354, + "step": 42050 + }, + { + "epoch": 0.16992772213625731, + "grad_norm": 576.7422485351562, + "learning_rate": 3.943527334651495e-05, + "loss": 81.0645, + "step": 42060 + }, + { + "epoch": 0.16996812340162493, + "grad_norm": 849.04296875, + "learning_rate": 3.9434614242778435e-05, + "loss": 108.8678, + "step": 42070 + }, + { + "epoch": 0.17000852466699257, + "grad_norm": 875.044921875, + "learning_rate": 3.9433954760154116e-05, + "loss": 124.9416, + "step": 42080 + }, + { + "epoch": 0.1700489259323602, + "grad_norm": 1066.2618408203125, + "learning_rate": 3.9433294898654846e-05, + "loss": 114.1114, + "step": 
42090 + }, + { + "epoch": 0.17008932719772785, + "grad_norm": 902.5121459960938, + "learning_rate": 3.943263465829348e-05, + "loss": 87.5394, + "step": 42100 + }, + { + "epoch": 0.17012972846309546, + "grad_norm": 601.8876953125, + "learning_rate": 3.94319740390829e-05, + "loss": 67.5315, + "step": 42110 + }, + { + "epoch": 0.1701701297284631, + "grad_norm": 962.333984375, + "learning_rate": 3.943131304103599e-05, + "loss": 111.7517, + "step": 42120 + }, + { + "epoch": 0.17021053099383074, + "grad_norm": 1373.868896484375, + "learning_rate": 3.9430651664165616e-05, + "loss": 158.8356, + "step": 42130 + }, + { + "epoch": 0.17025093225919835, + "grad_norm": 434.61395263671875, + "learning_rate": 3.942998990848469e-05, + "loss": 101.412, + "step": 42140 + }, + { + "epoch": 0.170291333524566, + "grad_norm": 776.0209350585938, + "learning_rate": 3.942932777400611e-05, + "loss": 95.1552, + "step": 42150 + }, + { + "epoch": 0.17033173478993363, + "grad_norm": 744.5134887695312, + "learning_rate": 3.942866526074277e-05, + "loss": 88.9911, + "step": 42160 + }, + { + "epoch": 0.17037213605530124, + "grad_norm": 1249.5181884765625, + "learning_rate": 3.942800236870761e-05, + "loss": 114.6759, + "step": 42170 + }, + { + "epoch": 0.17041253732066888, + "grad_norm": 950.3778076171875, + "learning_rate": 3.942733909791354e-05, + "loss": 105.9882, + "step": 42180 + }, + { + "epoch": 0.17045293858603652, + "grad_norm": 672.03759765625, + "learning_rate": 3.942667544837349e-05, + "loss": 172.628, + "step": 42190 + }, + { + "epoch": 0.17049333985140414, + "grad_norm": 641.8781127929688, + "learning_rate": 3.9426011420100405e-05, + "loss": 149.9698, + "step": 42200 + }, + { + "epoch": 0.17053374111677178, + "grad_norm": 924.5191040039062, + "learning_rate": 3.942534701310722e-05, + "loss": 136.1438, + "step": 42210 + }, + { + "epoch": 0.17057414238213942, + "grad_norm": 935.656494140625, + "learning_rate": 3.94246822274069e-05, + "loss": 140.1681, + "step": 42220 + }, + { + "epoch": 0.17061454364750703, + "grad_norm": 1792.654541015625, + "learning_rate": 3.9424017063012394e-05, + "loss": 91.5103, + "step": 42230 + }, + { + "epoch": 0.17065494491287467, + "grad_norm": 932.7950439453125, + "learning_rate": 3.942335151993668e-05, + "loss": 131.6863, + "step": 42240 + }, + { + "epoch": 0.1706953461782423, + "grad_norm": 971.8015747070312, + "learning_rate": 3.942268559819272e-05, + "loss": 114.6801, + "step": 42250 + }, + { + "epoch": 0.17073574744360995, + "grad_norm": 1403.396728515625, + "learning_rate": 3.9422019297793516e-05, + "loss": 134.4927, + "step": 42260 + }, + { + "epoch": 0.17077614870897756, + "grad_norm": 836.7633056640625, + "learning_rate": 3.942135261875204e-05, + "loss": 148.6156, + "step": 42270 + }, + { + "epoch": 0.1708165499743452, + "grad_norm": 621.222412109375, + "learning_rate": 3.94206855610813e-05, + "loss": 110.3543, + "step": 42280 + }, + { + "epoch": 0.17085695123971284, + "grad_norm": 1044.2490234375, + "learning_rate": 3.9420018124794294e-05, + "loss": 124.8935, + "step": 42290 + }, + { + "epoch": 0.17089735250508045, + "grad_norm": 1415.83154296875, + "learning_rate": 3.941935030990403e-05, + "loss": 152.093, + "step": 42300 + }, + { + "epoch": 0.1709377537704481, + "grad_norm": 667.5238647460938, + "learning_rate": 3.941868211642355e-05, + "loss": 96.6363, + "step": 42310 + }, + { + "epoch": 0.17097815503581573, + "grad_norm": 640.1421508789062, + "learning_rate": 3.941801354436585e-05, + "loss": 107.8416, + "step": 42320 + }, + { + "epoch": 0.17101855630118334, + 
"grad_norm": 721.0938720703125, + "learning_rate": 3.941734459374399e-05, + "loss": 105.7388, + "step": 42330 + }, + { + "epoch": 0.17105895756655098, + "grad_norm": 488.30548095703125, + "learning_rate": 3.941667526457099e-05, + "loss": 98.8257, + "step": 42340 + }, + { + "epoch": 0.17109935883191862, + "grad_norm": 1547.44580078125, + "learning_rate": 3.9416005556859914e-05, + "loss": 109.376, + "step": 42350 + }, + { + "epoch": 0.17113976009728624, + "grad_norm": 597.3015747070312, + "learning_rate": 3.9415335470623816e-05, + "loss": 125.9846, + "step": 42360 + }, + { + "epoch": 0.17118016136265388, + "grad_norm": 1727.8135986328125, + "learning_rate": 3.941466500587575e-05, + "loss": 152.7063, + "step": 42370 + }, + { + "epoch": 0.17122056262802152, + "grad_norm": 448.71038818359375, + "learning_rate": 3.9413994162628804e-05, + "loss": 147.5613, + "step": 42380 + }, + { + "epoch": 0.17126096389338913, + "grad_norm": 1016.4852905273438, + "learning_rate": 3.941332294089604e-05, + "loss": 88.7957, + "step": 42390 + }, + { + "epoch": 0.17130136515875677, + "grad_norm": 567.3622436523438, + "learning_rate": 3.941265134069055e-05, + "loss": 120.7791, + "step": 42400 + }, + { + "epoch": 0.1713417664241244, + "grad_norm": 852.2799072265625, + "learning_rate": 3.941197936202543e-05, + "loss": 127.5319, + "step": 42410 + }, + { + "epoch": 0.17138216768949205, + "grad_norm": 1023.0543212890625, + "learning_rate": 3.941130700491378e-05, + "loss": 112.3733, + "step": 42420 + }, + { + "epoch": 0.17142256895485966, + "grad_norm": 1055.6456298828125, + "learning_rate": 3.9410634269368706e-05, + "loss": 115.818, + "step": 42430 + }, + { + "epoch": 0.1714629702202273, + "grad_norm": 514.7354736328125, + "learning_rate": 3.940996115540332e-05, + "loss": 114.1943, + "step": 42440 + }, + { + "epoch": 0.17150337148559494, + "grad_norm": 979.1553344726562, + "learning_rate": 3.9409287663030754e-05, + "loss": 118.4565, + "step": 42450 + }, + { + "epoch": 0.17154377275096255, + "grad_norm": 879.0123901367188, + "learning_rate": 3.9408613792264125e-05, + "loss": 139.759, + "step": 42460 + }, + { + "epoch": 0.1715841740163302, + "grad_norm": 2067.239990234375, + "learning_rate": 3.940793954311659e-05, + "loss": 125.3303, + "step": 42470 + }, + { + "epoch": 0.17162457528169783, + "grad_norm": 932.2379150390625, + "learning_rate": 3.940726491560127e-05, + "loss": 103.2024, + "step": 42480 + }, + { + "epoch": 0.17166497654706545, + "grad_norm": 361.9033508300781, + "learning_rate": 3.9406589909731335e-05, + "loss": 63.9916, + "step": 42490 + }, + { + "epoch": 0.17170537781243309, + "grad_norm": 1383.032958984375, + "learning_rate": 3.940591452551993e-05, + "loss": 125.238, + "step": 42500 + }, + { + "epoch": 0.17174577907780073, + "grad_norm": 463.8815002441406, + "learning_rate": 3.940523876298024e-05, + "loss": 137.1448, + "step": 42510 + }, + { + "epoch": 0.17178618034316834, + "grad_norm": 943.1512451171875, + "learning_rate": 3.940456262212543e-05, + "loss": 125.5852, + "step": 42520 + }, + { + "epoch": 0.17182658160853598, + "grad_norm": 837.7481689453125, + "learning_rate": 3.940388610296868e-05, + "loss": 92.0099, + "step": 42530 + }, + { + "epoch": 0.17186698287390362, + "grad_norm": 785.1520385742188, + "learning_rate": 3.9403209205523173e-05, + "loss": 94.0657, + "step": 42540 + }, + { + "epoch": 0.17190738413927123, + "grad_norm": 612.3370361328125, + "learning_rate": 3.940253192980212e-05, + "loss": 118.8491, + "step": 42550 + }, + { + "epoch": 0.17194778540463887, + "grad_norm": 617.0350341796875, + 
"learning_rate": 3.9401854275818716e-05, + "loss": 142.3785, + "step": 42560 + }, + { + "epoch": 0.1719881866700065, + "grad_norm": 1035.0308837890625, + "learning_rate": 3.9401176243586177e-05, + "loss": 113.9025, + "step": 42570 + }, + { + "epoch": 0.17202858793537412, + "grad_norm": 3864.7373046875, + "learning_rate": 3.9400497833117716e-05, + "loss": 146.3651, + "step": 42580 + }, + { + "epoch": 0.17206898920074176, + "grad_norm": 598.6505126953125, + "learning_rate": 3.939981904442657e-05, + "loss": 96.4063, + "step": 42590 + }, + { + "epoch": 0.1721093904661094, + "grad_norm": 997.36572265625, + "learning_rate": 3.939913987752595e-05, + "loss": 110.8288, + "step": 42600 + }, + { + "epoch": 0.17214979173147704, + "grad_norm": 1018.92822265625, + "learning_rate": 3.9398460332429115e-05, + "loss": 118.7646, + "step": 42610 + }, + { + "epoch": 0.17219019299684465, + "grad_norm": 517.6384887695312, + "learning_rate": 3.9397780409149314e-05, + "loss": 70.0042, + "step": 42620 + }, + { + "epoch": 0.1722305942622123, + "grad_norm": 1427.200439453125, + "learning_rate": 3.9397100107699795e-05, + "loss": 125.8943, + "step": 42630 + }, + { + "epoch": 0.17227099552757993, + "grad_norm": 589.7958984375, + "learning_rate": 3.939641942809382e-05, + "loss": 119.084, + "step": 42640 + }, + { + "epoch": 0.17231139679294755, + "grad_norm": 558.9051513671875, + "learning_rate": 3.939573837034466e-05, + "loss": 129.0951, + "step": 42650 + }, + { + "epoch": 0.1723517980583152, + "grad_norm": 1427.28076171875, + "learning_rate": 3.9395056934465604e-05, + "loss": 130.8611, + "step": 42660 + }, + { + "epoch": 0.17239219932368283, + "grad_norm": 590.2985229492188, + "learning_rate": 3.939437512046993e-05, + "loss": 109.1903, + "step": 42670 + }, + { + "epoch": 0.17243260058905044, + "grad_norm": 889.4963989257812, + "learning_rate": 3.939369292837092e-05, + "loss": 105.8503, + "step": 42680 + }, + { + "epoch": 0.17247300185441808, + "grad_norm": 1047.5587158203125, + "learning_rate": 3.939301035818188e-05, + "loss": 109.6355, + "step": 42690 + }, + { + "epoch": 0.17251340311978572, + "grad_norm": 1273.1549072265625, + "learning_rate": 3.939232740991612e-05, + "loss": 132.9647, + "step": 42700 + }, + { + "epoch": 0.17255380438515333, + "grad_norm": 1242.1671142578125, + "learning_rate": 3.939164408358696e-05, + "loss": 147.3217, + "step": 42710 + }, + { + "epoch": 0.17259420565052097, + "grad_norm": 670.1947631835938, + "learning_rate": 3.939096037920771e-05, + "loss": 105.9943, + "step": 42720 + }, + { + "epoch": 0.1726346069158886, + "grad_norm": 900.7072143554688, + "learning_rate": 3.939027629679171e-05, + "loss": 136.0396, + "step": 42730 + }, + { + "epoch": 0.17267500818125622, + "grad_norm": 8994.9296875, + "learning_rate": 3.938959183635228e-05, + "loss": 142.5161, + "step": 42740 + }, + { + "epoch": 0.17271540944662386, + "grad_norm": 933.7422485351562, + "learning_rate": 3.9388906997902784e-05, + "loss": 114.8915, + "step": 42750 + }, + { + "epoch": 0.1727558107119915, + "grad_norm": 1077.122314453125, + "learning_rate": 3.938822178145656e-05, + "loss": 139.1018, + "step": 42760 + }, + { + "epoch": 0.17279621197735914, + "grad_norm": 1194.261962890625, + "learning_rate": 3.938753618702697e-05, + "loss": 139.0951, + "step": 42770 + }, + { + "epoch": 0.17283661324272676, + "grad_norm": 617.9090576171875, + "learning_rate": 3.938685021462738e-05, + "loss": 93.7339, + "step": 42780 + }, + { + "epoch": 0.1728770145080944, + "grad_norm": 437.38006591796875, + "learning_rate": 3.938616386427117e-05, + 
"loss": 133.6026, + "step": 42790 + }, + { + "epoch": 0.17291741577346204, + "grad_norm": 470.6004333496094, + "learning_rate": 3.938547713597171e-05, + "loss": 90.3386, + "step": 42800 + }, + { + "epoch": 0.17295781703882965, + "grad_norm": 734.4319458007812, + "learning_rate": 3.938479002974239e-05, + "loss": 99.7942, + "step": 42810 + }, + { + "epoch": 0.1729982183041973, + "grad_norm": 2805.66943359375, + "learning_rate": 3.938410254559661e-05, + "loss": 163.4562, + "step": 42820 + }, + { + "epoch": 0.17303861956956493, + "grad_norm": 484.2641296386719, + "learning_rate": 3.938341468354778e-05, + "loss": 104.5835, + "step": 42830 + }, + { + "epoch": 0.17307902083493254, + "grad_norm": 1167.4285888671875, + "learning_rate": 3.938272644360929e-05, + "loss": 184.0451, + "step": 42840 + }, + { + "epoch": 0.17311942210030018, + "grad_norm": 468.2950134277344, + "learning_rate": 3.938203782579457e-05, + "loss": 99.6706, + "step": 42850 + }, + { + "epoch": 0.17315982336566782, + "grad_norm": 423.25048828125, + "learning_rate": 3.9381348830117055e-05, + "loss": 132.6958, + "step": 42860 + }, + { + "epoch": 0.17320022463103543, + "grad_norm": 1089.146484375, + "learning_rate": 3.938065945659016e-05, + "loss": 108.9709, + "step": 42870 + }, + { + "epoch": 0.17324062589640307, + "grad_norm": 785.1051025390625, + "learning_rate": 3.937996970522733e-05, + "loss": 107.2216, + "step": 42880 + }, + { + "epoch": 0.1732810271617707, + "grad_norm": 549.4488525390625, + "learning_rate": 3.937927957604201e-05, + "loss": 72.6161, + "step": 42890 + }, + { + "epoch": 0.17332142842713832, + "grad_norm": 598.3110961914062, + "learning_rate": 3.937858906904766e-05, + "loss": 52.1551, + "step": 42900 + }, + { + "epoch": 0.17336182969250596, + "grad_norm": 1018.7184448242188, + "learning_rate": 3.937789818425774e-05, + "loss": 169.5503, + "step": 42910 + }, + { + "epoch": 0.1734022309578736, + "grad_norm": 750.703857421875, + "learning_rate": 3.9377206921685725e-05, + "loss": 121.1408, + "step": 42920 + }, + { + "epoch": 0.17344263222324124, + "grad_norm": 626.6898193359375, + "learning_rate": 3.9376515281345076e-05, + "loss": 127.6009, + "step": 42930 + }, + { + "epoch": 0.17348303348860886, + "grad_norm": 593.05615234375, + "learning_rate": 3.937582326324929e-05, + "loss": 105.4111, + "step": 42940 + }, + { + "epoch": 0.1735234347539765, + "grad_norm": 1509.237548828125, + "learning_rate": 3.937513086741185e-05, + "loss": 133.1203, + "step": 42950 + }, + { + "epoch": 0.17356383601934414, + "grad_norm": 974.6829223632812, + "learning_rate": 3.937443809384626e-05, + "loss": 114.0419, + "step": 42960 + }, + { + "epoch": 0.17360423728471175, + "grad_norm": 289.76513671875, + "learning_rate": 3.9373744942566025e-05, + "loss": 80.0766, + "step": 42970 + }, + { + "epoch": 0.1736446385500794, + "grad_norm": 586.3272705078125, + "learning_rate": 3.937305141358466e-05, + "loss": 92.4703, + "step": 42980 + }, + { + "epoch": 0.17368503981544703, + "grad_norm": 626.81982421875, + "learning_rate": 3.937235750691569e-05, + "loss": 119.8873, + "step": 42990 + }, + { + "epoch": 0.17372544108081464, + "grad_norm": 542.3326416015625, + "learning_rate": 3.9371663222572625e-05, + "loss": 121.7529, + "step": 43000 + }, + { + "epoch": 0.17376584234618228, + "grad_norm": 1248.322998046875, + "learning_rate": 3.937096856056902e-05, + "loss": 97.0481, + "step": 43010 + }, + { + "epoch": 0.17380624361154992, + "grad_norm": 1079.0216064453125, + "learning_rate": 3.93702735209184e-05, + "loss": 120.0687, + "step": 43020 + }, + { + "epoch": 
0.17384664487691753, + "grad_norm": 1763.86962890625, + "learning_rate": 3.936957810363434e-05, + "loss": 161.4768, + "step": 43030 + }, + { + "epoch": 0.17388704614228517, + "grad_norm": 713.7518310546875, + "learning_rate": 3.936888230873037e-05, + "loss": 103.1226, + "step": 43040 + }, + { + "epoch": 0.1739274474076528, + "grad_norm": 971.8245849609375, + "learning_rate": 3.936818613622008e-05, + "loss": 160.2456, + "step": 43050 + }, + { + "epoch": 0.17396784867302043, + "grad_norm": 841.14404296875, + "learning_rate": 3.936748958611702e-05, + "loss": 136.5533, + "step": 43060 + }, + { + "epoch": 0.17400824993838807, + "grad_norm": 1150.502197265625, + "learning_rate": 3.936679265843478e-05, + "loss": 147.3124, + "step": 43070 + }, + { + "epoch": 0.1740486512037557, + "grad_norm": 1363.6405029296875, + "learning_rate": 3.936609535318695e-05, + "loss": 137.463, + "step": 43080 + }, + { + "epoch": 0.17408905246912335, + "grad_norm": 511.3232421875, + "learning_rate": 3.936539767038712e-05, + "loss": 105.749, + "step": 43090 + }, + { + "epoch": 0.17412945373449096, + "grad_norm": 807.436767578125, + "learning_rate": 3.9364699610048894e-05, + "loss": 109.5458, + "step": 43100 + }, + { + "epoch": 0.1741698549998586, + "grad_norm": 736.1069946289062, + "learning_rate": 3.9364001172185875e-05, + "loss": 76.9735, + "step": 43110 + }, + { + "epoch": 0.17421025626522624, + "grad_norm": 1110.43798828125, + "learning_rate": 3.936330235681169e-05, + "loss": 136.345, + "step": 43120 + }, + { + "epoch": 0.17425065753059385, + "grad_norm": 687.7774658203125, + "learning_rate": 3.936260316393995e-05, + "loss": 149.5644, + "step": 43130 + }, + { + "epoch": 0.1742910587959615, + "grad_norm": 369.6134338378906, + "learning_rate": 3.936190359358429e-05, + "loss": 169.0157, + "step": 43140 + }, + { + "epoch": 0.17433146006132913, + "grad_norm": 873.145263671875, + "learning_rate": 3.936120364575836e-05, + "loss": 93.1834, + "step": 43150 + }, + { + "epoch": 0.17437186132669674, + "grad_norm": 886.9813842773438, + "learning_rate": 3.9360503320475795e-05, + "loss": 115.0812, + "step": 43160 + }, + { + "epoch": 0.17441226259206438, + "grad_norm": 750.0335083007812, + "learning_rate": 3.935980261775025e-05, + "loss": 113.0262, + "step": 43170 + }, + { + "epoch": 0.17445266385743202, + "grad_norm": 815.4891357421875, + "learning_rate": 3.935910153759538e-05, + "loss": 92.6933, + "step": 43180 + }, + { + "epoch": 0.17449306512279963, + "grad_norm": 887.3807373046875, + "learning_rate": 3.9358400080024867e-05, + "loss": 105.6509, + "step": 43190 + }, + { + "epoch": 0.17453346638816727, + "grad_norm": 848.8743286132812, + "learning_rate": 3.935769824505238e-05, + "loss": 141.9864, + "step": 43200 + }, + { + "epoch": 0.17457386765353491, + "grad_norm": 707.7371215820312, + "learning_rate": 3.935699603269159e-05, + "loss": 98.3148, + "step": 43210 + }, + { + "epoch": 0.17461426891890253, + "grad_norm": 772.0900268554688, + "learning_rate": 3.935629344295621e-05, + "loss": 106.1912, + "step": 43220 + }, + { + "epoch": 0.17465467018427017, + "grad_norm": 2498.93359375, + "learning_rate": 3.935559047585991e-05, + "loss": 108.6816, + "step": 43230 + }, + { + "epoch": 0.1746950714496378, + "grad_norm": 1263.0877685546875, + "learning_rate": 3.935488713141641e-05, + "loss": 127.9902, + "step": 43240 + }, + { + "epoch": 0.17473547271500545, + "grad_norm": 643.4801025390625, + "learning_rate": 3.935418340963943e-05, + "loss": 134.648, + "step": 43250 + }, + { + "epoch": 0.17477587398037306, + "grad_norm": 1354.6151123046875, 
+ "learning_rate": 3.935347931054267e-05, + "loss": 93.0312, + "step": 43260 + }, + { + "epoch": 0.1748162752457407, + "grad_norm": 1533.6732177734375, + "learning_rate": 3.9352774834139875e-05, + "loss": 125.7991, + "step": 43270 + }, + { + "epoch": 0.17485667651110834, + "grad_norm": 920.9436645507812, + "learning_rate": 3.9352069980444764e-05, + "loss": 136.2971, + "step": 43280 + }, + { + "epoch": 0.17489707777647595, + "grad_norm": 850.7123413085938, + "learning_rate": 3.9351364749471095e-05, + "loss": 90.9942, + "step": 43290 + }, + { + "epoch": 0.1749374790418436, + "grad_norm": 1125.968505859375, + "learning_rate": 3.93506591412326e-05, + "loss": 85.2883, + "step": 43300 + }, + { + "epoch": 0.17497788030721123, + "grad_norm": 1034.281005859375, + "learning_rate": 3.9349953155743046e-05, + "loss": 138.5068, + "step": 43310 + }, + { + "epoch": 0.17501828157257884, + "grad_norm": 604.328857421875, + "learning_rate": 3.934924679301619e-05, + "loss": 92.4035, + "step": 43320 + }, + { + "epoch": 0.17505868283794648, + "grad_norm": 1608.2493896484375, + "learning_rate": 3.934854005306581e-05, + "loss": 176.7471, + "step": 43330 + }, + { + "epoch": 0.17509908410331412, + "grad_norm": 305.9052734375, + "learning_rate": 3.934783293590568e-05, + "loss": 83.9882, + "step": 43340 + }, + { + "epoch": 0.17513948536868174, + "grad_norm": 515.9991455078125, + "learning_rate": 3.9347125441549585e-05, + "loss": 86.167, + "step": 43350 + }, + { + "epoch": 0.17517988663404938, + "grad_norm": 630.107666015625, + "learning_rate": 3.9346417570011316e-05, + "loss": 139.8822, + "step": 43360 + }, + { + "epoch": 0.17522028789941702, + "grad_norm": 2095.282470703125, + "learning_rate": 3.934570932130468e-05, + "loss": 162.0379, + "step": 43370 + }, + { + "epoch": 0.17526068916478463, + "grad_norm": 676.9356079101562, + "learning_rate": 3.934500069544348e-05, + "loss": 101.0464, + "step": 43380 + }, + { + "epoch": 0.17530109043015227, + "grad_norm": 767.9412231445312, + "learning_rate": 3.934429169244154e-05, + "loss": 110.7268, + "step": 43390 + }, + { + "epoch": 0.1753414916955199, + "grad_norm": 419.210205078125, + "learning_rate": 3.934358231231266e-05, + "loss": 137.1669, + "step": 43400 + }, + { + "epoch": 0.17538189296088755, + "grad_norm": 2844.443603515625, + "learning_rate": 3.934287255507069e-05, + "loss": 104.0516, + "step": 43410 + }, + { + "epoch": 0.17542229422625516, + "grad_norm": 517.8118896484375, + "learning_rate": 3.9342162420729464e-05, + "loss": 82.3356, + "step": 43420 + }, + { + "epoch": 0.1754626954916228, + "grad_norm": 898.265869140625, + "learning_rate": 3.9341451909302814e-05, + "loss": 124.7275, + "step": 43430 + }, + { + "epoch": 0.17550309675699044, + "grad_norm": 1000.8018188476562, + "learning_rate": 3.934074102080461e-05, + "loss": 101.5745, + "step": 43440 + }, + { + "epoch": 0.17554349802235805, + "grad_norm": 876.035400390625, + "learning_rate": 3.9340029755248707e-05, + "loss": 155.5188, + "step": 43450 + }, + { + "epoch": 0.1755838992877257, + "grad_norm": 635.9815063476562, + "learning_rate": 3.933931811264896e-05, + "loss": 112.3606, + "step": 43460 + }, + { + "epoch": 0.17562430055309333, + "grad_norm": 697.6968383789062, + "learning_rate": 3.933860609301925e-05, + "loss": 159.1869, + "step": 43470 + }, + { + "epoch": 0.17566470181846094, + "grad_norm": 868.5369262695312, + "learning_rate": 3.9337893696373456e-05, + "loss": 72.2606, + "step": 43480 + }, + { + "epoch": 0.17570510308382858, + "grad_norm": 850.4609985351562, + "learning_rate": 3.933718092272547e-05, + 
"loss": 101.3773, + "step": 43490 + }, + { + "epoch": 0.17574550434919622, + "grad_norm": 798.5441284179688, + "learning_rate": 3.9336467772089195e-05, + "loss": 99.4872, + "step": 43500 + }, + { + "epoch": 0.17578590561456384, + "grad_norm": 1022.8936157226562, + "learning_rate": 3.9335754244478514e-05, + "loss": 133.3874, + "step": 43510 + }, + { + "epoch": 0.17582630687993148, + "grad_norm": 451.1548156738281, + "learning_rate": 3.9335040339907354e-05, + "loss": 79.6456, + "step": 43520 + }, + { + "epoch": 0.17586670814529912, + "grad_norm": 601.6317138671875, + "learning_rate": 3.933432605838963e-05, + "loss": 94.1807, + "step": 43530 + }, + { + "epoch": 0.17590710941066673, + "grad_norm": 1544.4425048828125, + "learning_rate": 3.933361139993926e-05, + "loss": 144.7013, + "step": 43540 + }, + { + "epoch": 0.17594751067603437, + "grad_norm": 468.1459655761719, + "learning_rate": 3.933289636457019e-05, + "loss": 69.4936, + "step": 43550 + }, + { + "epoch": 0.175987911941402, + "grad_norm": 504.0082702636719, + "learning_rate": 3.933218095229634e-05, + "loss": 139.9873, + "step": 43560 + }, + { + "epoch": 0.17602831320676965, + "grad_norm": 641.511962890625, + "learning_rate": 3.933146516313169e-05, + "loss": 91.0043, + "step": 43570 + }, + { + "epoch": 0.17606871447213726, + "grad_norm": 701.4042358398438, + "learning_rate": 3.933074899709016e-05, + "loss": 139.2144, + "step": 43580 + }, + { + "epoch": 0.1761091157375049, + "grad_norm": 577.1221923828125, + "learning_rate": 3.933003245418572e-05, + "loss": 106.1059, + "step": 43590 + }, + { + "epoch": 0.17614951700287254, + "grad_norm": 1425.8583984375, + "learning_rate": 3.932931553443235e-05, + "loss": 103.5875, + "step": 43600 + }, + { + "epoch": 0.17618991826824015, + "grad_norm": 1045.544677734375, + "learning_rate": 3.9328598237844035e-05, + "loss": 107.4462, + "step": 43610 + }, + { + "epoch": 0.1762303195336078, + "grad_norm": 1000.53955078125, + "learning_rate": 3.9327880564434735e-05, + "loss": 143.4156, + "step": 43620 + }, + { + "epoch": 0.17627072079897543, + "grad_norm": 1252.872802734375, + "learning_rate": 3.932716251421845e-05, + "loss": 97.9865, + "step": 43630 + }, + { + "epoch": 0.17631112206434305, + "grad_norm": 722.3681030273438, + "learning_rate": 3.932644408720919e-05, + "loss": 129.5503, + "step": 43640 + }, + { + "epoch": 0.17635152332971069, + "grad_norm": 2304.47021484375, + "learning_rate": 3.932572528342094e-05, + "loss": 177.6783, + "step": 43650 + }, + { + "epoch": 0.17639192459507833, + "grad_norm": 1779.197265625, + "learning_rate": 3.932500610286774e-05, + "loss": 97.212, + "step": 43660 + }, + { + "epoch": 0.17643232586044594, + "grad_norm": 1110.281494140625, + "learning_rate": 3.9324286545563595e-05, + "loss": 111.1921, + "step": 43670 + }, + { + "epoch": 0.17647272712581358, + "grad_norm": 456.4955749511719, + "learning_rate": 3.932356661152253e-05, + "loss": 105.9601, + "step": 43680 + }, + { + "epoch": 0.17651312839118122, + "grad_norm": 832.123779296875, + "learning_rate": 3.9322846300758585e-05, + "loss": 105.7379, + "step": 43690 + }, + { + "epoch": 0.17655352965654883, + "grad_norm": 4109.98095703125, + "learning_rate": 3.9322125613285805e-05, + "loss": 110.1918, + "step": 43700 + }, + { + "epoch": 0.17659393092191647, + "grad_norm": 994.8103637695312, + "learning_rate": 3.9321404549118236e-05, + "loss": 164.7918, + "step": 43710 + }, + { + "epoch": 0.1766343321872841, + "grad_norm": 614.5648803710938, + "learning_rate": 3.9320683108269945e-05, + "loss": 120.3491, + "step": 43720 + }, + { + 
"epoch": 0.17667473345265175, + "grad_norm": 528.0435180664062, + "learning_rate": 3.9319961290754985e-05, + "loss": 110.2536, + "step": 43730 + }, + { + "epoch": 0.17671513471801936, + "grad_norm": 468.5578918457031, + "learning_rate": 3.931923909658744e-05, + "loss": 107.9219, + "step": 43740 + }, + { + "epoch": 0.176755535983387, + "grad_norm": 873.9409790039062, + "learning_rate": 3.931851652578137e-05, + "loss": 133.1628, + "step": 43750 + }, + { + "epoch": 0.17679593724875464, + "grad_norm": 1713.7037353515625, + "learning_rate": 3.931779357835088e-05, + "loss": 126.9396, + "step": 43760 + }, + { + "epoch": 0.17683633851412225, + "grad_norm": 978.21142578125, + "learning_rate": 3.9317070254310056e-05, + "loss": 95.8394, + "step": 43770 + }, + { + "epoch": 0.1768767397794899, + "grad_norm": 1193.838623046875, + "learning_rate": 3.931634655367301e-05, + "loss": 101.4446, + "step": 43780 + }, + { + "epoch": 0.17691714104485753, + "grad_norm": 2759.174072265625, + "learning_rate": 3.931562247645384e-05, + "loss": 157.293, + "step": 43790 + }, + { + "epoch": 0.17695754231022515, + "grad_norm": 470.10040283203125, + "learning_rate": 3.9314898022666656e-05, + "loss": 86.8824, + "step": 43800 + }, + { + "epoch": 0.1769979435755928, + "grad_norm": 683.3786010742188, + "learning_rate": 3.9314173192325603e-05, + "loss": 157.5538, + "step": 43810 + }, + { + "epoch": 0.17703834484096043, + "grad_norm": 846.3517456054688, + "learning_rate": 3.9313447985444796e-05, + "loss": 92.6225, + "step": 43820 + }, + { + "epoch": 0.17707874610632804, + "grad_norm": 655.2369995117188, + "learning_rate": 3.931272240203838e-05, + "loss": 87.1965, + "step": 43830 + }, + { + "epoch": 0.17711914737169568, + "grad_norm": 657.6278686523438, + "learning_rate": 3.93119964421205e-05, + "loss": 88.1639, + "step": 43840 + }, + { + "epoch": 0.17715954863706332, + "grad_norm": 1067.4725341796875, + "learning_rate": 3.9311270105705296e-05, + "loss": 120.4654, + "step": 43850 + }, + { + "epoch": 0.17719994990243093, + "grad_norm": 660.3068237304688, + "learning_rate": 3.931054339280695e-05, + "loss": 98.5905, + "step": 43860 + }, + { + "epoch": 0.17724035116779857, + "grad_norm": 1475.1063232421875, + "learning_rate": 3.930981630343962e-05, + "loss": 117.4361, + "step": 43870 + }, + { + "epoch": 0.1772807524331662, + "grad_norm": 514.3161010742188, + "learning_rate": 3.9309088837617464e-05, + "loss": 161.733, + "step": 43880 + }, + { + "epoch": 0.17732115369853385, + "grad_norm": 823.1534423828125, + "learning_rate": 3.930836099535469e-05, + "loss": 90.2721, + "step": 43890 + }, + { + "epoch": 0.17736155496390146, + "grad_norm": 645.538330078125, + "learning_rate": 3.930763277666548e-05, + "loss": 126.9521, + "step": 43900 + }, + { + "epoch": 0.1774019562292691, + "grad_norm": 1879.9925537109375, + "learning_rate": 3.9306904181564025e-05, + "loss": 80.516, + "step": 43910 + }, + { + "epoch": 0.17744235749463674, + "grad_norm": 495.7853088378906, + "learning_rate": 3.930617521006454e-05, + "loss": 92.3393, + "step": 43920 + }, + { + "epoch": 0.17748275876000436, + "grad_norm": 703.3677978515625, + "learning_rate": 3.9305445862181225e-05, + "loss": 96.1346, + "step": 43930 + }, + { + "epoch": 0.177523160025372, + "grad_norm": 1900.0235595703125, + "learning_rate": 3.93047161379283e-05, + "loss": 132.3289, + "step": 43940 + }, + { + "epoch": 0.17756356129073964, + "grad_norm": 711.5303344726562, + "learning_rate": 3.9303986037320004e-05, + "loss": 97.6169, + "step": 43950 + }, + { + "epoch": 0.17760396255610725, + "grad_norm": 
544.9531860351562, + "learning_rate": 3.9303255560370554e-05, + "loss": 118.0651, + "step": 43960 + }, + { + "epoch": 0.1776443638214749, + "grad_norm": 827.1351318359375, + "learning_rate": 3.93025247070942e-05, + "loss": 87.1244, + "step": 43970 + }, + { + "epoch": 0.17768476508684253, + "grad_norm": 869.0027465820312, + "learning_rate": 3.930179347750519e-05, + "loss": 91.0392, + "step": 43980 + }, + { + "epoch": 0.17772516635221014, + "grad_norm": 656.9530029296875, + "learning_rate": 3.930106187161778e-05, + "loss": 98.0329, + "step": 43990 + }, + { + "epoch": 0.17776556761757778, + "grad_norm": 1630.1060791015625, + "learning_rate": 3.930032988944623e-05, + "loss": 142.5636, + "step": 44000 + }, + { + "epoch": 0.17780596888294542, + "grad_norm": 883.3037719726562, + "learning_rate": 3.9299597531004816e-05, + "loss": 110.7293, + "step": 44010 + }, + { + "epoch": 0.17784637014831303, + "grad_norm": 2067.3095703125, + "learning_rate": 3.929886479630781e-05, + "loss": 137.7868, + "step": 44020 + }, + { + "epoch": 0.17788677141368067, + "grad_norm": 293.4819030761719, + "learning_rate": 3.92981316853695e-05, + "loss": 85.5579, + "step": 44030 + }, + { + "epoch": 0.1779271726790483, + "grad_norm": 434.93670654296875, + "learning_rate": 3.929739819820418e-05, + "loss": 162.527, + "step": 44040 + }, + { + "epoch": 0.17796757394441595, + "grad_norm": 864.392333984375, + "learning_rate": 3.9296664334826135e-05, + "loss": 69.2837, + "step": 44050 + }, + { + "epoch": 0.17800797520978356, + "grad_norm": 1320.3184814453125, + "learning_rate": 3.9295930095249695e-05, + "loss": 151.8937, + "step": 44060 + }, + { + "epoch": 0.1780483764751512, + "grad_norm": 836.29931640625, + "learning_rate": 3.9295195479489165e-05, + "loss": 84.9214, + "step": 44070 + }, + { + "epoch": 0.17808877774051884, + "grad_norm": 457.5914306640625, + "learning_rate": 3.9294460487558865e-05, + "loss": 119.7825, + "step": 44080 + }, + { + "epoch": 0.17812917900588646, + "grad_norm": 506.6680603027344, + "learning_rate": 3.929372511947311e-05, + "loss": 106.6699, + "step": 44090 + }, + { + "epoch": 0.1781695802712541, + "grad_norm": 778.6116943359375, + "learning_rate": 3.929298937524627e-05, + "loss": 288.2132, + "step": 44100 + }, + { + "epoch": 0.17820998153662174, + "grad_norm": 427.6026306152344, + "learning_rate": 3.929225325489266e-05, + "loss": 98.0807, + "step": 44110 + }, + { + "epoch": 0.17825038280198935, + "grad_norm": 651.238037109375, + "learning_rate": 3.929151675842664e-05, + "loss": 103.1041, + "step": 44120 + }, + { + "epoch": 0.178290784067357, + "grad_norm": 702.1795654296875, + "learning_rate": 3.929077988586257e-05, + "loss": 149.5671, + "step": 44130 + }, + { + "epoch": 0.17833118533272463, + "grad_norm": 893.52783203125, + "learning_rate": 3.9290042637214815e-05, + "loss": 107.1741, + "step": 44140 + }, + { + "epoch": 0.17837158659809224, + "grad_norm": 1301.1751708984375, + "learning_rate": 3.9289305012497745e-05, + "loss": 138.5198, + "step": 44150 + }, + { + "epoch": 0.17841198786345988, + "grad_norm": 920.2763671875, + "learning_rate": 3.928856701172575e-05, + "loss": 115.3094, + "step": 44160 + }, + { + "epoch": 0.17845238912882752, + "grad_norm": 808.4774780273438, + "learning_rate": 3.928782863491321e-05, + "loss": 97.6874, + "step": 44170 + }, + { + "epoch": 0.17849279039419513, + "grad_norm": 813.8521118164062, + "learning_rate": 3.928708988207452e-05, + "loss": 67.6691, + "step": 44180 + }, + { + "epoch": 0.17853319165956277, + "grad_norm": 1198.8116455078125, + "learning_rate": 
3.928635075322407e-05, + "loss": 109.0637, + "step": 44190 + }, + { + "epoch": 0.1785735929249304, + "grad_norm": 741.3660278320312, + "learning_rate": 3.92856112483763e-05, + "loss": 64.9529, + "step": 44200 + }, + { + "epoch": 0.17861399419029805, + "grad_norm": 1037.3350830078125, + "learning_rate": 3.9284871367545595e-05, + "loss": 116.3379, + "step": 44210 + }, + { + "epoch": 0.17865439545566567, + "grad_norm": 617.2257080078125, + "learning_rate": 3.9284131110746404e-05, + "loss": 177.5903, + "step": 44220 + }, + { + "epoch": 0.1786947967210333, + "grad_norm": 614.7222900390625, + "learning_rate": 3.928339047799315e-05, + "loss": 95.1893, + "step": 44230 + }, + { + "epoch": 0.17873519798640095, + "grad_norm": 1375.2762451171875, + "learning_rate": 3.928264946930027e-05, + "loss": 98.322, + "step": 44240 + }, + { + "epoch": 0.17877559925176856, + "grad_norm": 1221.23583984375, + "learning_rate": 3.92819080846822e-05, + "loss": 91.1179, + "step": 44250 + }, + { + "epoch": 0.1788160005171362, + "grad_norm": 732.765625, + "learning_rate": 3.928116632415342e-05, + "loss": 113.0836, + "step": 44260 + }, + { + "epoch": 0.17885640178250384, + "grad_norm": 1104.79833984375, + "learning_rate": 3.928042418772837e-05, + "loss": 205.7247, + "step": 44270 + }, + { + "epoch": 0.17889680304787145, + "grad_norm": 855.9139404296875, + "learning_rate": 3.927968167542152e-05, + "loss": 102.6662, + "step": 44280 + }, + { + "epoch": 0.1789372043132391, + "grad_norm": 984.3355712890625, + "learning_rate": 3.9278938787247354e-05, + "loss": 92.4419, + "step": 44290 + }, + { + "epoch": 0.17897760557860673, + "grad_norm": 1473.210205078125, + "learning_rate": 3.927819552322035e-05, + "loss": 120.0374, + "step": 44300 + }, + { + "epoch": 0.17901800684397434, + "grad_norm": 292.0830078125, + "learning_rate": 3.9277451883355005e-05, + "loss": 115.7803, + "step": 44310 + }, + { + "epoch": 0.17905840810934198, + "grad_norm": 4271.056640625, + "learning_rate": 3.9276707867665805e-05, + "loss": 104.5843, + "step": 44320 + }, + { + "epoch": 0.17909880937470962, + "grad_norm": 465.0617370605469, + "learning_rate": 3.927596347616726e-05, + "loss": 118.4168, + "step": 44330 + }, + { + "epoch": 0.17913921064007723, + "grad_norm": 535.888916015625, + "learning_rate": 3.9275218708873887e-05, + "loss": 115.7777, + "step": 44340 + }, + { + "epoch": 0.17917961190544487, + "grad_norm": 722.5185546875, + "learning_rate": 3.92744735658002e-05, + "loss": 179.7792, + "step": 44350 + }, + { + "epoch": 0.17922001317081251, + "grad_norm": 485.36468505859375, + "learning_rate": 3.9273728046960726e-05, + "loss": 116.2458, + "step": 44360 + }, + { + "epoch": 0.17926041443618015, + "grad_norm": 1476.8302001953125, + "learning_rate": 3.927298215237e-05, + "loss": 93.3648, + "step": 44370 + }, + { + "epoch": 0.17930081570154777, + "grad_norm": 557.299072265625, + "learning_rate": 3.927223588204257e-05, + "loss": 136.3531, + "step": 44380 + }, + { + "epoch": 0.1793412169669154, + "grad_norm": 1239.0323486328125, + "learning_rate": 3.927148923599297e-05, + "loss": 125.0844, + "step": 44390 + }, + { + "epoch": 0.17938161823228305, + "grad_norm": 1010.6303100585938, + "learning_rate": 3.927074221423578e-05, + "loss": 97.6496, + "step": 44400 + }, + { + "epoch": 0.17942201949765066, + "grad_norm": 539.2913818359375, + "learning_rate": 3.9269994816785535e-05, + "loss": 137.7799, + "step": 44410 + }, + { + "epoch": 0.1794624207630183, + "grad_norm": 812.9047241210938, + "learning_rate": 3.9269247043656825e-05, + "loss": 134.8396, + "step": 44420 + 
}, + { + "epoch": 0.17950282202838594, + "grad_norm": 706.6622314453125, + "learning_rate": 3.926849889486423e-05, + "loss": 98.1537, + "step": 44430 + }, + { + "epoch": 0.17954322329375355, + "grad_norm": 1392.4033203125, + "learning_rate": 3.926775037042232e-05, + "loss": 169.5301, + "step": 44440 + }, + { + "epoch": 0.1795836245591212, + "grad_norm": 583.2578125, + "learning_rate": 3.92670014703457e-05, + "loss": 112.6691, + "step": 44450 + }, + { + "epoch": 0.17962402582448883, + "grad_norm": 1096.614501953125, + "learning_rate": 3.926625219464897e-05, + "loss": 84.5041, + "step": 44460 + }, + { + "epoch": 0.17966442708985644, + "grad_norm": 673.0792236328125, + "learning_rate": 3.926550254334673e-05, + "loss": 83.7446, + "step": 44470 + }, + { + "epoch": 0.17970482835522408, + "grad_norm": 881.2259521484375, + "learning_rate": 3.92647525164536e-05, + "loss": 87.3744, + "step": 44480 + }, + { + "epoch": 0.17974522962059172, + "grad_norm": 1072.380859375, + "learning_rate": 3.92640021139842e-05, + "loss": 126.373, + "step": 44490 + }, + { + "epoch": 0.17978563088595934, + "grad_norm": 594.1669311523438, + "learning_rate": 3.926325133595317e-05, + "loss": 79.4463, + "step": 44500 + }, + { + "epoch": 0.17982603215132698, + "grad_norm": 877.1673583984375, + "learning_rate": 3.926250018237513e-05, + "loss": 145.9573, + "step": 44510 + }, + { + "epoch": 0.17986643341669462, + "grad_norm": 1088.96826171875, + "learning_rate": 3.926174865326473e-05, + "loss": 142.7042, + "step": 44520 + }, + { + "epoch": 0.17990683468206226, + "grad_norm": 1293.61962890625, + "learning_rate": 3.926099674863663e-05, + "loss": 80.1414, + "step": 44530 + }, + { + "epoch": 0.17994723594742987, + "grad_norm": 1215.0960693359375, + "learning_rate": 3.926024446850548e-05, + "loss": 103.9489, + "step": 44540 + }, + { + "epoch": 0.1799876372127975, + "grad_norm": 852.5911865234375, + "learning_rate": 3.925949181288595e-05, + "loss": 73.0225, + "step": 44550 + }, + { + "epoch": 0.18002803847816515, + "grad_norm": 827.7257080078125, + "learning_rate": 3.92587387817927e-05, + "loss": 91.5024, + "step": 44560 + }, + { + "epoch": 0.18006843974353276, + "grad_norm": 1147.688232421875, + "learning_rate": 3.9257985375240435e-05, + "loss": 139.4638, + "step": 44570 + }, + { + "epoch": 0.1801088410089004, + "grad_norm": 573.06396484375, + "learning_rate": 3.925723159324383e-05, + "loss": 121.0616, + "step": 44580 + }, + { + "epoch": 0.18014924227426804, + "grad_norm": 691.3921508789062, + "learning_rate": 3.925647743581758e-05, + "loss": 86.6645, + "step": 44590 + }, + { + "epoch": 0.18018964353963565, + "grad_norm": 391.08612060546875, + "learning_rate": 3.925572290297638e-05, + "loss": 116.9233, + "step": 44600 + }, + { + "epoch": 0.1802300448050033, + "grad_norm": 1527.3037109375, + "learning_rate": 3.9254967994734954e-05, + "loss": 120.6871, + "step": 44610 + }, + { + "epoch": 0.18027044607037093, + "grad_norm": 894.4608764648438, + "learning_rate": 3.925421271110802e-05, + "loss": 119.6365, + "step": 44620 + }, + { + "epoch": 0.18031084733573854, + "grad_norm": 2787.767822265625, + "learning_rate": 3.925345705211029e-05, + "loss": 139.6162, + "step": 44630 + }, + { + "epoch": 0.18035124860110618, + "grad_norm": 722.30078125, + "learning_rate": 3.92527010177565e-05, + "loss": 107.6143, + "step": 44640 + }, + { + "epoch": 0.18039164986647382, + "grad_norm": 1030.7080078125, + "learning_rate": 3.9251944608061395e-05, + "loss": 69.9784, + "step": 44650 + }, + { + "epoch": 0.18043205113184144, + "grad_norm": 886.6362915039062, + 
"learning_rate": 3.9251187823039715e-05, + "loss": 154.5885, + "step": 44660 + }, + { + "epoch": 0.18047245239720908, + "grad_norm": 534.3170776367188, + "learning_rate": 3.925043066270622e-05, + "loss": 67.6841, + "step": 44670 + }, + { + "epoch": 0.18051285366257672, + "grad_norm": 405.6924743652344, + "learning_rate": 3.9249673127075675e-05, + "loss": 104.5246, + "step": 44680 + }, + { + "epoch": 0.18055325492794436, + "grad_norm": 1052.0738525390625, + "learning_rate": 3.924891521616283e-05, + "loss": 120.8437, + "step": 44690 + }, + { + "epoch": 0.18059365619331197, + "grad_norm": 829.1868896484375, + "learning_rate": 3.924815692998248e-05, + "loss": 107.1924, + "step": 44700 + }, + { + "epoch": 0.1806340574586796, + "grad_norm": 1073.474853515625, + "learning_rate": 3.9247398268549405e-05, + "loss": 105.708, + "step": 44710 + }, + { + "epoch": 0.18067445872404725, + "grad_norm": 615.37841796875, + "learning_rate": 3.9246639231878384e-05, + "loss": 129.1731, + "step": 44720 + }, + { + "epoch": 0.18071485998941486, + "grad_norm": 933.241455078125, + "learning_rate": 3.924587981998422e-05, + "loss": 104.1226, + "step": 44730 + }, + { + "epoch": 0.1807552612547825, + "grad_norm": 1376.4990234375, + "learning_rate": 3.924512003288173e-05, + "loss": 110.8136, + "step": 44740 + }, + { + "epoch": 0.18079566252015014, + "grad_norm": 1066.09228515625, + "learning_rate": 3.924435987058571e-05, + "loss": 84.4375, + "step": 44750 + }, + { + "epoch": 0.18083606378551775, + "grad_norm": 1244.1910400390625, + "learning_rate": 3.924359933311099e-05, + "loss": 104.2757, + "step": 44760 + }, + { + "epoch": 0.1808764650508854, + "grad_norm": 832.4368286132812, + "learning_rate": 3.9242838420472393e-05, + "loss": 131.9319, + "step": 44770 + }, + { + "epoch": 0.18091686631625303, + "grad_norm": 499.34136962890625, + "learning_rate": 3.9242077132684755e-05, + "loss": 65.5931, + "step": 44780 + }, + { + "epoch": 0.18095726758162065, + "grad_norm": 704.25439453125, + "learning_rate": 3.9241315469762915e-05, + "loss": 169.5149, + "step": 44790 + }, + { + "epoch": 0.18099766884698829, + "grad_norm": 519.4228515625, + "learning_rate": 3.924055343172172e-05, + "loss": 121.5268, + "step": 44800 + }, + { + "epoch": 0.18103807011235593, + "grad_norm": 671.3677978515625, + "learning_rate": 3.9239791018576034e-05, + "loss": 110.7395, + "step": 44810 + }, + { + "epoch": 0.18107847137772354, + "grad_norm": 333.40325927734375, + "learning_rate": 3.923902823034072e-05, + "loss": 78.5989, + "step": 44820 + }, + { + "epoch": 0.18111887264309118, + "grad_norm": 660.3540649414062, + "learning_rate": 3.923826506703064e-05, + "loss": 108.9713, + "step": 44830 + }, + { + "epoch": 0.18115927390845882, + "grad_norm": 794.8124389648438, + "learning_rate": 3.923750152866068e-05, + "loss": 88.9738, + "step": 44840 + }, + { + "epoch": 0.18119967517382646, + "grad_norm": 1358.336181640625, + "learning_rate": 3.9236737615245726e-05, + "loss": 105.3401, + "step": 44850 + }, + { + "epoch": 0.18124007643919407, + "grad_norm": 827.7672119140625, + "learning_rate": 3.923597332680067e-05, + "loss": 99.8932, + "step": 44860 + }, + { + "epoch": 0.1812804777045617, + "grad_norm": 788.9783325195312, + "learning_rate": 3.9235208663340404e-05, + "loss": 123.5471, + "step": 44870 + }, + { + "epoch": 0.18132087896992935, + "grad_norm": 999.8792724609375, + "learning_rate": 3.923444362487985e-05, + "loss": 149.0542, + "step": 44880 + }, + { + "epoch": 0.18136128023529696, + "grad_norm": 781.2118530273438, + "learning_rate": 3.923367821143391e-05, + 
"loss": 111.1438, + "step": 44890 + }, + { + "epoch": 0.1814016815006646, + "grad_norm": 817.459228515625, + "learning_rate": 3.923291242301751e-05, + "loss": 120.3275, + "step": 44900 + }, + { + "epoch": 0.18144208276603224, + "grad_norm": 761.6355590820312, + "learning_rate": 3.923214625964558e-05, + "loss": 54.5724, + "step": 44910 + }, + { + "epoch": 0.18148248403139985, + "grad_norm": 792.9924926757812, + "learning_rate": 3.923137972133306e-05, + "loss": 95.1763, + "step": 44920 + }, + { + "epoch": 0.1815228852967675, + "grad_norm": 1150.4649658203125, + "learning_rate": 3.923061280809489e-05, + "loss": 90.6889, + "step": 44930 + }, + { + "epoch": 0.18156328656213513, + "grad_norm": 590.4578247070312, + "learning_rate": 3.922984551994602e-05, + "loss": 121.0319, + "step": 44940 + }, + { + "epoch": 0.18160368782750275, + "grad_norm": 450.7500915527344, + "learning_rate": 3.922907785690142e-05, + "loss": 57.0796, + "step": 44950 + }, + { + "epoch": 0.1816440890928704, + "grad_norm": 856.1098022460938, + "learning_rate": 3.922830981897604e-05, + "loss": 102.8541, + "step": 44960 + }, + { + "epoch": 0.18168449035823803, + "grad_norm": 1260.447021484375, + "learning_rate": 3.922754140618486e-05, + "loss": 90.7514, + "step": 44970 + }, + { + "epoch": 0.18172489162360564, + "grad_norm": 756.5560302734375, + "learning_rate": 3.9226772618542874e-05, + "loss": 115.8032, + "step": 44980 + }, + { + "epoch": 0.18176529288897328, + "grad_norm": 502.2868347167969, + "learning_rate": 3.9226003456065045e-05, + "loss": 80.9444, + "step": 44990 + }, + { + "epoch": 0.18180569415434092, + "grad_norm": 821.5325317382812, + "learning_rate": 3.922523391876638e-05, + "loss": 110.5888, + "step": 45000 + }, + { + "epoch": 0.18184609541970856, + "grad_norm": 1033.2593994140625, + "learning_rate": 3.9224464006661885e-05, + "loss": 119.1832, + "step": 45010 + }, + { + "epoch": 0.18188649668507617, + "grad_norm": 1292.5682373046875, + "learning_rate": 3.922369371976657e-05, + "loss": 126.6403, + "step": 45020 + }, + { + "epoch": 0.1819268979504438, + "grad_norm": 578.7438354492188, + "learning_rate": 3.9222923058095445e-05, + "loss": 117.5187, + "step": 45030 + }, + { + "epoch": 0.18196729921581145, + "grad_norm": 896.1525268554688, + "learning_rate": 3.922215202166354e-05, + "loss": 84.1352, + "step": 45040 + }, + { + "epoch": 0.18200770048117906, + "grad_norm": 1228.18212890625, + "learning_rate": 3.922138061048588e-05, + "loss": 116.1831, + "step": 45050 + }, + { + "epoch": 0.1820481017465467, + "grad_norm": 843.9871215820312, + "learning_rate": 3.922060882457751e-05, + "loss": 120.1767, + "step": 45060 + }, + { + "epoch": 0.18208850301191434, + "grad_norm": 641.1558227539062, + "learning_rate": 3.921983666395348e-05, + "loss": 137.1051, + "step": 45070 + }, + { + "epoch": 0.18212890427728196, + "grad_norm": 612.7044677734375, + "learning_rate": 3.921906412862884e-05, + "loss": 94.3645, + "step": 45080 + }, + { + "epoch": 0.1821693055426496, + "grad_norm": 672.0086669921875, + "learning_rate": 3.921829121861865e-05, + "loss": 97.982, + "step": 45090 + }, + { + "epoch": 0.18220970680801724, + "grad_norm": 1230.185302734375, + "learning_rate": 3.9217517933937974e-05, + "loss": 115.056, + "step": 45100 + }, + { + "epoch": 0.18225010807338485, + "grad_norm": 1234.1407470703125, + "learning_rate": 3.9216744274601895e-05, + "loss": 115.3858, + "step": 45110 + }, + { + "epoch": 0.1822905093387525, + "grad_norm": 569.9103393554688, + "learning_rate": 3.921597024062549e-05, + "loss": 139.6657, + "step": 45120 + }, + { + 
"epoch": 0.18233091060412013, + "grad_norm": 857.34765625, + "learning_rate": 3.921519583202386e-05, + "loss": 94.6049, + "step": 45130 + }, + { + "epoch": 0.18237131186948774, + "grad_norm": 525.5388793945312, + "learning_rate": 3.921442104881209e-05, + "loss": 88.2086, + "step": 45140 + }, + { + "epoch": 0.18241171313485538, + "grad_norm": 633.9356689453125, + "learning_rate": 3.9213645891005284e-05, + "loss": 97.771, + "step": 45150 + }, + { + "epoch": 0.18245211440022302, + "grad_norm": 1296.2205810546875, + "learning_rate": 3.921287035861857e-05, + "loss": 124.3096, + "step": 45160 + }, + { + "epoch": 0.18249251566559066, + "grad_norm": 504.6768493652344, + "learning_rate": 3.921209445166705e-05, + "loss": 86.4531, + "step": 45170 + }, + { + "epoch": 0.18253291693095827, + "grad_norm": 831.5971069335938, + "learning_rate": 3.9211318170165854e-05, + "loss": 106.425, + "step": 45180 + }, + { + "epoch": 0.1825733181963259, + "grad_norm": 922.2108764648438, + "learning_rate": 3.9210541514130126e-05, + "loss": 102.943, + "step": 45190 + }, + { + "epoch": 0.18261371946169355, + "grad_norm": 1160.41455078125, + "learning_rate": 3.9209764483575e-05, + "loss": 126.4679, + "step": 45200 + }, + { + "epoch": 0.18265412072706116, + "grad_norm": 880.1768798828125, + "learning_rate": 3.9208987078515625e-05, + "loss": 110.5957, + "step": 45210 + }, + { + "epoch": 0.1826945219924288, + "grad_norm": 564.4341430664062, + "learning_rate": 3.9208209298967156e-05, + "loss": 64.9079, + "step": 45220 + }, + { + "epoch": 0.18273492325779644, + "grad_norm": 591.32470703125, + "learning_rate": 3.920743114494476e-05, + "loss": 116.494, + "step": 45230 + }, + { + "epoch": 0.18277532452316406, + "grad_norm": 1376.952392578125, + "learning_rate": 3.92066526164636e-05, + "loss": 87.9144, + "step": 45240 + }, + { + "epoch": 0.1828157257885317, + "grad_norm": 776.5267944335938, + "learning_rate": 3.9205873713538864e-05, + "loss": 113.5147, + "step": 45250 + }, + { + "epoch": 0.18285612705389934, + "grad_norm": 1010.7616577148438, + "learning_rate": 3.9205094436185734e-05, + "loss": 102.5515, + "step": 45260 + }, + { + "epoch": 0.18289652831926695, + "grad_norm": 1262.4642333984375, + "learning_rate": 3.9204314784419396e-05, + "loss": 153.424, + "step": 45270 + }, + { + "epoch": 0.1829369295846346, + "grad_norm": 1117.28271484375, + "learning_rate": 3.9203534758255056e-05, + "loss": 134.0267, + "step": 45280 + }, + { + "epoch": 0.18297733085000223, + "grad_norm": 1103.278076171875, + "learning_rate": 3.920275435770791e-05, + "loss": 96.1866, + "step": 45290 + }, + { + "epoch": 0.18301773211536984, + "grad_norm": 822.197509765625, + "learning_rate": 3.92019735827932e-05, + "loss": 88.8869, + "step": 45300 + }, + { + "epoch": 0.18305813338073748, + "grad_norm": 704.1466064453125, + "learning_rate": 3.920119243352612e-05, + "loss": 95.4713, + "step": 45310 + }, + { + "epoch": 0.18309853464610512, + "grad_norm": 1079.7664794921875, + "learning_rate": 3.9200410909921903e-05, + "loss": 84.2463, + "step": 45320 + }, + { + "epoch": 0.18313893591147276, + "grad_norm": 1588.8756103515625, + "learning_rate": 3.91996290119958e-05, + "loss": 104.2267, + "step": 45330 + }, + { + "epoch": 0.18317933717684037, + "grad_norm": 1014.8966674804688, + "learning_rate": 3.919884673976304e-05, + "loss": 99.0899, + "step": 45340 + }, + { + "epoch": 0.183219738442208, + "grad_norm": 658.822998046875, + "learning_rate": 3.9198064093238874e-05, + "loss": 144.0913, + "step": 45350 + }, + { + "epoch": 0.18326013970757565, + "grad_norm": 
1343.022216796875, + "learning_rate": 3.9197281072438576e-05, + "loss": 142.9431, + "step": 45360 + }, + { + "epoch": 0.18330054097294327, + "grad_norm": 449.5462951660156, + "learning_rate": 3.919649767737739e-05, + "loss": 94.757, + "step": 45370 + }, + { + "epoch": 0.1833409422383109, + "grad_norm": 551.3982543945312, + "learning_rate": 3.9195713908070606e-05, + "loss": 101.662, + "step": 45380 + }, + { + "epoch": 0.18338134350367855, + "grad_norm": 1080.28759765625, + "learning_rate": 3.919492976453349e-05, + "loss": 151.5954, + "step": 45390 + }, + { + "epoch": 0.18342174476904616, + "grad_norm": 1882.885009765625, + "learning_rate": 3.9194145246781336e-05, + "loss": 126.3629, + "step": 45400 + }, + { + "epoch": 0.1834621460344138, + "grad_norm": 803.5654296875, + "learning_rate": 3.919336035482944e-05, + "loss": 106.8753, + "step": 45410 + }, + { + "epoch": 0.18350254729978144, + "grad_norm": 536.650146484375, + "learning_rate": 3.9192575088693104e-05, + "loss": 129.1379, + "step": 45420 + }, + { + "epoch": 0.18354294856514905, + "grad_norm": 786.2908935546875, + "learning_rate": 3.9191789448387634e-05, + "loss": 90.7641, + "step": 45430 + }, + { + "epoch": 0.1835833498305167, + "grad_norm": 337.1232604980469, + "learning_rate": 3.919100343392835e-05, + "loss": 78.2249, + "step": 45440 + }, + { + "epoch": 0.18362375109588433, + "grad_norm": 346.5594177246094, + "learning_rate": 3.919021704533058e-05, + "loss": 141.4602, + "step": 45450 + }, + { + "epoch": 0.18366415236125194, + "grad_norm": 706.908203125, + "learning_rate": 3.918943028260964e-05, + "loss": 123.6957, + "step": 45460 + }, + { + "epoch": 0.18370455362661958, + "grad_norm": 952.102783203125, + "learning_rate": 3.918864314578087e-05, + "loss": 122.1361, + "step": 45470 + }, + { + "epoch": 0.18374495489198722, + "grad_norm": 1575.9967041015625, + "learning_rate": 3.918785563485964e-05, + "loss": 147.6008, + "step": 45480 + }, + { + "epoch": 0.18378535615735486, + "grad_norm": 927.835205078125, + "learning_rate": 3.918706774986128e-05, + "loss": 121.7773, + "step": 45490 + }, + { + "epoch": 0.18382575742272247, + "grad_norm": 1672.407958984375, + "learning_rate": 3.918627949080115e-05, + "loss": 132.4416, + "step": 45500 + }, + { + "epoch": 0.18386615868809011, + "grad_norm": 814.1611328125, + "learning_rate": 3.918549085769464e-05, + "loss": 100.8685, + "step": 45510 + }, + { + "epoch": 0.18390655995345775, + "grad_norm": 509.89453125, + "learning_rate": 3.918470185055709e-05, + "loss": 81.7529, + "step": 45520 + }, + { + "epoch": 0.18394696121882537, + "grad_norm": 1164.39990234375, + "learning_rate": 3.91839124694039e-05, + "loss": 136.9088, + "step": 45530 + }, + { + "epoch": 0.183987362484193, + "grad_norm": 1092.2449951171875, + "learning_rate": 3.918312271425047e-05, + "loss": 107.5758, + "step": 45540 + }, + { + "epoch": 0.18402776374956065, + "grad_norm": 773.4453125, + "learning_rate": 3.9182332585112186e-05, + "loss": 108.2592, + "step": 45550 + }, + { + "epoch": 0.18406816501492826, + "grad_norm": 1146.68505859375, + "learning_rate": 3.918154208200445e-05, + "loss": 168.4107, + "step": 45560 + }, + { + "epoch": 0.1841085662802959, + "grad_norm": 884.4878540039062, + "learning_rate": 3.918075120494267e-05, + "loss": 110.3425, + "step": 45570 + }, + { + "epoch": 0.18414896754566354, + "grad_norm": 802.3369140625, + "learning_rate": 3.9179959953942283e-05, + "loss": 158.6479, + "step": 45580 + }, + { + "epoch": 0.18418936881103115, + "grad_norm": 2042.192138671875, + "learning_rate": 3.917916832901869e-05, + 
"loss": 116.4019, + "step": 45590 + }, + { + "epoch": 0.1842297700763988, + "grad_norm": 1187.767578125, + "learning_rate": 3.917837633018734e-05, + "loss": 121.2097, + "step": 45600 + }, + { + "epoch": 0.18427017134176643, + "grad_norm": 864.92138671875, + "learning_rate": 3.917758395746368e-05, + "loss": 84.0501, + "step": 45610 + }, + { + "epoch": 0.18431057260713404, + "grad_norm": 964.3775634765625, + "learning_rate": 3.9176791210863136e-05, + "loss": 192.0525, + "step": 45620 + }, + { + "epoch": 0.18435097387250168, + "grad_norm": 795.0538330078125, + "learning_rate": 3.917599809040117e-05, + "loss": 142.4928, + "step": 45630 + }, + { + "epoch": 0.18439137513786932, + "grad_norm": 1195.813232421875, + "learning_rate": 3.917520459609326e-05, + "loss": 139.2071, + "step": 45640 + }, + { + "epoch": 0.18443177640323694, + "grad_norm": 329.8785705566406, + "learning_rate": 3.9174410727954856e-05, + "loss": 78.7253, + "step": 45650 + }, + { + "epoch": 0.18447217766860458, + "grad_norm": 1084.1959228515625, + "learning_rate": 3.917361648600145e-05, + "loss": 104.6644, + "step": 45660 + }, + { + "epoch": 0.18451257893397222, + "grad_norm": 1362.5384521484375, + "learning_rate": 3.9172821870248524e-05, + "loss": 112.944, + "step": 45670 + }, + { + "epoch": 0.18455298019933986, + "grad_norm": 1244.648193359375, + "learning_rate": 3.917202688071155e-05, + "loss": 75.607, + "step": 45680 + }, + { + "epoch": 0.18459338146470747, + "grad_norm": 1170.45849609375, + "learning_rate": 3.917123151740605e-05, + "loss": 121.37, + "step": 45690 + }, + { + "epoch": 0.1846337827300751, + "grad_norm": 807.755126953125, + "learning_rate": 3.917043578034752e-05, + "loss": 76.0498, + "step": 45700 + }, + { + "epoch": 0.18467418399544275, + "grad_norm": 947.9320678710938, + "learning_rate": 3.9169639669551476e-05, + "loss": 82.1819, + "step": 45710 + }, + { + "epoch": 0.18471458526081036, + "grad_norm": 1049.06396484375, + "learning_rate": 3.9168843185033434e-05, + "loss": 82.042, + "step": 45720 + }, + { + "epoch": 0.184754986526178, + "grad_norm": 879.45361328125, + "learning_rate": 3.9168046326808934e-05, + "loss": 83.898, + "step": 45730 + }, + { + "epoch": 0.18479538779154564, + "grad_norm": 850.8912353515625, + "learning_rate": 3.9167249094893495e-05, + "loss": 77.2259, + "step": 45740 + }, + { + "epoch": 0.18483578905691325, + "grad_norm": 366.6460876464844, + "learning_rate": 3.916645148930267e-05, + "loss": 144.1211, + "step": 45750 + }, + { + "epoch": 0.1848761903222809, + "grad_norm": 427.6673583984375, + "learning_rate": 3.9165653510052004e-05, + "loss": 103.7898, + "step": 45760 + }, + { + "epoch": 0.18491659158764853, + "grad_norm": 733.4408569335938, + "learning_rate": 3.9164855157157055e-05, + "loss": 91.4522, + "step": 45770 + }, + { + "epoch": 0.18495699285301614, + "grad_norm": 1173.1689453125, + "learning_rate": 3.916405643063339e-05, + "loss": 106.3651, + "step": 45780 + }, + { + "epoch": 0.18499739411838378, + "grad_norm": 544.6749267578125, + "learning_rate": 3.9163257330496576e-05, + "loss": 71.7877, + "step": 45790 + }, + { + "epoch": 0.18503779538375142, + "grad_norm": 557.1127319335938, + "learning_rate": 3.91624578567622e-05, + "loss": 104.0788, + "step": 45800 + }, + { + "epoch": 0.18507819664911904, + "grad_norm": 1273.956787109375, + "learning_rate": 3.9161658009445834e-05, + "loss": 144.939, + "step": 45810 + }, + { + "epoch": 0.18511859791448668, + "grad_norm": 1249.144287109375, + "learning_rate": 3.9160857788563085e-05, + "loss": 163.5391, + "step": 45820 + }, + { + "epoch": 
0.18515899917985432, + "grad_norm": 430.4400939941406, + "learning_rate": 3.9160057194129544e-05, + "loss": 79.093, + "step": 45830 + }, + { + "epoch": 0.18519940044522196, + "grad_norm": 462.1000061035156, + "learning_rate": 3.915925622616083e-05, + "loss": 107.3453, + "step": 45840 + }, + { + "epoch": 0.18523980171058957, + "grad_norm": 910.2260131835938, + "learning_rate": 3.915845488467255e-05, + "loss": 106.6356, + "step": 45850 + }, + { + "epoch": 0.1852802029759572, + "grad_norm": 840.5657958984375, + "learning_rate": 3.915765316968033e-05, + "loss": 133.9957, + "step": 45860 + }, + { + "epoch": 0.18532060424132485, + "grad_norm": 840.9609375, + "learning_rate": 3.91568510811998e-05, + "loss": 100.4298, + "step": 45870 + }, + { + "epoch": 0.18536100550669246, + "grad_norm": 718.1873779296875, + "learning_rate": 3.915604861924659e-05, + "loss": 76.9476, + "step": 45880 + }, + { + "epoch": 0.1854014067720601, + "grad_norm": 830.609619140625, + "learning_rate": 3.915524578383635e-05, + "loss": 88.8338, + "step": 45890 + }, + { + "epoch": 0.18544180803742774, + "grad_norm": 686.35009765625, + "learning_rate": 3.9154442574984735e-05, + "loss": 94.5852, + "step": 45900 + }, + { + "epoch": 0.18548220930279535, + "grad_norm": 558.4879150390625, + "learning_rate": 3.91536389927074e-05, + "loss": 132.6553, + "step": 45910 + }, + { + "epoch": 0.185522610568163, + "grad_norm": 928.2503662109375, + "learning_rate": 3.9152835037020013e-05, + "loss": 102.1466, + "step": 45920 + }, + { + "epoch": 0.18556301183353063, + "grad_norm": 998.4694213867188, + "learning_rate": 3.915203070793825e-05, + "loss": 112.1613, + "step": 45930 + }, + { + "epoch": 0.18560341309889825, + "grad_norm": 1041.3758544921875, + "learning_rate": 3.9151226005477786e-05, + "loss": 121.7452, + "step": 45940 + }, + { + "epoch": 0.18564381436426589, + "grad_norm": 1185.895751953125, + "learning_rate": 3.91504209296543e-05, + "loss": 148.8559, + "step": 45950 + }, + { + "epoch": 0.18568421562963353, + "grad_norm": 776.1190185546875, + "learning_rate": 3.914961548048351e-05, + "loss": 108.2814, + "step": 45960 + }, + { + "epoch": 0.18572461689500114, + "grad_norm": 905.7137451171875, + "learning_rate": 3.91488096579811e-05, + "loss": 109.8443, + "step": 45970 + }, + { + "epoch": 0.18576501816036878, + "grad_norm": 1604.76025390625, + "learning_rate": 3.914800346216279e-05, + "loss": 132.4436, + "step": 45980 + }, + { + "epoch": 0.18580541942573642, + "grad_norm": 439.4797058105469, + "learning_rate": 3.9147196893044305e-05, + "loss": 85.442, + "step": 45990 + }, + { + "epoch": 0.18584582069110406, + "grad_norm": 1252.072509765625, + "learning_rate": 3.9146389950641345e-05, + "loss": 86.7494, + "step": 46000 + }, + { + "epoch": 0.18588622195647167, + "grad_norm": 1445.6693115234375, + "learning_rate": 3.914558263496966e-05, + "loss": 93.1659, + "step": 46010 + }, + { + "epoch": 0.1859266232218393, + "grad_norm": 592.785400390625, + "learning_rate": 3.914477494604499e-05, + "loss": 63.2274, + "step": 46020 + }, + { + "epoch": 0.18596702448720695, + "grad_norm": 684.5814208984375, + "learning_rate": 3.914396688388308e-05, + "loss": 66.7492, + "step": 46030 + }, + { + "epoch": 0.18600742575257456, + "grad_norm": 694.837158203125, + "learning_rate": 3.914315844849966e-05, + "loss": 121.156, + "step": 46040 + }, + { + "epoch": 0.1860478270179422, + "grad_norm": 687.0386352539062, + "learning_rate": 3.914234963991053e-05, + "loss": 124.3705, + "step": 46050 + }, + { + "epoch": 0.18608822828330984, + "grad_norm": 643.592041015625, + 
"learning_rate": 3.914154045813143e-05, + "loss": 98.3161, + "step": 46060 + }, + { + "epoch": 0.18612862954867745, + "grad_norm": 565.8458251953125, + "learning_rate": 3.914073090317814e-05, + "loss": 147.7122, + "step": 46070 + }, + { + "epoch": 0.1861690308140451, + "grad_norm": 603.8526000976562, + "learning_rate": 3.913992097506645e-05, + "loss": 107.8672, + "step": 46080 + }, + { + "epoch": 0.18620943207941273, + "grad_norm": 1965.3399658203125, + "learning_rate": 3.9139110673812155e-05, + "loss": 132.9031, + "step": 46090 + }, + { + "epoch": 0.18624983334478035, + "grad_norm": 843.605224609375, + "learning_rate": 3.913829999943103e-05, + "loss": 103.8504, + "step": 46100 + }, + { + "epoch": 0.186290234610148, + "grad_norm": 1145.6500244140625, + "learning_rate": 3.91374889519389e-05, + "loss": 152.2288, + "step": 46110 + }, + { + "epoch": 0.18633063587551563, + "grad_norm": 1354.3662109375, + "learning_rate": 3.913667753135157e-05, + "loss": 130.6337, + "step": 46120 + }, + { + "epoch": 0.18637103714088324, + "grad_norm": 982.4403076171875, + "learning_rate": 3.913586573768486e-05, + "loss": 94.9781, + "step": 46130 + }, + { + "epoch": 0.18641143840625088, + "grad_norm": 765.2337646484375, + "learning_rate": 3.91350535709546e-05, + "loss": 96.5427, + "step": 46140 + }, + { + "epoch": 0.18645183967161852, + "grad_norm": 1147.355712890625, + "learning_rate": 3.9134241031176614e-05, + "loss": 98.0149, + "step": 46150 + }, + { + "epoch": 0.18649224093698616, + "grad_norm": 1172.125244140625, + "learning_rate": 3.9133428118366746e-05, + "loss": 109.064, + "step": 46160 + }, + { + "epoch": 0.18653264220235377, + "grad_norm": 3520.637939453125, + "learning_rate": 3.9132614832540854e-05, + "loss": 160.3167, + "step": 46170 + }, + { + "epoch": 0.1865730434677214, + "grad_norm": 636.5177612304688, + "learning_rate": 3.913180117371478e-05, + "loss": 90.1426, + "step": 46180 + }, + { + "epoch": 0.18661344473308905, + "grad_norm": 1442.9559326171875, + "learning_rate": 3.913098714190439e-05, + "loss": 108.3514, + "step": 46190 + }, + { + "epoch": 0.18665384599845666, + "grad_norm": 978.65478515625, + "learning_rate": 3.913017273712556e-05, + "loss": 123.6053, + "step": 46200 + }, + { + "epoch": 0.1866942472638243, + "grad_norm": 1030.4388427734375, + "learning_rate": 3.9129357959394166e-05, + "loss": 252.4063, + "step": 46210 + }, + { + "epoch": 0.18673464852919194, + "grad_norm": 689.920166015625, + "learning_rate": 3.9128542808726084e-05, + "loss": 80.3692, + "step": 46220 + }, + { + "epoch": 0.18677504979455956, + "grad_norm": 2157.83251953125, + "learning_rate": 3.912772728513722e-05, + "loss": 96.5918, + "step": 46230 + }, + { + "epoch": 0.1868154510599272, + "grad_norm": 3890.38037109375, + "learning_rate": 3.912691138864346e-05, + "loss": 133.9155, + "step": 46240 + }, + { + "epoch": 0.18685585232529484, + "grad_norm": 1876.4798583984375, + "learning_rate": 3.912609511926071e-05, + "loss": 132.3427, + "step": 46250 + }, + { + "epoch": 0.18689625359066245, + "grad_norm": 1149.63134765625, + "learning_rate": 3.912527847700489e-05, + "loss": 166.6669, + "step": 46260 + }, + { + "epoch": 0.1869366548560301, + "grad_norm": 768.5447387695312, + "learning_rate": 3.912446146189193e-05, + "loss": 112.3029, + "step": 46270 + }, + { + "epoch": 0.18697705612139773, + "grad_norm": 657.8525390625, + "learning_rate": 3.912364407393774e-05, + "loss": 94.0237, + "step": 46280 + }, + { + "epoch": 0.18701745738676534, + "grad_norm": 1057.7891845703125, + "learning_rate": 3.912282631315827e-05, + "loss": 
106.0209, + "step": 46290 + }, + { + "epoch": 0.18705785865213298, + "grad_norm": 642.4913940429688, + "learning_rate": 3.912200817956945e-05, + "loss": 103.2481, + "step": 46300 + }, + { + "epoch": 0.18709825991750062, + "grad_norm": 980.9729614257812, + "learning_rate": 3.912118967318724e-05, + "loss": 117.7385, + "step": 46310 + }, + { + "epoch": 0.18713866118286826, + "grad_norm": 780.43115234375, + "learning_rate": 3.912037079402759e-05, + "loss": 78.7624, + "step": 46320 + }, + { + "epoch": 0.18717906244823587, + "grad_norm": 490.0324401855469, + "learning_rate": 3.911955154210646e-05, + "loss": 60.0442, + "step": 46330 + }, + { + "epoch": 0.1872194637136035, + "grad_norm": 393.7885437011719, + "learning_rate": 3.9118731917439844e-05, + "loss": 73.0317, + "step": 46340 + }, + { + "epoch": 0.18725986497897115, + "grad_norm": 1408.740966796875, + "learning_rate": 3.9117911920043695e-05, + "loss": 129.4143, + "step": 46350 + }, + { + "epoch": 0.18730026624433876, + "grad_norm": 806.8511962890625, + "learning_rate": 3.9117091549934014e-05, + "loss": 96.7331, + "step": 46360 + }, + { + "epoch": 0.1873406675097064, + "grad_norm": 1883.2796630859375, + "learning_rate": 3.9116270807126794e-05, + "loss": 108.8084, + "step": 46370 + }, + { + "epoch": 0.18738106877507404, + "grad_norm": 822.182373046875, + "learning_rate": 3.9115449691638027e-05, + "loss": 111.0568, + "step": 46380 + }, + { + "epoch": 0.18742147004044166, + "grad_norm": 861.093017578125, + "learning_rate": 3.911462820348374e-05, + "loss": 95.55, + "step": 46390 + }, + { + "epoch": 0.1874618713058093, + "grad_norm": 952.3106079101562, + "learning_rate": 3.9113806342679926e-05, + "loss": 99.0121, + "step": 46400 + }, + { + "epoch": 0.18750227257117694, + "grad_norm": 1434.952392578125, + "learning_rate": 3.911298410924262e-05, + "loss": 167.7147, + "step": 46410 + }, + { + "epoch": 0.18754267383654455, + "grad_norm": 513.6852416992188, + "learning_rate": 3.911216150318784e-05, + "loss": 106.6464, + "step": 46420 + }, + { + "epoch": 0.1875830751019122, + "grad_norm": 1250.233642578125, + "learning_rate": 3.911133852453164e-05, + "loss": 91.5242, + "step": 46430 + }, + { + "epoch": 0.18762347636727983, + "grad_norm": 1038.3704833984375, + "learning_rate": 3.911051517329005e-05, + "loss": 96.5715, + "step": 46440 + }, + { + "epoch": 0.18766387763264744, + "grad_norm": 667.5458374023438, + "learning_rate": 3.910969144947913e-05, + "loss": 123.3611, + "step": 46450 + }, + { + "epoch": 0.18770427889801508, + "grad_norm": 644.3773803710938, + "learning_rate": 3.910886735311494e-05, + "loss": 97.1455, + "step": 46460 + }, + { + "epoch": 0.18774468016338272, + "grad_norm": 429.63348388671875, + "learning_rate": 3.910804288421355e-05, + "loss": 142.8125, + "step": 46470 + }, + { + "epoch": 0.18778508142875036, + "grad_norm": 658.8795776367188, + "learning_rate": 3.910721804279101e-05, + "loss": 136.0824, + "step": 46480 + }, + { + "epoch": 0.18782548269411797, + "grad_norm": 502.06011962890625, + "learning_rate": 3.910639282886343e-05, + "loss": 97.003, + "step": 46490 + }, + { + "epoch": 0.1878658839594856, + "grad_norm": 789.5308837890625, + "learning_rate": 3.9105567242446874e-05, + "loss": 78.4425, + "step": 46500 + }, + { + "epoch": 0.18790628522485325, + "grad_norm": 576.5253295898438, + "learning_rate": 3.9104741283557456e-05, + "loss": 65.1268, + "step": 46510 + }, + { + "epoch": 0.18794668649022087, + "grad_norm": 832.866455078125, + "learning_rate": 3.910391495221127e-05, + "loss": 63.9773, + "step": 46520 + }, + { + "epoch": 
0.1879870877555885, + "grad_norm": 525.132080078125, + "learning_rate": 3.9103088248424425e-05, + "loss": 126.2944, + "step": 46530 + }, + { + "epoch": 0.18802748902095615, + "grad_norm": 696.36181640625, + "learning_rate": 3.910226117221305e-05, + "loss": 83.011, + "step": 46540 + }, + { + "epoch": 0.18806789028632376, + "grad_norm": 612.4939575195312, + "learning_rate": 3.9101433723593245e-05, + "loss": 93.6564, + "step": 46550 + }, + { + "epoch": 0.1881082915516914, + "grad_norm": 715.0386352539062, + "learning_rate": 3.910060590258116e-05, + "loss": 90.012, + "step": 46560 + }, + { + "epoch": 0.18814869281705904, + "grad_norm": 1234.200439453125, + "learning_rate": 3.909977770919293e-05, + "loss": 80.2349, + "step": 46570 + }, + { + "epoch": 0.18818909408242665, + "grad_norm": 720.6617431640625, + "learning_rate": 3.9098949143444704e-05, + "loss": 196.6034, + "step": 46580 + }, + { + "epoch": 0.1882294953477943, + "grad_norm": 347.96905517578125, + "learning_rate": 3.909812020535262e-05, + "loss": 83.7383, + "step": 46590 + }, + { + "epoch": 0.18826989661316193, + "grad_norm": 2670.52587890625, + "learning_rate": 3.9097290894932866e-05, + "loss": 141.0266, + "step": 46600 + }, + { + "epoch": 0.18831029787852954, + "grad_norm": 583.7713623046875, + "learning_rate": 3.909646121220158e-05, + "loss": 133.2487, + "step": 46610 + }, + { + "epoch": 0.18835069914389718, + "grad_norm": 802.1284790039062, + "learning_rate": 3.9095631157174956e-05, + "loss": 71.9864, + "step": 46620 + }, + { + "epoch": 0.18839110040926482, + "grad_norm": 1095.15966796875, + "learning_rate": 3.909480072986918e-05, + "loss": 94.2537, + "step": 46630 + }, + { + "epoch": 0.18843150167463246, + "grad_norm": 472.41302490234375, + "learning_rate": 3.9093969930300424e-05, + "loss": 112.6919, + "step": 46640 + }, + { + "epoch": 0.18847190294000007, + "grad_norm": 1193.3759765625, + "learning_rate": 3.909313875848489e-05, + "loss": 114.1505, + "step": 46650 + }, + { + "epoch": 0.18851230420536771, + "grad_norm": 164.27403259277344, + "learning_rate": 3.9092307214438795e-05, + "loss": 92.1624, + "step": 46660 + }, + { + "epoch": 0.18855270547073535, + "grad_norm": 454.241455078125, + "learning_rate": 3.909147529817834e-05, + "loss": 99.4759, + "step": 46670 + }, + { + "epoch": 0.18859310673610297, + "grad_norm": 879.4342651367188, + "learning_rate": 3.909064300971975e-05, + "loss": 115.0247, + "step": 46680 + }, + { + "epoch": 0.1886335080014706, + "grad_norm": 871.2396850585938, + "learning_rate": 3.908981034907923e-05, + "loss": 79.1777, + "step": 46690 + }, + { + "epoch": 0.18867390926683825, + "grad_norm": 806.806640625, + "learning_rate": 3.9088977316273044e-05, + "loss": 86.2375, + "step": 46700 + }, + { + "epoch": 0.18871431053220586, + "grad_norm": 468.9644775390625, + "learning_rate": 3.908814391131741e-05, + "loss": 154.6513, + "step": 46710 + }, + { + "epoch": 0.1887547117975735, + "grad_norm": 883.056884765625, + "learning_rate": 3.908731013422859e-05, + "loss": 136.5812, + "step": 46720 + }, + { + "epoch": 0.18879511306294114, + "grad_norm": 1126.4501953125, + "learning_rate": 3.908647598502282e-05, + "loss": 101.9081, + "step": 46730 + }, + { + "epoch": 0.18883551432830875, + "grad_norm": 840.9667358398438, + "learning_rate": 3.908564146371639e-05, + "loss": 89.9149, + "step": 46740 + }, + { + "epoch": 0.1888759155936764, + "grad_norm": 2253.41943359375, + "learning_rate": 3.908480657032554e-05, + "loss": 101.5406, + "step": 46750 + }, + { + "epoch": 0.18891631685904403, + "grad_norm": 876.1803588867188, + 
"learning_rate": 3.9083971304866566e-05, + "loss": 82.987, + "step": 46760 + }, + { + "epoch": 0.18895671812441164, + "grad_norm": 904.62548828125, + "learning_rate": 3.908313566735575e-05, + "loss": 133.2801, + "step": 46770 + }, + { + "epoch": 0.18899711938977928, + "grad_norm": 2488.17041015625, + "learning_rate": 3.908229965780937e-05, + "loss": 155.3012, + "step": 46780 + }, + { + "epoch": 0.18903752065514692, + "grad_norm": 984.2618408203125, + "learning_rate": 3.908146327624374e-05, + "loss": 121.7915, + "step": 46790 + }, + { + "epoch": 0.18907792192051456, + "grad_norm": 875.5440673828125, + "learning_rate": 3.908062652267516e-05, + "loss": 83.577, + "step": 46800 + }, + { + "epoch": 0.18911832318588218, + "grad_norm": 2193.131591796875, + "learning_rate": 3.907978939711995e-05, + "loss": 107.2632, + "step": 46810 + }, + { + "epoch": 0.18915872445124982, + "grad_norm": 779.9102172851562, + "learning_rate": 3.907895189959441e-05, + "loss": 121.2211, + "step": 46820 + }, + { + "epoch": 0.18919912571661746, + "grad_norm": 590.3921508789062, + "learning_rate": 3.907811403011488e-05, + "loss": 102.3358, + "step": 46830 + }, + { + "epoch": 0.18923952698198507, + "grad_norm": 915.5516357421875, + "learning_rate": 3.9077275788697704e-05, + "loss": 124.5061, + "step": 46840 + }, + { + "epoch": 0.1892799282473527, + "grad_norm": 335.1733093261719, + "learning_rate": 3.907643717535921e-05, + "loss": 113.3554, + "step": 46850 + }, + { + "epoch": 0.18932032951272035, + "grad_norm": 1945.07568359375, + "learning_rate": 3.9075598190115756e-05, + "loss": 88.2688, + "step": 46860 + }, + { + "epoch": 0.18936073077808796, + "grad_norm": 1386.7379150390625, + "learning_rate": 3.9074758832983685e-05, + "loss": 147.3337, + "step": 46870 + }, + { + "epoch": 0.1894011320434556, + "grad_norm": 1345.5042724609375, + "learning_rate": 3.9073919103979383e-05, + "loss": 106.5779, + "step": 46880 + }, + { + "epoch": 0.18944153330882324, + "grad_norm": 763.0669555664062, + "learning_rate": 3.90730790031192e-05, + "loss": 116.0768, + "step": 46890 + }, + { + "epoch": 0.18948193457419085, + "grad_norm": 416.8092956542969, + "learning_rate": 3.9072238530419525e-05, + "loss": 114.4026, + "step": 46900 + }, + { + "epoch": 0.1895223358395585, + "grad_norm": 555.8541259765625, + "learning_rate": 3.907139768589674e-05, + "loss": 93.7598, + "step": 46910 + }, + { + "epoch": 0.18956273710492613, + "grad_norm": 1058.077880859375, + "learning_rate": 3.907055646956724e-05, + "loss": 102.9807, + "step": 46920 + }, + { + "epoch": 0.18960313837029374, + "grad_norm": 606.0723876953125, + "learning_rate": 3.9069714881447416e-05, + "loss": 95.3127, + "step": 46930 + }, + { + "epoch": 0.18964353963566138, + "grad_norm": 891.5465698242188, + "learning_rate": 3.906887292155369e-05, + "loss": 118.5351, + "step": 46940 + }, + { + "epoch": 0.18968394090102902, + "grad_norm": 880.2471313476562, + "learning_rate": 3.906803058990247e-05, + "loss": 109.6543, + "step": 46950 + }, + { + "epoch": 0.18972434216639666, + "grad_norm": 996.2352294921875, + "learning_rate": 3.9067187886510173e-05, + "loss": 99.1289, + "step": 46960 + }, + { + "epoch": 0.18976474343176428, + "grad_norm": 517.0300903320312, + "learning_rate": 3.9066344811393226e-05, + "loss": 80.6816, + "step": 46970 + }, + { + "epoch": 0.18980514469713192, + "grad_norm": 933.3284301757812, + "learning_rate": 3.906550136456808e-05, + "loss": 91.4371, + "step": 46980 + }, + { + "epoch": 0.18984554596249956, + "grad_norm": 1253.6434326171875, + "learning_rate": 3.9064657546051175e-05, 
+ "loss": 98.3493, + "step": 46990 + }, + { + "epoch": 0.18988594722786717, + "grad_norm": 1040.4783935546875, + "learning_rate": 3.9063813355858944e-05, + "loss": 118.8392, + "step": 47000 + }, + { + "epoch": 0.1899263484932348, + "grad_norm": 1197.19921875, + "learning_rate": 3.906296879400786e-05, + "loss": 124.5089, + "step": 47010 + }, + { + "epoch": 0.18996674975860245, + "grad_norm": 2596.039794921875, + "learning_rate": 3.906212386051439e-05, + "loss": 105.6103, + "step": 47020 + }, + { + "epoch": 0.19000715102397006, + "grad_norm": 291.2439880371094, + "learning_rate": 3.9061278555394995e-05, + "loss": 74.313, + "step": 47030 + }, + { + "epoch": 0.1900475522893377, + "grad_norm": 686.6499633789062, + "learning_rate": 3.906043287866617e-05, + "loss": 115.2689, + "step": 47040 + }, + { + "epoch": 0.19008795355470534, + "grad_norm": 760.739013671875, + "learning_rate": 3.905958683034438e-05, + "loss": 94.2875, + "step": 47050 + }, + { + "epoch": 0.19012835482007295, + "grad_norm": 407.802490234375, + "learning_rate": 3.905874041044614e-05, + "loss": 99.9216, + "step": 47060 + }, + { + "epoch": 0.1901687560854406, + "grad_norm": 1711.402099609375, + "learning_rate": 3.9057893618987946e-05, + "loss": 103.528, + "step": 47070 + }, + { + "epoch": 0.19020915735080823, + "grad_norm": 548.1200561523438, + "learning_rate": 3.90570464559863e-05, + "loss": 87.3228, + "step": 47080 + }, + { + "epoch": 0.19024955861617585, + "grad_norm": 610.97265625, + "learning_rate": 3.905619892145772e-05, + "loss": 95.1298, + "step": 47090 + }, + { + "epoch": 0.19028995988154349, + "grad_norm": 457.3301086425781, + "learning_rate": 3.9055351015418735e-05, + "loss": 106.8903, + "step": 47100 + }, + { + "epoch": 0.19033036114691113, + "grad_norm": 1357.89990234375, + "learning_rate": 3.905450273788587e-05, + "loss": 87.2958, + "step": 47110 + }, + { + "epoch": 0.19037076241227877, + "grad_norm": 953.5557861328125, + "learning_rate": 3.9053654088875667e-05, + "loss": 107.9062, + "step": 47120 + }, + { + "epoch": 0.19041116367764638, + "grad_norm": 1156.779296875, + "learning_rate": 3.905280506840466e-05, + "loss": 154.6784, + "step": 47130 + }, + { + "epoch": 0.19045156494301402, + "grad_norm": 601.1533203125, + "learning_rate": 3.905195567648942e-05, + "loss": 96.1751, + "step": 47140 + }, + { + "epoch": 0.19049196620838166, + "grad_norm": 746.859619140625, + "learning_rate": 3.905110591314649e-05, + "loss": 118.4346, + "step": 47150 + }, + { + "epoch": 0.19053236747374927, + "grad_norm": 526.612548828125, + "learning_rate": 3.905025577839244e-05, + "loss": 128.7738, + "step": 47160 + }, + { + "epoch": 0.1905727687391169, + "grad_norm": 743.1995239257812, + "learning_rate": 3.904940527224385e-05, + "loss": 72.3215, + "step": 47170 + }, + { + "epoch": 0.19061317000448455, + "grad_norm": 569.5060424804688, + "learning_rate": 3.90485543947173e-05, + "loss": 122.9496, + "step": 47180 + }, + { + "epoch": 0.19065357126985216, + "grad_norm": 802.4224853515625, + "learning_rate": 3.904770314582937e-05, + "loss": 100.5065, + "step": 47190 + }, + { + "epoch": 0.1906939725352198, + "grad_norm": 926.613037109375, + "learning_rate": 3.9046851525596656e-05, + "loss": 88.1017, + "step": 47200 + }, + { + "epoch": 0.19073437380058744, + "grad_norm": 902.0261840820312, + "learning_rate": 3.904599953403577e-05, + "loss": 155.1511, + "step": 47210 + }, + { + "epoch": 0.19077477506595505, + "grad_norm": 656.165771484375, + "learning_rate": 3.904514717116332e-05, + "loss": 110.1571, + "step": 47220 + }, + { + "epoch": 
0.1908151763313227, + "grad_norm": 690.4620971679688, + "learning_rate": 3.9044294436995924e-05, + "loss": 125.7411, + "step": 47230 + }, + { + "epoch": 0.19085557759669033, + "grad_norm": 1137.6370849609375, + "learning_rate": 3.9043441331550195e-05, + "loss": 141.262, + "step": 47240 + }, + { + "epoch": 0.19089597886205795, + "grad_norm": 1214.5484619140625, + "learning_rate": 3.9042587854842776e-05, + "loss": 101.5303, + "step": 47250 + }, + { + "epoch": 0.1909363801274256, + "grad_norm": 724.4214477539062, + "learning_rate": 3.9041734006890304e-05, + "loss": 71.886, + "step": 47260 + }, + { + "epoch": 0.19097678139279323, + "grad_norm": 843.385009765625, + "learning_rate": 3.9040879787709426e-05, + "loss": 140.1131, + "step": 47270 + }, + { + "epoch": 0.19101718265816087, + "grad_norm": 542.4107055664062, + "learning_rate": 3.904002519731679e-05, + "loss": 88.5582, + "step": 47280 + }, + { + "epoch": 0.19105758392352848, + "grad_norm": 960.9347534179688, + "learning_rate": 3.903917023572907e-05, + "loss": 120.371, + "step": 47290 + }, + { + "epoch": 0.19109798518889612, + "grad_norm": 779.9694213867188, + "learning_rate": 3.9038314902962906e-05, + "loss": 91.0707, + "step": 47300 + }, + { + "epoch": 0.19113838645426376, + "grad_norm": 740.7415771484375, + "learning_rate": 3.9037459199035004e-05, + "loss": 111.5646, + "step": 47310 + }, + { + "epoch": 0.19117878771963137, + "grad_norm": 559.0507202148438, + "learning_rate": 3.903660312396203e-05, + "loss": 87.232, + "step": 47320 + }, + { + "epoch": 0.191219188984999, + "grad_norm": 943.8803100585938, + "learning_rate": 3.903574667776068e-05, + "loss": 78.608, + "step": 47330 + }, + { + "epoch": 0.19125959025036665, + "grad_norm": 1007.5565185546875, + "learning_rate": 3.903488986044765e-05, + "loss": 96.0088, + "step": 47340 + }, + { + "epoch": 0.19129999151573426, + "grad_norm": 1069.4013671875, + "learning_rate": 3.9034032672039634e-05, + "loss": 107.2876, + "step": 47350 + }, + { + "epoch": 0.1913403927811019, + "grad_norm": 856.0144653320312, + "learning_rate": 3.9033175112553354e-05, + "loss": 123.2435, + "step": 47360 + }, + { + "epoch": 0.19138079404646954, + "grad_norm": 547.5615234375, + "learning_rate": 3.9032317182005536e-05, + "loss": 93.7633, + "step": 47370 + }, + { + "epoch": 0.19142119531183716, + "grad_norm": 620.7142944335938, + "learning_rate": 3.903145888041289e-05, + "loss": 107.7574, + "step": 47380 + }, + { + "epoch": 0.1914615965772048, + "grad_norm": 736.723388671875, + "learning_rate": 3.903060020779215e-05, + "loss": 109.2038, + "step": 47390 + }, + { + "epoch": 0.19150199784257244, + "grad_norm": 594.0596313476562, + "learning_rate": 3.902974116416006e-05, + "loss": 95.9783, + "step": 47400 + }, + { + "epoch": 0.19154239910794005, + "grad_norm": 476.41986083984375, + "learning_rate": 3.902888174953338e-05, + "loss": 98.159, + "step": 47410 + }, + { + "epoch": 0.1915828003733077, + "grad_norm": 877.4638061523438, + "learning_rate": 3.902802196392885e-05, + "loss": 106.6224, + "step": 47420 + }, + { + "epoch": 0.19162320163867533, + "grad_norm": 1783.840087890625, + "learning_rate": 3.902716180736323e-05, + "loss": 110.1414, + "step": 47430 + }, + { + "epoch": 0.19166360290404297, + "grad_norm": 620.6277465820312, + "learning_rate": 3.90263012798533e-05, + "loss": 96.7933, + "step": 47440 + }, + { + "epoch": 0.19170400416941058, + "grad_norm": 1432.2301025390625, + "learning_rate": 3.902544038141583e-05, + "loss": 126.9916, + "step": 47450 + }, + { + "epoch": 0.19174440543477822, + "grad_norm": 
584.9290161132812, + "learning_rate": 3.9024579112067604e-05, + "loss": 70.8887, + "step": 47460 + }, + { + "epoch": 0.19178480670014586, + "grad_norm": 1275.022705078125, + "learning_rate": 3.9023717471825416e-05, + "loss": 135.8612, + "step": 47470 + }, + { + "epoch": 0.19182520796551347, + "grad_norm": 677.3323364257812, + "learning_rate": 3.902285546070606e-05, + "loss": 131.6395, + "step": 47480 + }, + { + "epoch": 0.1918656092308811, + "grad_norm": 1256.497802734375, + "learning_rate": 3.902199307872634e-05, + "loss": 136.0821, + "step": 47490 + }, + { + "epoch": 0.19190601049624875, + "grad_norm": 503.8961486816406, + "learning_rate": 3.9021130325903076e-05, + "loss": 83.2521, + "step": 47500 + }, + { + "epoch": 0.19194641176161636, + "grad_norm": 983.4738159179688, + "learning_rate": 3.902026720225308e-05, + "loss": 153.5143, + "step": 47510 + }, + { + "epoch": 0.191986813026984, + "grad_norm": 625.4432983398438, + "learning_rate": 3.901940370779318e-05, + "loss": 97.7986, + "step": 47520 + }, + { + "epoch": 0.19202721429235164, + "grad_norm": 819.6492919921875, + "learning_rate": 3.901853984254023e-05, + "loss": 121.4806, + "step": 47530 + }, + { + "epoch": 0.19206761555771926, + "grad_norm": 482.64019775390625, + "learning_rate": 3.901767560651104e-05, + "loss": 119.9756, + "step": 47540 + }, + { + "epoch": 0.1921080168230869, + "grad_norm": 579.32080078125, + "learning_rate": 3.901681099972247e-05, + "loss": 88.8558, + "step": 47550 + }, + { + "epoch": 0.19214841808845454, + "grad_norm": 446.040283203125, + "learning_rate": 3.901594602219138e-05, + "loss": 99.5406, + "step": 47560 + }, + { + "epoch": 0.19218881935382215, + "grad_norm": 1592.5213623046875, + "learning_rate": 3.901508067393464e-05, + "loss": 96.5574, + "step": 47570 + }, + { + "epoch": 0.1922292206191898, + "grad_norm": 1247.53466796875, + "learning_rate": 3.901421495496911e-05, + "loss": 106.3904, + "step": 47580 + }, + { + "epoch": 0.19226962188455743, + "grad_norm": 1081.18310546875, + "learning_rate": 3.901334886531167e-05, + "loss": 112.1749, + "step": 47590 + }, + { + "epoch": 0.19231002314992507, + "grad_norm": 403.4075012207031, + "learning_rate": 3.90124824049792e-05, + "loss": 119.8106, + "step": 47600 + }, + { + "epoch": 0.19235042441529268, + "grad_norm": 1389.2232666015625, + "learning_rate": 3.901161557398861e-05, + "loss": 143.9282, + "step": 47610 + }, + { + "epoch": 0.19239082568066032, + "grad_norm": 809.2152099609375, + "learning_rate": 3.901074837235677e-05, + "loss": 129.0526, + "step": 47620 + }, + { + "epoch": 0.19243122694602796, + "grad_norm": 861.3272094726562, + "learning_rate": 3.900988080010061e-05, + "loss": 128.3992, + "step": 47630 + }, + { + "epoch": 0.19247162821139557, + "grad_norm": 1084.995849609375, + "learning_rate": 3.900901285723704e-05, + "loss": 60.623, + "step": 47640 + }, + { + "epoch": 0.1925120294767632, + "grad_norm": 3156.18310546875, + "learning_rate": 3.900814454378297e-05, + "loss": 120.0533, + "step": 47650 + }, + { + "epoch": 0.19255243074213085, + "grad_norm": 1573.1180419921875, + "learning_rate": 3.900727585975535e-05, + "loss": 122.9062, + "step": 47660 + }, + { + "epoch": 0.19259283200749847, + "grad_norm": 1000.0886840820312, + "learning_rate": 3.900640680517109e-05, + "loss": 82.2484, + "step": 47670 + }, + { + "epoch": 0.1926332332728661, + "grad_norm": 592.7572021484375, + "learning_rate": 3.900553738004715e-05, + "loss": 93.7149, + "step": 47680 + }, + { + "epoch": 0.19267363453823375, + "grad_norm": 506.865478515625, + "learning_rate": 
3.9004667584400474e-05, + "loss": 106.1138, + "step": 47690 + }, + { + "epoch": 0.19271403580360136, + "grad_norm": 2047.713623046875, + "learning_rate": 3.900379741824802e-05, + "loss": 138.9009, + "step": 47700 + }, + { + "epoch": 0.192754437068969, + "grad_norm": 1223.90966796875, + "learning_rate": 3.900292688160674e-05, + "loss": 85.75, + "step": 47710 + }, + { + "epoch": 0.19279483833433664, + "grad_norm": 859.6675415039062, + "learning_rate": 3.9002055974493626e-05, + "loss": 78.0723, + "step": 47720 + }, + { + "epoch": 0.19283523959970425, + "grad_norm": 811.9254150390625, + "learning_rate": 3.900118469692565e-05, + "loss": 157.9077, + "step": 47730 + }, + { + "epoch": 0.1928756408650719, + "grad_norm": 545.8783569335938, + "learning_rate": 3.90003130489198e-05, + "loss": 99.3891, + "step": 47740 + }, + { + "epoch": 0.19291604213043953, + "grad_norm": 1080.083984375, + "learning_rate": 3.899944103049305e-05, + "loss": 113.9532, + "step": 47750 + }, + { + "epoch": 0.19295644339580717, + "grad_norm": 793.1613159179688, + "learning_rate": 3.8998568641662425e-05, + "loss": 97.7357, + "step": 47760 + }, + { + "epoch": 0.19299684466117478, + "grad_norm": 322.8968200683594, + "learning_rate": 3.899769588244493e-05, + "loss": 106.2711, + "step": 47770 + }, + { + "epoch": 0.19303724592654242, + "grad_norm": 2364.1064453125, + "learning_rate": 3.899682275285756e-05, + "loss": 96.7341, + "step": 47780 + }, + { + "epoch": 0.19307764719191006, + "grad_norm": 540.1163940429688, + "learning_rate": 3.8995949252917354e-05, + "loss": 101.0279, + "step": 47790 + }, + { + "epoch": 0.19311804845727767, + "grad_norm": 730.4757690429688, + "learning_rate": 3.899507538264134e-05, + "loss": 82.724, + "step": 47800 + }, + { + "epoch": 0.19315844972264531, + "grad_norm": 842.0224609375, + "learning_rate": 3.899420114204655e-05, + "loss": 124.4751, + "step": 47810 + }, + { + "epoch": 0.19319885098801295, + "grad_norm": 1032.0927734375, + "learning_rate": 3.899332653115004e-05, + "loss": 67.0296, + "step": 47820 + }, + { + "epoch": 0.19323925225338057, + "grad_norm": 602.0194091796875, + "learning_rate": 3.899245154996884e-05, + "loss": 91.8092, + "step": 47830 + }, + { + "epoch": 0.1932796535187482, + "grad_norm": 565.9478759765625, + "learning_rate": 3.8991576198520015e-05, + "loss": 183.2019, + "step": 47840 + }, + { + "epoch": 0.19332005478411585, + "grad_norm": 962.8865966796875, + "learning_rate": 3.899070047682064e-05, + "loss": 118.0625, + "step": 47850 + }, + { + "epoch": 0.19336045604948346, + "grad_norm": 1416.96533203125, + "learning_rate": 3.898982438488778e-05, + "loss": 127.1912, + "step": 47860 + }, + { + "epoch": 0.1934008573148511, + "grad_norm": 586.7955322265625, + "learning_rate": 3.898894792273853e-05, + "loss": 114.4021, + "step": 47870 + }, + { + "epoch": 0.19344125858021874, + "grad_norm": 748.2214965820312, + "learning_rate": 3.898807109038995e-05, + "loss": 92.9319, + "step": 47880 + }, + { + "epoch": 0.19348165984558635, + "grad_norm": 929.320556640625, + "learning_rate": 3.898719388785915e-05, + "loss": 99.2743, + "step": 47890 + }, + { + "epoch": 0.193522061110954, + "grad_norm": 2149.06494140625, + "learning_rate": 3.898631631516323e-05, + "loss": 113.0213, + "step": 47900 + }, + { + "epoch": 0.19356246237632163, + "grad_norm": 754.402099609375, + "learning_rate": 3.89854383723193e-05, + "loss": 144.1465, + "step": 47910 + }, + { + "epoch": 0.19360286364168927, + "grad_norm": 1014.666015625, + "learning_rate": 3.898456005934447e-05, + "loss": 93.0868, + "step": 47920 + }, + { + 
"epoch": 0.19364326490705688, + "grad_norm": 1310.1051025390625, + "learning_rate": 3.8983681376255876e-05, + "loss": 158.4007, + "step": 47930 + }, + { + "epoch": 0.19368366617242452, + "grad_norm": 821.0430297851562, + "learning_rate": 3.898280232307064e-05, + "loss": 132.6509, + "step": 47940 + }, + { + "epoch": 0.19372406743779216, + "grad_norm": 3745.5439453125, + "learning_rate": 3.898192289980589e-05, + "loss": 125.3092, + "step": 47950 + }, + { + "epoch": 0.19376446870315978, + "grad_norm": 937.6454467773438, + "learning_rate": 3.898104310647879e-05, + "loss": 107.8471, + "step": 47960 + }, + { + "epoch": 0.19380486996852742, + "grad_norm": 299.5453186035156, + "learning_rate": 3.8980162943106486e-05, + "loss": 82.5901, + "step": 47970 + }, + { + "epoch": 0.19384527123389506, + "grad_norm": 1173.594482421875, + "learning_rate": 3.897928240970612e-05, + "loss": 111.3734, + "step": 47980 + }, + { + "epoch": 0.19388567249926267, + "grad_norm": 1305.3458251953125, + "learning_rate": 3.8978401506294885e-05, + "loss": 133.9361, + "step": 47990 + }, + { + "epoch": 0.1939260737646303, + "grad_norm": 840.4127197265625, + "learning_rate": 3.897752023288993e-05, + "loss": 160.2798, + "step": 48000 + }, + { + "epoch": 0.19396647502999795, + "grad_norm": 816.8165283203125, + "learning_rate": 3.8976638589508456e-05, + "loss": 68.6603, + "step": 48010 + }, + { + "epoch": 0.19400687629536556, + "grad_norm": 746.508056640625, + "learning_rate": 3.897575657616764e-05, + "loss": 97.5326, + "step": 48020 + }, + { + "epoch": 0.1940472775607332, + "grad_norm": 5892.21728515625, + "learning_rate": 3.897487419288468e-05, + "loss": 172.2627, + "step": 48030 + }, + { + "epoch": 0.19408767882610084, + "grad_norm": 2212.56396484375, + "learning_rate": 3.8973991439676783e-05, + "loss": 112.301, + "step": 48040 + }, + { + "epoch": 0.19412808009146845, + "grad_norm": 638.8115234375, + "learning_rate": 3.897310831656115e-05, + "loss": 85.8397, + "step": 48050 + }, + { + "epoch": 0.1941684813568361, + "grad_norm": 671.4449462890625, + "learning_rate": 3.897222482355501e-05, + "loss": 126.9506, + "step": 48060 + }, + { + "epoch": 0.19420888262220373, + "grad_norm": 549.748291015625, + "learning_rate": 3.897134096067557e-05, + "loss": 120.4295, + "step": 48070 + }, + { + "epoch": 0.19424928388757137, + "grad_norm": 506.0268859863281, + "learning_rate": 3.8970456727940076e-05, + "loss": 132.9972, + "step": 48080 + }, + { + "epoch": 0.19428968515293898, + "grad_norm": 547.9598388671875, + "learning_rate": 3.896957212536576e-05, + "loss": 108.1358, + "step": 48090 + }, + { + "epoch": 0.19433008641830662, + "grad_norm": 1059.8402099609375, + "learning_rate": 3.896868715296987e-05, + "loss": 136.6264, + "step": 48100 + }, + { + "epoch": 0.19437048768367426, + "grad_norm": 1440.77197265625, + "learning_rate": 3.896780181076966e-05, + "loss": 71.8482, + "step": 48110 + }, + { + "epoch": 0.19441088894904188, + "grad_norm": 1696.9234619140625, + "learning_rate": 3.8966916098782377e-05, + "loss": 103.9423, + "step": 48120 + }, + { + "epoch": 0.19445129021440952, + "grad_norm": 746.289794921875, + "learning_rate": 3.896603001702531e-05, + "loss": 89.491, + "step": 48130 + }, + { + "epoch": 0.19449169147977716, + "grad_norm": 980.9140625, + "learning_rate": 3.896514356551572e-05, + "loss": 126.0233, + "step": 48140 + }, + { + "epoch": 0.19453209274514477, + "grad_norm": 2855.701171875, + "learning_rate": 3.896425674427089e-05, + "loss": 87.5288, + "step": 48150 + }, + { + "epoch": 0.1945724940105124, + "grad_norm": 
831.6000366210938, + "learning_rate": 3.896336955330812e-05, + "loss": 146.4545, + "step": 48160 + }, + { + "epoch": 0.19461289527588005, + "grad_norm": 2698.1435546875, + "learning_rate": 3.8962481992644694e-05, + "loss": 145.5768, + "step": 48170 + }, + { + "epoch": 0.19465329654124766, + "grad_norm": 915.8610229492188, + "learning_rate": 3.896159406229792e-05, + "loss": 88.6772, + "step": 48180 + }, + { + "epoch": 0.1946936978066153, + "grad_norm": 540.65087890625, + "learning_rate": 3.896070576228511e-05, + "loss": 98.2031, + "step": 48190 + }, + { + "epoch": 0.19473409907198294, + "grad_norm": 1191.4637451171875, + "learning_rate": 3.895981709262357e-05, + "loss": 82.2846, + "step": 48200 + }, + { + "epoch": 0.19477450033735055, + "grad_norm": 1605.32666015625, + "learning_rate": 3.895892805333064e-05, + "loss": 115.3226, + "step": 48210 + }, + { + "epoch": 0.1948149016027182, + "grad_norm": 1233.2899169921875, + "learning_rate": 3.895803864442365e-05, + "loss": 126.1156, + "step": 48220 + }, + { + "epoch": 0.19485530286808583, + "grad_norm": 300.4566650390625, + "learning_rate": 3.895714886591993e-05, + "loss": 109.8741, + "step": 48230 + }, + { + "epoch": 0.19489570413345347, + "grad_norm": 438.7498474121094, + "learning_rate": 3.895625871783684e-05, + "loss": 79.6374, + "step": 48240 + }, + { + "epoch": 0.19493610539882109, + "grad_norm": 695.031982421875, + "learning_rate": 3.895536820019172e-05, + "loss": 108.2644, + "step": 48250 + }, + { + "epoch": 0.19497650666418873, + "grad_norm": 1072.329345703125, + "learning_rate": 3.895447731300194e-05, + "loss": 84.6048, + "step": 48260 + }, + { + "epoch": 0.19501690792955637, + "grad_norm": 520.2815551757812, + "learning_rate": 3.8953586056284866e-05, + "loss": 67.4643, + "step": 48270 + }, + { + "epoch": 0.19505730919492398, + "grad_norm": 1035.45068359375, + "learning_rate": 3.895269443005788e-05, + "loss": 87.6268, + "step": 48280 + }, + { + "epoch": 0.19509771046029162, + "grad_norm": 1105.692626953125, + "learning_rate": 3.895180243433835e-05, + "loss": 108.2238, + "step": 48290 + }, + { + "epoch": 0.19513811172565926, + "grad_norm": 908.1073608398438, + "learning_rate": 3.895091006914368e-05, + "loss": 95.2036, + "step": 48300 + }, + { + "epoch": 0.19517851299102687, + "grad_norm": 1347.7313232421875, + "learning_rate": 3.8950017334491256e-05, + "loss": 128.3842, + "step": 48310 + }, + { + "epoch": 0.1952189142563945, + "grad_norm": 776.2427368164062, + "learning_rate": 3.894912423039849e-05, + "loss": 143.7808, + "step": 48320 + }, + { + "epoch": 0.19525931552176215, + "grad_norm": 588.1817016601562, + "learning_rate": 3.89482307568828e-05, + "loss": 105.036, + "step": 48330 + }, + { + "epoch": 0.19529971678712976, + "grad_norm": 2896.539794921875, + "learning_rate": 3.894733691396159e-05, + "loss": 89.6328, + "step": 48340 + }, + { + "epoch": 0.1953401180524974, + "grad_norm": 1050.2783203125, + "learning_rate": 3.894644270165229e-05, + "loss": 148.1094, + "step": 48350 + }, + { + "epoch": 0.19538051931786504, + "grad_norm": 986.3662719726562, + "learning_rate": 3.894554811997234e-05, + "loss": 89.1415, + "step": 48360 + }, + { + "epoch": 0.19542092058323265, + "grad_norm": 894.5977783203125, + "learning_rate": 3.894465316893917e-05, + "loss": 112.6819, + "step": 48370 + }, + { + "epoch": 0.1954613218486003, + "grad_norm": 929.5335693359375, + "learning_rate": 3.894375784857023e-05, + "loss": 126.2155, + "step": 48380 + }, + { + "epoch": 0.19550172311396793, + "grad_norm": 1690.3751220703125, + "learning_rate": 
3.894286215888299e-05, + "loss": 126.3998, + "step": 48390 + }, + { + "epoch": 0.19554212437933557, + "grad_norm": 3643.14111328125, + "learning_rate": 3.894196609989489e-05, + "loss": 100.7497, + "step": 48400 + }, + { + "epoch": 0.1955825256447032, + "grad_norm": 1396.2137451171875, + "learning_rate": 3.894106967162341e-05, + "loss": 103.0135, + "step": 48410 + }, + { + "epoch": 0.19562292691007083, + "grad_norm": 1417.135498046875, + "learning_rate": 3.894017287408603e-05, + "loss": 99.1023, + "step": 48420 + }, + { + "epoch": 0.19566332817543847, + "grad_norm": 1084.1612548828125, + "learning_rate": 3.8939275707300226e-05, + "loss": 104.7564, + "step": 48430 + }, + { + "epoch": 0.19570372944080608, + "grad_norm": 978.8095092773438, + "learning_rate": 3.8938378171283485e-05, + "loss": 86.2037, + "step": 48440 + }, + { + "epoch": 0.19574413070617372, + "grad_norm": 989.3792114257812, + "learning_rate": 3.893748026605332e-05, + "loss": 83.5012, + "step": 48450 + }, + { + "epoch": 0.19578453197154136, + "grad_norm": 1319.920654296875, + "learning_rate": 3.893658199162723e-05, + "loss": 132.7873, + "step": 48460 + }, + { + "epoch": 0.19582493323690897, + "grad_norm": 2057.632568359375, + "learning_rate": 3.893568334802272e-05, + "loss": 140.3004, + "step": 48470 + }, + { + "epoch": 0.1958653345022766, + "grad_norm": 748.1298217773438, + "learning_rate": 3.893478433525731e-05, + "loss": 108.0157, + "step": 48480 + }, + { + "epoch": 0.19590573576764425, + "grad_norm": 1370.6361083984375, + "learning_rate": 3.8933884953348545e-05, + "loss": 112.1976, + "step": 48490 + }, + { + "epoch": 0.19594613703301186, + "grad_norm": 734.4710693359375, + "learning_rate": 3.893298520231393e-05, + "loss": 119.2809, + "step": 48500 + }, + { + "epoch": 0.1959865382983795, + "grad_norm": 2198.250244140625, + "learning_rate": 3.893208508217103e-05, + "loss": 127.122, + "step": 48510 + }, + { + "epoch": 0.19602693956374714, + "grad_norm": 693.5650634765625, + "learning_rate": 3.893118459293738e-05, + "loss": 94.3413, + "step": 48520 + }, + { + "epoch": 0.19606734082911476, + "grad_norm": 773.6821899414062, + "learning_rate": 3.893028373463054e-05, + "loss": 78.6874, + "step": 48530 + }, + { + "epoch": 0.1961077420944824, + "grad_norm": 884.8726196289062, + "learning_rate": 3.8929382507268074e-05, + "loss": 95.2842, + "step": 48540 + }, + { + "epoch": 0.19614814335985004, + "grad_norm": 596.2680053710938, + "learning_rate": 3.892848091086755e-05, + "loss": 120.6791, + "step": 48550 + }, + { + "epoch": 0.19618854462521768, + "grad_norm": 700.5574951171875, + "learning_rate": 3.8927578945446544e-05, + "loss": 101.2379, + "step": 48560 + }, + { + "epoch": 0.1962289458905853, + "grad_norm": 414.36065673828125, + "learning_rate": 3.892667661102265e-05, + "loss": 53.7003, + "step": 48570 + }, + { + "epoch": 0.19626934715595293, + "grad_norm": 405.298583984375, + "learning_rate": 3.892577390761344e-05, + "loss": 134.3708, + "step": 48580 + }, + { + "epoch": 0.19630974842132057, + "grad_norm": 1853.450927734375, + "learning_rate": 3.892487083523653e-05, + "loss": 174.1038, + "step": 48590 + }, + { + "epoch": 0.19635014968668818, + "grad_norm": 448.18304443359375, + "learning_rate": 3.892396739390952e-05, + "loss": 95.5795, + "step": 48600 + }, + { + "epoch": 0.19639055095205582, + "grad_norm": 1388.155029296875, + "learning_rate": 3.892306358365002e-05, + "loss": 132.2949, + "step": 48610 + }, + { + "epoch": 0.19643095221742346, + "grad_norm": 2661.12841796875, + "learning_rate": 3.8922159404475653e-05, + "loss": 91.1705, 
+ "step": 48620 + }, + { + "epoch": 0.19647135348279107, + "grad_norm": 730.8650512695312, + "learning_rate": 3.8921254856404056e-05, + "loss": 118.3011, + "step": 48630 + }, + { + "epoch": 0.1965117547481587, + "grad_norm": 567.5228271484375, + "learning_rate": 3.892034993945284e-05, + "loss": 117.8787, + "step": 48640 + }, + { + "epoch": 0.19655215601352635, + "grad_norm": 1031.958984375, + "learning_rate": 3.891944465363968e-05, + "loss": 89.2363, + "step": 48650 + }, + { + "epoch": 0.19659255727889396, + "grad_norm": 989.8963012695312, + "learning_rate": 3.891853899898219e-05, + "loss": 133.3197, + "step": 48660 + }, + { + "epoch": 0.1966329585442616, + "grad_norm": 1048.5263671875, + "learning_rate": 3.8917632975498046e-05, + "loss": 81.6837, + "step": 48670 + }, + { + "epoch": 0.19667335980962924, + "grad_norm": 1031.2650146484375, + "learning_rate": 3.891672658320491e-05, + "loss": 62.9335, + "step": 48680 + }, + { + "epoch": 0.19671376107499686, + "grad_norm": 637.5947265625, + "learning_rate": 3.891581982212045e-05, + "loss": 61.6626, + "step": 48690 + }, + { + "epoch": 0.1967541623403645, + "grad_norm": 418.9532470703125, + "learning_rate": 3.891491269226234e-05, + "loss": 90.189, + "step": 48700 + }, + { + "epoch": 0.19679456360573214, + "grad_norm": 1134.782958984375, + "learning_rate": 3.891400519364827e-05, + "loss": 147.1169, + "step": 48710 + }, + { + "epoch": 0.19683496487109975, + "grad_norm": 1189.9422607421875, + "learning_rate": 3.891309732629593e-05, + "loss": 105.8131, + "step": 48720 + }, + { + "epoch": 0.1968753661364674, + "grad_norm": 520.81201171875, + "learning_rate": 3.891218909022302e-05, + "loss": 133.6037, + "step": 48730 + }, + { + "epoch": 0.19691576740183503, + "grad_norm": 1018.0159301757812, + "learning_rate": 3.891128048544726e-05, + "loss": 123.5187, + "step": 48740 + }, + { + "epoch": 0.19695616866720267, + "grad_norm": 1026.7572021484375, + "learning_rate": 3.891037151198634e-05, + "loss": 180.661, + "step": 48750 + }, + { + "epoch": 0.19699656993257028, + "grad_norm": 727.6256103515625, + "learning_rate": 3.890946216985799e-05, + "loss": 125.7881, + "step": 48760 + }, + { + "epoch": 0.19703697119793792, + "grad_norm": 591.1439208984375, + "learning_rate": 3.890855245907994e-05, + "loss": 121.2122, + "step": 48770 + }, + { + "epoch": 0.19707737246330556, + "grad_norm": 912.4683837890625, + "learning_rate": 3.890764237966993e-05, + "loss": 103.7165, + "step": 48780 + }, + { + "epoch": 0.19711777372867317, + "grad_norm": 1571.4981689453125, + "learning_rate": 3.8906731931645694e-05, + "loss": 153.2396, + "step": 48790 + }, + { + "epoch": 0.1971581749940408, + "grad_norm": 2669.1181640625, + "learning_rate": 3.8905821115024994e-05, + "loss": 123.65, + "step": 48800 + }, + { + "epoch": 0.19719857625940845, + "grad_norm": 686.392578125, + "learning_rate": 3.8904909929825565e-05, + "loss": 70.6198, + "step": 48810 + }, + { + "epoch": 0.19723897752477607, + "grad_norm": 617.6455078125, + "learning_rate": 3.890399837606519e-05, + "loss": 114.5608, + "step": 48820 + }, + { + "epoch": 0.1972793787901437, + "grad_norm": 802.8543090820312, + "learning_rate": 3.8903086453761634e-05, + "loss": 144.7911, + "step": 48830 + }, + { + "epoch": 0.19731978005551135, + "grad_norm": 1176.902099609375, + "learning_rate": 3.890217416293267e-05, + "loss": 113.7437, + "step": 48840 + }, + { + "epoch": 0.19736018132087896, + "grad_norm": 463.6819152832031, + "learning_rate": 3.89012615035961e-05, + "loss": 121.9712, + "step": 48850 + }, + { + "epoch": 0.1974005825862466, + 
"grad_norm": 674.3209228515625, + "learning_rate": 3.89003484757697e-05, + "loss": 67.4668, + "step": 48860 + }, + { + "epoch": 0.19744098385161424, + "grad_norm": 627.8193359375, + "learning_rate": 3.889943507947128e-05, + "loss": 87.9278, + "step": 48870 + }, + { + "epoch": 0.19748138511698185, + "grad_norm": 509.73406982421875, + "learning_rate": 3.889852131471863e-05, + "loss": 112.3446, + "step": 48880 + }, + { + "epoch": 0.1975217863823495, + "grad_norm": 510.49871826171875, + "learning_rate": 3.8897607181529594e-05, + "loss": 81.5005, + "step": 48890 + }, + { + "epoch": 0.19756218764771713, + "grad_norm": 925.0003662109375, + "learning_rate": 3.889669267992197e-05, + "loss": 106.8473, + "step": 48900 + }, + { + "epoch": 0.19760258891308477, + "grad_norm": 487.9031677246094, + "learning_rate": 3.889577780991359e-05, + "loss": 89.4692, + "step": 48910 + }, + { + "epoch": 0.19764299017845238, + "grad_norm": 663.2654418945312, + "learning_rate": 3.8894862571522297e-05, + "loss": 108.6358, + "step": 48920 + }, + { + "epoch": 0.19768339144382002, + "grad_norm": 601.095458984375, + "learning_rate": 3.889394696476593e-05, + "loss": 93.1609, + "step": 48930 + }, + { + "epoch": 0.19772379270918766, + "grad_norm": 1030.7294921875, + "learning_rate": 3.8893030989662336e-05, + "loss": 108.7546, + "step": 48940 + }, + { + "epoch": 0.19776419397455527, + "grad_norm": 404.0530090332031, + "learning_rate": 3.889211464622938e-05, + "loss": 110.9879, + "step": 48950 + }, + { + "epoch": 0.19780459523992291, + "grad_norm": 1252.054443359375, + "learning_rate": 3.889119793448493e-05, + "loss": 127.8656, + "step": 48960 + }, + { + "epoch": 0.19784499650529055, + "grad_norm": 494.51007080078125, + "learning_rate": 3.889028085444684e-05, + "loss": 79.719, + "step": 48970 + }, + { + "epoch": 0.19788539777065817, + "grad_norm": 1608.6728515625, + "learning_rate": 3.8889363406133e-05, + "loss": 111.1495, + "step": 48980 + }, + { + "epoch": 0.1979257990360258, + "grad_norm": 847.8425903320312, + "learning_rate": 3.88884455895613e-05, + "loss": 127.5354, + "step": 48990 + }, + { + "epoch": 0.19796620030139345, + "grad_norm": 1160.5299072265625, + "learning_rate": 3.888752740474962e-05, + "loss": 144.2973, + "step": 49000 + }, + { + "epoch": 0.19800660156676106, + "grad_norm": 1301.505126953125, + "learning_rate": 3.888660885171588e-05, + "loss": 118.5209, + "step": 49010 + }, + { + "epoch": 0.1980470028321287, + "grad_norm": 1259.0230712890625, + "learning_rate": 3.8885689930477974e-05, + "loss": 122.4128, + "step": 49020 + }, + { + "epoch": 0.19808740409749634, + "grad_norm": 543.0067138671875, + "learning_rate": 3.8884770641053815e-05, + "loss": 98.4755, + "step": 49030 + }, + { + "epoch": 0.19812780536286395, + "grad_norm": 685.6495971679688, + "learning_rate": 3.888385098346134e-05, + "loss": 88.3914, + "step": 49040 + }, + { + "epoch": 0.1981682066282316, + "grad_norm": 613.2335205078125, + "learning_rate": 3.888293095771846e-05, + "loss": 107.377, + "step": 49050 + }, + { + "epoch": 0.19820860789359923, + "grad_norm": 845.3970947265625, + "learning_rate": 3.888201056384312e-05, + "loss": 138.3754, + "step": 49060 + }, + { + "epoch": 0.19824900915896687, + "grad_norm": 523.9900512695312, + "learning_rate": 3.888108980185326e-05, + "loss": 148.3537, + "step": 49070 + }, + { + "epoch": 0.19828941042433448, + "grad_norm": 1142.1656494140625, + "learning_rate": 3.888016867176684e-05, + "loss": 105.28, + "step": 49080 + }, + { + "epoch": 0.19832981168970212, + "grad_norm": 246.11239624023438, + "learning_rate": 
3.88792471736018e-05, + "loss": 102.9795, + "step": 49090 + }, + { + "epoch": 0.19837021295506976, + "grad_norm": 417.4292297363281, + "learning_rate": 3.887832530737613e-05, + "loss": 91.9698, + "step": 49100 + }, + { + "epoch": 0.19841061422043738, + "grad_norm": 474.63836669921875, + "learning_rate": 3.887740307310779e-05, + "loss": 68.3306, + "step": 49110 + }, + { + "epoch": 0.19845101548580502, + "grad_norm": 762.1257934570312, + "learning_rate": 3.887648047081475e-05, + "loss": 87.1243, + "step": 49120 + }, + { + "epoch": 0.19849141675117266, + "grad_norm": 2127.694091796875, + "learning_rate": 3.887555750051501e-05, + "loss": 148.771, + "step": 49130 + }, + { + "epoch": 0.19853181801654027, + "grad_norm": 1346.75439453125, + "learning_rate": 3.887463416222655e-05, + "loss": 83.1505, + "step": 49140 + }, + { + "epoch": 0.1985722192819079, + "grad_norm": 780.1781616210938, + "learning_rate": 3.887371045596739e-05, + "loss": 117.0809, + "step": 49150 + }, + { + "epoch": 0.19861262054727555, + "grad_norm": 552.8234252929688, + "learning_rate": 3.887278638175552e-05, + "loss": 111.1971, + "step": 49160 + }, + { + "epoch": 0.19865302181264316, + "grad_norm": 859.1828002929688, + "learning_rate": 3.8871861939608976e-05, + "loss": 121.7379, + "step": 49170 + }, + { + "epoch": 0.1986934230780108, + "grad_norm": 2509.05615234375, + "learning_rate": 3.8870937129545756e-05, + "loss": 112.7104, + "step": 49180 + }, + { + "epoch": 0.19873382434337844, + "grad_norm": 603.21142578125, + "learning_rate": 3.88700119515839e-05, + "loss": 69.2183, + "step": 49190 + }, + { + "epoch": 0.19877422560874605, + "grad_norm": 1038.1007080078125, + "learning_rate": 3.886908640574145e-05, + "loss": 144.4927, + "step": 49200 + }, + { + "epoch": 0.1988146268741137, + "grad_norm": 518.784912109375, + "learning_rate": 3.8868160492036444e-05, + "loss": 86.1316, + "step": 49210 + }, + { + "epoch": 0.19885502813948133, + "grad_norm": 763.0643920898438, + "learning_rate": 3.886723421048694e-05, + "loss": 81.8952, + "step": 49220 + }, + { + "epoch": 0.19889542940484897, + "grad_norm": 699.103759765625, + "learning_rate": 3.886630756111099e-05, + "loss": 141.2514, + "step": 49230 + }, + { + "epoch": 0.19893583067021658, + "grad_norm": 1020.3837890625, + "learning_rate": 3.886538054392665e-05, + "loss": 200.4808, + "step": 49240 + }, + { + "epoch": 0.19897623193558422, + "grad_norm": 383.2206115722656, + "learning_rate": 3.886445315895202e-05, + "loss": 98.0343, + "step": 49250 + }, + { + "epoch": 0.19901663320095186, + "grad_norm": 604.207275390625, + "learning_rate": 3.886352540620516e-05, + "loss": 109.7224, + "step": 49260 + }, + { + "epoch": 0.19905703446631948, + "grad_norm": 527.4348754882812, + "learning_rate": 3.886259728570416e-05, + "loss": 84.288, + "step": 49270 + }, + { + "epoch": 0.19909743573168712, + "grad_norm": 1168.240478515625, + "learning_rate": 3.8861668797467114e-05, + "loss": 123.5443, + "step": 49280 + }, + { + "epoch": 0.19913783699705476, + "grad_norm": 562.0802612304688, + "learning_rate": 3.886073994151212e-05, + "loss": 68.7056, + "step": 49290 + }, + { + "epoch": 0.19917823826242237, + "grad_norm": 407.64501953125, + "learning_rate": 3.8859810717857296e-05, + "loss": 72.0495, + "step": 49300 + }, + { + "epoch": 0.19921863952779, + "grad_norm": 632.0004272460938, + "learning_rate": 3.8858881126520745e-05, + "loss": 84.4053, + "step": 49310 + }, + { + "epoch": 0.19925904079315765, + "grad_norm": 1163.47802734375, + "learning_rate": 3.8857951167520605e-05, + "loss": 95.4835, + "step": 49320 + 
}, + { + "epoch": 0.19929944205852526, + "grad_norm": 1117.767578125, + "learning_rate": 3.885702084087499e-05, + "loss": 98.9114, + "step": 49330 + }, + { + "epoch": 0.1993398433238929, + "grad_norm": 716.6659545898438, + "learning_rate": 3.885609014660205e-05, + "loss": 129.9776, + "step": 49340 + }, + { + "epoch": 0.19938024458926054, + "grad_norm": 722.3320922851562, + "learning_rate": 3.885515908471993e-05, + "loss": 128.3364, + "step": 49350 + }, + { + "epoch": 0.19942064585462815, + "grad_norm": 528.6796264648438, + "learning_rate": 3.885422765524677e-05, + "loss": 80.1701, + "step": 49360 + }, + { + "epoch": 0.1994610471199958, + "grad_norm": 563.2837524414062, + "learning_rate": 3.885329585820074e-05, + "loss": 70.0826, + "step": 49370 + }, + { + "epoch": 0.19950144838536343, + "grad_norm": 479.2548828125, + "learning_rate": 3.8852363693599996e-05, + "loss": 112.6978, + "step": 49380 + }, + { + "epoch": 0.19954184965073107, + "grad_norm": 1382.210693359375, + "learning_rate": 3.885143116146272e-05, + "loss": 194.8959, + "step": 49390 + }, + { + "epoch": 0.19958225091609869, + "grad_norm": 3104.953857421875, + "learning_rate": 3.885049826180709e-05, + "loss": 111.9033, + "step": 49400 + }, + { + "epoch": 0.19962265218146633, + "grad_norm": 685.0942993164062, + "learning_rate": 3.8849564994651284e-05, + "loss": 68.0748, + "step": 49410 + }, + { + "epoch": 0.19966305344683397, + "grad_norm": 686.9029541015625, + "learning_rate": 3.884863136001351e-05, + "loss": 112.6801, + "step": 49420 + }, + { + "epoch": 0.19970345471220158, + "grad_norm": 884.4596557617188, + "learning_rate": 3.884769735791196e-05, + "loss": 175.4484, + "step": 49430 + }, + { + "epoch": 0.19974385597756922, + "grad_norm": 500.727783203125, + "learning_rate": 3.8846762988364845e-05, + "loss": 120.7187, + "step": 49440 + }, + { + "epoch": 0.19978425724293686, + "grad_norm": 246.88963317871094, + "learning_rate": 3.884582825139038e-05, + "loss": 90.6743, + "step": 49450 + }, + { + "epoch": 0.19982465850830447, + "grad_norm": 587.0258178710938, + "learning_rate": 3.88448931470068e-05, + "loss": 122.4879, + "step": 49460 + }, + { + "epoch": 0.1998650597736721, + "grad_norm": 972.78857421875, + "learning_rate": 3.884395767523232e-05, + "loss": 73.8851, + "step": 49470 + }, + { + "epoch": 0.19990546103903975, + "grad_norm": 776.7467041015625, + "learning_rate": 3.884302183608519e-05, + "loss": 94.3675, + "step": 49480 + }, + { + "epoch": 0.19994586230440736, + "grad_norm": 506.74737548828125, + "learning_rate": 3.884208562958364e-05, + "loss": 80.9581, + "step": 49490 + }, + { + "epoch": 0.199986263569775, + "grad_norm": 793.1278076171875, + "learning_rate": 3.884114905574594e-05, + "loss": 87.8245, + "step": 49500 + }, + { + "epoch": 0.20002666483514264, + "grad_norm": 1200.2794189453125, + "learning_rate": 3.884021211459033e-05, + "loss": 128.7718, + "step": 49510 + }, + { + "epoch": 0.20006706610051025, + "grad_norm": 741.8319091796875, + "learning_rate": 3.8839274806135095e-05, + "loss": 101.8399, + "step": 49520 + }, + { + "epoch": 0.2001074673658779, + "grad_norm": 1000.8616333007812, + "learning_rate": 3.883833713039849e-05, + "loss": 65.1733, + "step": 49530 + }, + { + "epoch": 0.20014786863124553, + "grad_norm": 821.3330078125, + "learning_rate": 3.8837399087398803e-05, + "loss": 87.6758, + "step": 49540 + }, + { + "epoch": 0.20018826989661317, + "grad_norm": 549.4091796875, + "learning_rate": 3.883646067715433e-05, + "loss": 94.0456, + "step": 49550 + }, + { + "epoch": 0.2002286711619808, + "grad_norm": 
1184.468017578125, + "learning_rate": 3.8835521899683345e-05, + "loss": 146.9013, + "step": 49560 + }, + { + "epoch": 0.20026907242734843, + "grad_norm": 1318.5006103515625, + "learning_rate": 3.883458275500417e-05, + "loss": 136.7744, + "step": 49570 + }, + { + "epoch": 0.20030947369271607, + "grad_norm": 611.1451416015625, + "learning_rate": 3.883364324313512e-05, + "loss": 92.1274, + "step": 49580 + }, + { + "epoch": 0.20034987495808368, + "grad_norm": 988.3657836914062, + "learning_rate": 3.883270336409448e-05, + "loss": 88.35, + "step": 49590 + }, + { + "epoch": 0.20039027622345132, + "grad_norm": 1793.621826171875, + "learning_rate": 3.8831763117900605e-05, + "loss": 123.7871, + "step": 49600 + }, + { + "epoch": 0.20043067748881896, + "grad_norm": 515.9744262695312, + "learning_rate": 3.883082250457181e-05, + "loss": 111.7146, + "step": 49610 + }, + { + "epoch": 0.20047107875418657, + "grad_norm": 713.0057373046875, + "learning_rate": 3.882988152412643e-05, + "loss": 128.0237, + "step": 49620 + }, + { + "epoch": 0.2005114800195542, + "grad_norm": 944.158935546875, + "learning_rate": 3.882894017658283e-05, + "loss": 110.9587, + "step": 49630 + }, + { + "epoch": 0.20055188128492185, + "grad_norm": 605.8291625976562, + "learning_rate": 3.882799846195933e-05, + "loss": 81.711, + "step": 49640 + }, + { + "epoch": 0.20059228255028946, + "grad_norm": 679.453125, + "learning_rate": 3.8827056380274316e-05, + "loss": 82.916, + "step": 49650 + }, + { + "epoch": 0.2006326838156571, + "grad_norm": 1479.878173828125, + "learning_rate": 3.882611393154614e-05, + "loss": 116.1951, + "step": 49660 + }, + { + "epoch": 0.20067308508102474, + "grad_norm": 691.8966064453125, + "learning_rate": 3.882517111579319e-05, + "loss": 110.9224, + "step": 49670 + }, + { + "epoch": 0.20071348634639236, + "grad_norm": 1300.160888671875, + "learning_rate": 3.882422793303382e-05, + "loss": 87.8015, + "step": 49680 + }, + { + "epoch": 0.20075388761176, + "grad_norm": 599.1731567382812, + "learning_rate": 3.882328438328645e-05, + "loss": 86.3788, + "step": 49690 + }, + { + "epoch": 0.20079428887712764, + "grad_norm": 848.4064331054688, + "learning_rate": 3.8822340466569456e-05, + "loss": 104.235, + "step": 49700 + }, + { + "epoch": 0.20083469014249528, + "grad_norm": 400.5657958984375, + "learning_rate": 3.882139618290125e-05, + "loss": 104.7121, + "step": 49710 + }, + { + "epoch": 0.2008750914078629, + "grad_norm": 685.3753051757812, + "learning_rate": 3.882045153230022e-05, + "loss": 132.3183, + "step": 49720 + }, + { + "epoch": 0.20091549267323053, + "grad_norm": 1945.7601318359375, + "learning_rate": 3.881950651478481e-05, + "loss": 138.1802, + "step": 49730 + }, + { + "epoch": 0.20095589393859817, + "grad_norm": 857.56201171875, + "learning_rate": 3.8818561130373436e-05, + "loss": 139.4755, + "step": 49740 + }, + { + "epoch": 0.20099629520396578, + "grad_norm": 529.3978881835938, + "learning_rate": 3.8817615379084514e-05, + "loss": 122.1234, + "step": 49750 + }, + { + "epoch": 0.20103669646933342, + "grad_norm": 909.4279174804688, + "learning_rate": 3.8816669260936494e-05, + "loss": 151.264, + "step": 49760 + }, + { + "epoch": 0.20107709773470106, + "grad_norm": 729.3200073242188, + "learning_rate": 3.881572277594782e-05, + "loss": 152.712, + "step": 49770 + }, + { + "epoch": 0.20111749900006867, + "grad_norm": 975.0932006835938, + "learning_rate": 3.8814775924136945e-05, + "loss": 130.3473, + "step": 49780 + }, + { + "epoch": 0.2011579002654363, + "grad_norm": 908.1292724609375, + "learning_rate": 
3.8813828705522325e-05, + "loss": 133.6763, + "step": 49790 + }, + { + "epoch": 0.20119830153080395, + "grad_norm": 944.3375854492188, + "learning_rate": 3.881288112012243e-05, + "loss": 132.5037, + "step": 49800 + }, + { + "epoch": 0.20123870279617156, + "grad_norm": 1361.216552734375, + "learning_rate": 3.8811933167955734e-05, + "loss": 168.1603, + "step": 49810 + }, + { + "epoch": 0.2012791040615392, + "grad_norm": 1050.24853515625, + "learning_rate": 3.8810984849040715e-05, + "loss": 120.847, + "step": 49820 + }, + { + "epoch": 0.20131950532690684, + "grad_norm": 698.7764892578125, + "learning_rate": 3.8810036163395855e-05, + "loss": 65.8107, + "step": 49830 + }, + { + "epoch": 0.20135990659227446, + "grad_norm": 606.53076171875, + "learning_rate": 3.880908711103966e-05, + "loss": 64.2751, + "step": 49840 + }, + { + "epoch": 0.2014003078576421, + "grad_norm": 908.67431640625, + "learning_rate": 3.8808137691990634e-05, + "loss": 158.4125, + "step": 49850 + }, + { + "epoch": 0.20144070912300974, + "grad_norm": 221.62594604492188, + "learning_rate": 3.880718790626728e-05, + "loss": 107.8299, + "step": 49860 + }, + { + "epoch": 0.20148111038837738, + "grad_norm": 723.91259765625, + "learning_rate": 3.8806237753888104e-05, + "loss": 64.1854, + "step": 49870 + }, + { + "epoch": 0.201521511653745, + "grad_norm": 761.8659057617188, + "learning_rate": 3.880528723487165e-05, + "loss": 71.2856, + "step": 49880 + }, + { + "epoch": 0.20156191291911263, + "grad_norm": 616.7673950195312, + "learning_rate": 3.880433634923644e-05, + "loss": 105.1779, + "step": 49890 + }, + { + "epoch": 0.20160231418448027, + "grad_norm": 564.9991455078125, + "learning_rate": 3.880338509700101e-05, + "loss": 76.4687, + "step": 49900 + }, + { + "epoch": 0.20164271544984788, + "grad_norm": 519.5814208984375, + "learning_rate": 3.88024334781839e-05, + "loss": 83.3373, + "step": 49910 + }, + { + "epoch": 0.20168311671521552, + "grad_norm": 529.5079345703125, + "learning_rate": 3.880148149280368e-05, + "loss": 159.1092, + "step": 49920 + }, + { + "epoch": 0.20172351798058316, + "grad_norm": 525.6912841796875, + "learning_rate": 3.8800529140878894e-05, + "loss": 86.0277, + "step": 49930 + }, + { + "epoch": 0.20176391924595077, + "grad_norm": 580.672607421875, + "learning_rate": 3.879957642242811e-05, + "loss": 122.8843, + "step": 49940 + }, + { + "epoch": 0.2018043205113184, + "grad_norm": 535.4327392578125, + "learning_rate": 3.8798623337469914e-05, + "loss": 127.2044, + "step": 49950 + }, + { + "epoch": 0.20184472177668605, + "grad_norm": 1216.2567138671875, + "learning_rate": 3.879766988602287e-05, + "loss": 99.0283, + "step": 49960 + }, + { + "epoch": 0.20188512304205367, + "grad_norm": 494.8635559082031, + "learning_rate": 3.879671606810558e-05, + "loss": 92.8346, + "step": 49970 + }, + { + "epoch": 0.2019255243074213, + "grad_norm": 645.3959350585938, + "learning_rate": 3.879576188373663e-05, + "loss": 117.8502, + "step": 49980 + }, + { + "epoch": 0.20196592557278895, + "grad_norm": 1207.5260009765625, + "learning_rate": 3.879480733293462e-05, + "loss": 162.9305, + "step": 49990 + }, + { + "epoch": 0.20200632683815656, + "grad_norm": 549.360107421875, + "learning_rate": 3.879385241571817e-05, + "loss": 87.7656, + "step": 50000 + }, + { + "epoch": 0.2020467281035242, + "grad_norm": 875.4998779296875, + "learning_rate": 3.879289713210589e-05, + "loss": 123.9949, + "step": 50010 + }, + { + "epoch": 0.20208712936889184, + "grad_norm": 613.0230712890625, + "learning_rate": 3.8791941482116404e-05, + "loss": 121.4142, + 
"step": 50020 + }, + { + "epoch": 0.20212753063425948, + "grad_norm": 3589.401123046875, + "learning_rate": 3.879098546576835e-05, + "loss": 117.9214, + "step": 50030 + }, + { + "epoch": 0.2021679318996271, + "grad_norm": 943.3099975585938, + "learning_rate": 3.879002908308036e-05, + "loss": 139.6938, + "step": 50040 + }, + { + "epoch": 0.20220833316499473, + "grad_norm": 1916.3447265625, + "learning_rate": 3.878907233407107e-05, + "loss": 105.8749, + "step": 50050 + }, + { + "epoch": 0.20224873443036237, + "grad_norm": 510.4132385253906, + "learning_rate": 3.878811521875915e-05, + "loss": 73.2249, + "step": 50060 + }, + { + "epoch": 0.20228913569572998, + "grad_norm": 541.2601928710938, + "learning_rate": 3.878715773716325e-05, + "loss": 112.6432, + "step": 50070 + }, + { + "epoch": 0.20232953696109762, + "grad_norm": 1115.61279296875, + "learning_rate": 3.8786199889302035e-05, + "loss": 91.951, + "step": 50080 + }, + { + "epoch": 0.20236993822646526, + "grad_norm": 666.957763671875, + "learning_rate": 3.8785241675194175e-05, + "loss": 89.9487, + "step": 50090 + }, + { + "epoch": 0.20241033949183287, + "grad_norm": 631.38037109375, + "learning_rate": 3.878428309485837e-05, + "loss": 86.6944, + "step": 50100 + }, + { + "epoch": 0.20245074075720051, + "grad_norm": 1043.9296875, + "learning_rate": 3.878332414831329e-05, + "loss": 144.775, + "step": 50110 + }, + { + "epoch": 0.20249114202256815, + "grad_norm": 775.1137084960938, + "learning_rate": 3.878236483557763e-05, + "loss": 136.1079, + "step": 50120 + }, + { + "epoch": 0.20253154328793577, + "grad_norm": 1109.0347900390625, + "learning_rate": 3.878140515667011e-05, + "loss": 162.0561, + "step": 50130 + }, + { + "epoch": 0.2025719445533034, + "grad_norm": 1243.5045166015625, + "learning_rate": 3.8780445111609414e-05, + "loss": 118.8121, + "step": 50140 + }, + { + "epoch": 0.20261234581867105, + "grad_norm": 860.9501342773438, + "learning_rate": 3.877948470041428e-05, + "loss": 105.0693, + "step": 50150 + }, + { + "epoch": 0.20265274708403866, + "grad_norm": 1546.9571533203125, + "learning_rate": 3.877852392310342e-05, + "loss": 117.0347, + "step": 50160 + }, + { + "epoch": 0.2026931483494063, + "grad_norm": 455.980224609375, + "learning_rate": 3.877756277969557e-05, + "loss": 109.1162, + "step": 50170 + }, + { + "epoch": 0.20273354961477394, + "grad_norm": 1121.0589599609375, + "learning_rate": 3.877660127020947e-05, + "loss": 99.8596, + "step": 50180 + }, + { + "epoch": 0.20277395088014158, + "grad_norm": 430.4801025390625, + "learning_rate": 3.877563939466385e-05, + "loss": 98.9723, + "step": 50190 + }, + { + "epoch": 0.2028143521455092, + "grad_norm": 825.2913208007812, + "learning_rate": 3.8774677153077485e-05, + "loss": 90.2848, + "step": 50200 + }, + { + "epoch": 0.20285475341087683, + "grad_norm": 600.7296142578125, + "learning_rate": 3.8773714545469116e-05, + "loss": 120.3652, + "step": 50210 + }, + { + "epoch": 0.20289515467624447, + "grad_norm": 1249.969482421875, + "learning_rate": 3.8772751571857525e-05, + "loss": 119.5768, + "step": 50220 + }, + { + "epoch": 0.20293555594161208, + "grad_norm": 1354.2354736328125, + "learning_rate": 3.877178823226147e-05, + "loss": 184.0295, + "step": 50230 + }, + { + "epoch": 0.20297595720697972, + "grad_norm": 429.67388916015625, + "learning_rate": 3.877082452669974e-05, + "loss": 125.3127, + "step": 50240 + }, + { + "epoch": 0.20301635847234736, + "grad_norm": 803.1959838867188, + "learning_rate": 3.876986045519112e-05, + "loss": 78.0817, + "step": 50250 + }, + { + "epoch": 
0.20305675973771498, + "grad_norm": 364.17120361328125, + "learning_rate": 3.876889601775441e-05, + "loss": 94.224, + "step": 50260 + }, + { + "epoch": 0.20309716100308262, + "grad_norm": 1288.222900390625, + "learning_rate": 3.876793121440841e-05, + "loss": 115.9461, + "step": 50270 + }, + { + "epoch": 0.20313756226845026, + "grad_norm": 547.780029296875, + "learning_rate": 3.876696604517193e-05, + "loss": 92.6583, + "step": 50280 + }, + { + "epoch": 0.20317796353381787, + "grad_norm": 438.55780029296875, + "learning_rate": 3.876600051006378e-05, + "loss": 71.0222, + "step": 50290 + }, + { + "epoch": 0.2032183647991855, + "grad_norm": 770.5191650390625, + "learning_rate": 3.8765034609102794e-05, + "loss": 81.7851, + "step": 50300 + }, + { + "epoch": 0.20325876606455315, + "grad_norm": 1021.1569213867188, + "learning_rate": 3.876406834230779e-05, + "loss": 131.9598, + "step": 50310 + }, + { + "epoch": 0.20329916732992076, + "grad_norm": 988.243408203125, + "learning_rate": 3.8763101709697625e-05, + "loss": 107.6591, + "step": 50320 + }, + { + "epoch": 0.2033395685952884, + "grad_norm": 897.6083984375, + "learning_rate": 3.876213471129112e-05, + "loss": 135.6012, + "step": 50330 + }, + { + "epoch": 0.20337996986065604, + "grad_norm": 443.2470703125, + "learning_rate": 3.876116734710714e-05, + "loss": 66.3508, + "step": 50340 + }, + { + "epoch": 0.20342037112602368, + "grad_norm": 96.06703186035156, + "learning_rate": 3.8760199617164556e-05, + "loss": 116.2417, + "step": 50350 + }, + { + "epoch": 0.2034607723913913, + "grad_norm": 721.4225463867188, + "learning_rate": 3.8759231521482215e-05, + "loss": 68.8585, + "step": 50360 + }, + { + "epoch": 0.20350117365675893, + "grad_norm": 882.38525390625, + "learning_rate": 3.875826306007899e-05, + "loss": 95.5863, + "step": 50370 + }, + { + "epoch": 0.20354157492212657, + "grad_norm": 1170.8026123046875, + "learning_rate": 3.875729423297378e-05, + "loss": 95.2002, + "step": 50380 + }, + { + "epoch": 0.20358197618749418, + "grad_norm": 749.6962890625, + "learning_rate": 3.875632504018546e-05, + "loss": 102.5091, + "step": 50390 + }, + { + "epoch": 0.20362237745286182, + "grad_norm": 993.4655151367188, + "learning_rate": 3.875535548173292e-05, + "loss": 85.2011, + "step": 50400 + }, + { + "epoch": 0.20366277871822946, + "grad_norm": 795.72509765625, + "learning_rate": 3.8754385557635076e-05, + "loss": 114.8559, + "step": 50410 + }, + { + "epoch": 0.20370317998359708, + "grad_norm": 629.043212890625, + "learning_rate": 3.875341526791083e-05, + "loss": 75.8726, + "step": 50420 + }, + { + "epoch": 0.20374358124896472, + "grad_norm": 1252.609619140625, + "learning_rate": 3.87524446125791e-05, + "loss": 101.8378, + "step": 50430 + }, + { + "epoch": 0.20378398251433236, + "grad_norm": 1640.451416015625, + "learning_rate": 3.8751473591658805e-05, + "loss": 114.0799, + "step": 50440 + }, + { + "epoch": 0.20382438377969997, + "grad_norm": 953.9592895507812, + "learning_rate": 3.8750502205168876e-05, + "loss": 97.1922, + "step": 50450 + }, + { + "epoch": 0.2038647850450676, + "grad_norm": 548.4063720703125, + "learning_rate": 3.874953045312825e-05, + "loss": 97.5786, + "step": 50460 + }, + { + "epoch": 0.20390518631043525, + "grad_norm": 1097.5439453125, + "learning_rate": 3.8748558335555885e-05, + "loss": 187.9458, + "step": 50470 + }, + { + "epoch": 0.20394558757580286, + "grad_norm": 1089.19873046875, + "learning_rate": 3.8747585852470716e-05, + "loss": 126.6578, + "step": 50480 + }, + { + "epoch": 0.2039859888411705, + "grad_norm": 638.5709228515625, + 
"learning_rate": 3.874661300389171e-05, + "loss": 101.2087, + "step": 50490 + }, + { + "epoch": 0.20402639010653814, + "grad_norm": 839.0718383789062, + "learning_rate": 3.874563978983784e-05, + "loss": 102.8855, + "step": 50500 + }, + { + "epoch": 0.20406679137190578, + "grad_norm": 824.844970703125, + "learning_rate": 3.874466621032806e-05, + "loss": 98.2643, + "step": 50510 + }, + { + "epoch": 0.2041071926372734, + "grad_norm": 274.7867431640625, + "learning_rate": 3.874369226538136e-05, + "loss": 58.8422, + "step": 50520 + }, + { + "epoch": 0.20414759390264103, + "grad_norm": 670.2001953125, + "learning_rate": 3.874271795501674e-05, + "loss": 94.0495, + "step": 50530 + }, + { + "epoch": 0.20418799516800867, + "grad_norm": 1116.9580078125, + "learning_rate": 3.8741743279253176e-05, + "loss": 123.4894, + "step": 50540 + }, + { + "epoch": 0.20422839643337629, + "grad_norm": 890.3828125, + "learning_rate": 3.874076823810968e-05, + "loss": 137.0354, + "step": 50550 + }, + { + "epoch": 0.20426879769874393, + "grad_norm": 918.1715698242188, + "learning_rate": 3.873979283160526e-05, + "loss": 104.1163, + "step": 50560 + }, + { + "epoch": 0.20430919896411157, + "grad_norm": 2313.255615234375, + "learning_rate": 3.8738817059758936e-05, + "loss": 112.2054, + "step": 50570 + }, + { + "epoch": 0.20434960022947918, + "grad_norm": 542.1470336914062, + "learning_rate": 3.873784092258972e-05, + "loss": 77.7575, + "step": 50580 + }, + { + "epoch": 0.20439000149484682, + "grad_norm": 930.6566162109375, + "learning_rate": 3.873686442011665e-05, + "loss": 88.2868, + "step": 50590 + }, + { + "epoch": 0.20443040276021446, + "grad_norm": 1828.058837890625, + "learning_rate": 3.873588755235876e-05, + "loss": 104.844, + "step": 50600 + }, + { + "epoch": 0.20447080402558207, + "grad_norm": 627.7743530273438, + "learning_rate": 3.87349103193351e-05, + "loss": 131.6029, + "step": 50610 + }, + { + "epoch": 0.2045112052909497, + "grad_norm": 988.9513549804688, + "learning_rate": 3.8733932721064714e-05, + "loss": 100.8985, + "step": 50620 + }, + { + "epoch": 0.20455160655631735, + "grad_norm": 630.0882568359375, + "learning_rate": 3.873295475756667e-05, + "loss": 94.2314, + "step": 50630 + }, + { + "epoch": 0.20459200782168496, + "grad_norm": 555.4786376953125, + "learning_rate": 3.873197642886002e-05, + "loss": 111.9631, + "step": 50640 + }, + { + "epoch": 0.2046324090870526, + "grad_norm": 665.03076171875, + "learning_rate": 3.873099773496385e-05, + "loss": 128.3123, + "step": 50650 + }, + { + "epoch": 0.20467281035242024, + "grad_norm": 613.6649169921875, + "learning_rate": 3.873001867589723e-05, + "loss": 73.9499, + "step": 50660 + }, + { + "epoch": 0.20471321161778788, + "grad_norm": 775.0741577148438, + "learning_rate": 3.872903925167927e-05, + "loss": 86.195, + "step": 50670 + }, + { + "epoch": 0.2047536128831555, + "grad_norm": 1221.2705078125, + "learning_rate": 3.8728059462329035e-05, + "loss": 83.5076, + "step": 50680 + }, + { + "epoch": 0.20479401414852313, + "grad_norm": 1068.9427490234375, + "learning_rate": 3.872707930786564e-05, + "loss": 141.047, + "step": 50690 + }, + { + "epoch": 0.20483441541389077, + "grad_norm": 1124.5196533203125, + "learning_rate": 3.872609878830819e-05, + "loss": 114.4162, + "step": 50700 + }, + { + "epoch": 0.2048748166792584, + "grad_norm": 529.0643920898438, + "learning_rate": 3.872511790367581e-05, + "loss": 96.1979, + "step": 50710 + }, + { + "epoch": 0.20491521794462603, + "grad_norm": 608.4442138671875, + "learning_rate": 3.872413665398761e-05, + "loss": 140.9307, + 
"step": 50720 + }, + { + "epoch": 0.20495561920999367, + "grad_norm": 558.7986450195312, + "learning_rate": 3.872315503926273e-05, + "loss": 93.23, + "step": 50730 + }, + { + "epoch": 0.20499602047536128, + "grad_norm": 620.9832153320312, + "learning_rate": 3.87221730595203e-05, + "loss": 79.7111, + "step": 50740 + }, + { + "epoch": 0.20503642174072892, + "grad_norm": 1022.7816772460938, + "learning_rate": 3.872119071477947e-05, + "loss": 108.9605, + "step": 50750 + }, + { + "epoch": 0.20507682300609656, + "grad_norm": 673.4683837890625, + "learning_rate": 3.8720208005059386e-05, + "loss": 85.5521, + "step": 50760 + }, + { + "epoch": 0.20511722427146417, + "grad_norm": 1069.5440673828125, + "learning_rate": 3.871922493037921e-05, + "loss": 150.283, + "step": 50770 + }, + { + "epoch": 0.2051576255368318, + "grad_norm": 912.4382934570312, + "learning_rate": 3.8718241490758106e-05, + "loss": 111.2829, + "step": 50780 + }, + { + "epoch": 0.20519802680219945, + "grad_norm": 840.4956665039062, + "learning_rate": 3.871725768621526e-05, + "loss": 98.7855, + "step": 50790 + }, + { + "epoch": 0.20523842806756706, + "grad_norm": 664.5938110351562, + "learning_rate": 3.871627351676982e-05, + "loss": 85.322, + "step": 50800 + }, + { + "epoch": 0.2052788293329347, + "grad_norm": 498.4617614746094, + "learning_rate": 3.8715288982441e-05, + "loss": 112.2279, + "step": 50810 + }, + { + "epoch": 0.20531923059830234, + "grad_norm": 438.1976623535156, + "learning_rate": 3.8714304083247984e-05, + "loss": 69.8458, + "step": 50820 + }, + { + "epoch": 0.20535963186366998, + "grad_norm": 756.526123046875, + "learning_rate": 3.871331881920998e-05, + "loss": 70.3773, + "step": 50830 + }, + { + "epoch": 0.2054000331290376, + "grad_norm": 1314.40185546875, + "learning_rate": 3.871233319034619e-05, + "loss": 162.1459, + "step": 50840 + }, + { + "epoch": 0.20544043439440524, + "grad_norm": 653.92919921875, + "learning_rate": 3.871134719667583e-05, + "loss": 126.1633, + "step": 50850 + }, + { + "epoch": 0.20548083565977288, + "grad_norm": 881.9590454101562, + "learning_rate": 3.8710360838218125e-05, + "loss": 125.21, + "step": 50860 + }, + { + "epoch": 0.2055212369251405, + "grad_norm": 783.8955078125, + "learning_rate": 3.870937411499229e-05, + "loss": 108.3043, + "step": 50870 + }, + { + "epoch": 0.20556163819050813, + "grad_norm": 1151.00048828125, + "learning_rate": 3.870838702701759e-05, + "loss": 115.4297, + "step": 50880 + }, + { + "epoch": 0.20560203945587577, + "grad_norm": 933.3064575195312, + "learning_rate": 3.8707399574313246e-05, + "loss": 132.5293, + "step": 50890 + }, + { + "epoch": 0.20564244072124338, + "grad_norm": 743.0598754882812, + "learning_rate": 3.870641175689852e-05, + "loss": 96.2465, + "step": 50900 + }, + { + "epoch": 0.20568284198661102, + "grad_norm": 1055.22705078125, + "learning_rate": 3.870542357479266e-05, + "loss": 123.1659, + "step": 50910 + }, + { + "epoch": 0.20572324325197866, + "grad_norm": 808.2647094726562, + "learning_rate": 3.870443502801494e-05, + "loss": 91.3329, + "step": 50920 + }, + { + "epoch": 0.20576364451734627, + "grad_norm": 580.4387817382812, + "learning_rate": 3.870344611658464e-05, + "loss": 146.4781, + "step": 50930 + }, + { + "epoch": 0.2058040457827139, + "grad_norm": 1018.585205078125, + "learning_rate": 3.870245684052101e-05, + "loss": 139.4332, + "step": 50940 + }, + { + "epoch": 0.20584444704808155, + "grad_norm": 506.18841552734375, + "learning_rate": 3.870146719984337e-05, + "loss": 120.0252, + "step": 50950 + }, + { + "epoch": 0.20588484831344916, + 
"grad_norm": 4309.322265625, + "learning_rate": 3.870047719457099e-05, + "loss": 139.2318, + "step": 50960 + }, + { + "epoch": 0.2059252495788168, + "grad_norm": 841.4314575195312, + "learning_rate": 3.869948682472318e-05, + "loss": 82.2493, + "step": 50970 + }, + { + "epoch": 0.20596565084418444, + "grad_norm": 2173.218017578125, + "learning_rate": 3.8698496090319257e-05, + "loss": 109.211, + "step": 50980 + }, + { + "epoch": 0.20600605210955208, + "grad_norm": 692.8660888671875, + "learning_rate": 3.869750499137851e-05, + "loss": 86.2279, + "step": 50990 + }, + { + "epoch": 0.2060464533749197, + "grad_norm": 825.2990112304688, + "learning_rate": 3.869651352792029e-05, + "loss": 114.7764, + "step": 51000 + }, + { + "epoch": 0.20608685464028734, + "grad_norm": 833.8541259765625, + "learning_rate": 3.869552169996391e-05, + "loss": 124.5552, + "step": 51010 + }, + { + "epoch": 0.20612725590565498, + "grad_norm": 1200.4974365234375, + "learning_rate": 3.869452950752871e-05, + "loss": 92.5732, + "step": 51020 + }, + { + "epoch": 0.2061676571710226, + "grad_norm": 1312.2685546875, + "learning_rate": 3.869353695063403e-05, + "loss": 100.4753, + "step": 51030 + }, + { + "epoch": 0.20620805843639023, + "grad_norm": 1320.7943115234375, + "learning_rate": 3.8692544029299225e-05, + "loss": 90.4748, + "step": 51040 + }, + { + "epoch": 0.20624845970175787, + "grad_norm": 1162.41259765625, + "learning_rate": 3.869155074354365e-05, + "loss": 109.0408, + "step": 51050 + }, + { + "epoch": 0.20628886096712548, + "grad_norm": 1245.6553955078125, + "learning_rate": 3.869055709338667e-05, + "loss": 147.5316, + "step": 51060 + }, + { + "epoch": 0.20632926223249312, + "grad_norm": 865.0747680664062, + "learning_rate": 3.8689563078847656e-05, + "loss": 88.9873, + "step": 51070 + }, + { + "epoch": 0.20636966349786076, + "grad_norm": 656.0969848632812, + "learning_rate": 3.868856869994599e-05, + "loss": 125.0611, + "step": 51080 + }, + { + "epoch": 0.20641006476322837, + "grad_norm": 1334.56494140625, + "learning_rate": 3.868757395670105e-05, + "loss": 166.0245, + "step": 51090 + }, + { + "epoch": 0.206450466028596, + "grad_norm": 1313.0301513671875, + "learning_rate": 3.8686578849132244e-05, + "loss": 122.7232, + "step": 51100 + }, + { + "epoch": 0.20649086729396365, + "grad_norm": 1238.9815673828125, + "learning_rate": 3.8685583377258955e-05, + "loss": 78.3687, + "step": 51110 + }, + { + "epoch": 0.20653126855933127, + "grad_norm": 388.1924133300781, + "learning_rate": 3.86845875411006e-05, + "loss": 108.5818, + "step": 51120 + }, + { + "epoch": 0.2065716698246989, + "grad_norm": 916.14306640625, + "learning_rate": 3.8683591340676596e-05, + "loss": 114.0382, + "step": 51130 + }, + { + "epoch": 0.20661207109006655, + "grad_norm": 616.5899658203125, + "learning_rate": 3.868259477600636e-05, + "loss": 110.3037, + "step": 51140 + }, + { + "epoch": 0.20665247235543419, + "grad_norm": 772.9498291015625, + "learning_rate": 3.868159784710931e-05, + "loss": 153.4889, + "step": 51150 + }, + { + "epoch": 0.2066928736208018, + "grad_norm": 741.1786499023438, + "learning_rate": 3.8680600554004905e-05, + "loss": 93.8081, + "step": 51160 + }, + { + "epoch": 0.20673327488616944, + "grad_norm": 950.9337768554688, + "learning_rate": 3.867960289671257e-05, + "loss": 95.8008, + "step": 51170 + }, + { + "epoch": 0.20677367615153708, + "grad_norm": 800.55517578125, + "learning_rate": 3.867860487525175e-05, + "loss": 138.089, + "step": 51180 + }, + { + "epoch": 0.2068140774169047, + "grad_norm": 627.2918701171875, + "learning_rate": 
3.8677606489641924e-05, + "loss": 85.5499, + "step": 51190 + }, + { + "epoch": 0.20685447868227233, + "grad_norm": 1063.9034423828125, + "learning_rate": 3.867660773990254e-05, + "loss": 136.646, + "step": 51200 + }, + { + "epoch": 0.20689487994763997, + "grad_norm": 433.5802001953125, + "learning_rate": 3.867560862605307e-05, + "loss": 77.3737, + "step": 51210 + }, + { + "epoch": 0.20693528121300758, + "grad_norm": 378.2963562011719, + "learning_rate": 3.8674609148112996e-05, + "loss": 102.003, + "step": 51220 + }, + { + "epoch": 0.20697568247837522, + "grad_norm": 452.7121887207031, + "learning_rate": 3.86736093061018e-05, + "loss": 131.616, + "step": 51230 + }, + { + "epoch": 0.20701608374374286, + "grad_norm": 728.0429077148438, + "learning_rate": 3.8672609100038986e-05, + "loss": 95.1354, + "step": 51240 + }, + { + "epoch": 0.20705648500911047, + "grad_norm": 503.1011047363281, + "learning_rate": 3.8671608529944035e-05, + "loss": 118.5864, + "step": 51250 + }, + { + "epoch": 0.20709688627447811, + "grad_norm": 831.1845092773438, + "learning_rate": 3.867060759583647e-05, + "loss": 81.831, + "step": 51260 + }, + { + "epoch": 0.20713728753984575, + "grad_norm": 646.74609375, + "learning_rate": 3.866960629773579e-05, + "loss": 109.3257, + "step": 51270 + }, + { + "epoch": 0.20717768880521337, + "grad_norm": 567.3231201171875, + "learning_rate": 3.866860463566153e-05, + "loss": 93.6975, + "step": 51280 + }, + { + "epoch": 0.207218090070581, + "grad_norm": 959.0054321289062, + "learning_rate": 3.866760260963321e-05, + "loss": 83.6959, + "step": 51290 + }, + { + "epoch": 0.20725849133594865, + "grad_norm": 553.5897216796875, + "learning_rate": 3.8666600219670365e-05, + "loss": 107.1929, + "step": 51300 + }, + { + "epoch": 0.2072988926013163, + "grad_norm": 508.02471923828125, + "learning_rate": 3.866559746579254e-05, + "loss": 92.0839, + "step": 51310 + }, + { + "epoch": 0.2073392938666839, + "grad_norm": 1195.8870849609375, + "learning_rate": 3.866459434801928e-05, + "loss": 108.9866, + "step": 51320 + }, + { + "epoch": 0.20737969513205154, + "grad_norm": 651.8486938476562, + "learning_rate": 3.8663590866370147e-05, + "loss": 90.4832, + "step": 51330 + }, + { + "epoch": 0.20742009639741918, + "grad_norm": 922.624267578125, + "learning_rate": 3.86625870208647e-05, + "loss": 117.7784, + "step": 51340 + }, + { + "epoch": 0.2074604976627868, + "grad_norm": 919.3548583984375, + "learning_rate": 3.866158281152251e-05, + "loss": 100.6365, + "step": 51350 + }, + { + "epoch": 0.20750089892815443, + "grad_norm": 1454.072509765625, + "learning_rate": 3.8660578238363156e-05, + "loss": 72.895, + "step": 51360 + }, + { + "epoch": 0.20754130019352207, + "grad_norm": 1251.739013671875, + "learning_rate": 3.865957330140622e-05, + "loss": 113.8492, + "step": 51370 + }, + { + "epoch": 0.20758170145888968, + "grad_norm": 1465.5675048828125, + "learning_rate": 3.86585680006713e-05, + "loss": 110.4542, + "step": 51380 + }, + { + "epoch": 0.20762210272425732, + "grad_norm": 675.8663940429688, + "learning_rate": 3.865756233617799e-05, + "loss": 125.9496, + "step": 51390 + }, + { + "epoch": 0.20766250398962496, + "grad_norm": 557.3988647460938, + "learning_rate": 3.8656556307945894e-05, + "loss": 76.2204, + "step": 51400 + }, + { + "epoch": 0.20770290525499258, + "grad_norm": 530.0462646484375, + "learning_rate": 3.8655549915994626e-05, + "loss": 99.9612, + "step": 51410 + }, + { + "epoch": 0.20774330652036022, + "grad_norm": 948.0579223632812, + "learning_rate": 3.8654543160343816e-05, + "loss": 146.0215, + 
"step": 51420 + }, + { + "epoch": 0.20778370778572786, + "grad_norm": 1174.2886962890625, + "learning_rate": 3.8653536041013075e-05, + "loss": 90.1425, + "step": 51430 + }, + { + "epoch": 0.20782410905109547, + "grad_norm": 842.8555297851562, + "learning_rate": 3.865252855802205e-05, + "loss": 66.3848, + "step": 51440 + }, + { + "epoch": 0.2078645103164631, + "grad_norm": 945.161376953125, + "learning_rate": 3.865152071139038e-05, + "loss": 94.353, + "step": 51450 + }, + { + "epoch": 0.20790491158183075, + "grad_norm": 850.419189453125, + "learning_rate": 3.8650512501137704e-05, + "loss": 99.3069, + "step": 51460 + }, + { + "epoch": 0.2079453128471984, + "grad_norm": 1764.5081787109375, + "learning_rate": 3.8649503927283686e-05, + "loss": 155.47, + "step": 51470 + }, + { + "epoch": 0.207985714112566, + "grad_norm": 1138.6641845703125, + "learning_rate": 3.864849498984799e-05, + "loss": 80.6237, + "step": 51480 + }, + { + "epoch": 0.20802611537793364, + "grad_norm": 659.2052612304688, + "learning_rate": 3.864748568885029e-05, + "loss": 133.0089, + "step": 51490 + }, + { + "epoch": 0.20806651664330128, + "grad_norm": 619.71875, + "learning_rate": 3.864647602431025e-05, + "loss": 79.966, + "step": 51500 + }, + { + "epoch": 0.2081069179086689, + "grad_norm": 805.0341186523438, + "learning_rate": 3.864546599624756e-05, + "loss": 100.3651, + "step": 51510 + }, + { + "epoch": 0.20814731917403653, + "grad_norm": 705.2735595703125, + "learning_rate": 3.864445560468191e-05, + "loss": 109.6481, + "step": 51520 + }, + { + "epoch": 0.20818772043940417, + "grad_norm": 1212.481689453125, + "learning_rate": 3.8643444849633e-05, + "loss": 136.3803, + "step": 51530 + }, + { + "epoch": 0.20822812170477178, + "grad_norm": 475.7823791503906, + "learning_rate": 3.864243373112053e-05, + "loss": 66.6733, + "step": 51540 + }, + { + "epoch": 0.20826852297013942, + "grad_norm": 873.8244018554688, + "learning_rate": 3.864142224916422e-05, + "loss": 108.5026, + "step": 51550 + }, + { + "epoch": 0.20830892423550706, + "grad_norm": 854.0802612304688, + "learning_rate": 3.864041040378379e-05, + "loss": 62.2453, + "step": 51560 + }, + { + "epoch": 0.20834932550087468, + "grad_norm": 654.131591796875, + "learning_rate": 3.863939819499896e-05, + "loss": 83.3596, + "step": 51570 + }, + { + "epoch": 0.20838972676624232, + "grad_norm": 926.9273681640625, + "learning_rate": 3.8638385622829464e-05, + "loss": 144.1666, + "step": 51580 + }, + { + "epoch": 0.20843012803160996, + "grad_norm": 504.29925537109375, + "learning_rate": 3.863737268729504e-05, + "loss": 78.0327, + "step": 51590 + }, + { + "epoch": 0.20847052929697757, + "grad_norm": 982.362548828125, + "learning_rate": 3.863635938841545e-05, + "loss": 87.2406, + "step": 51600 + }, + { + "epoch": 0.2085109305623452, + "grad_norm": 495.6214599609375, + "learning_rate": 3.8635345726210435e-05, + "loss": 132.5894, + "step": 51610 + }, + { + "epoch": 0.20855133182771285, + "grad_norm": 1815.7562255859375, + "learning_rate": 3.863433170069976e-05, + "loss": 88.248, + "step": 51620 + }, + { + "epoch": 0.20859173309308046, + "grad_norm": 503.46502685546875, + "learning_rate": 3.86333173119032e-05, + "loss": 95.734, + "step": 51630 + }, + { + "epoch": 0.2086321343584481, + "grad_norm": 1016.3768310546875, + "learning_rate": 3.863230255984052e-05, + "loss": 59.7907, + "step": 51640 + }, + { + "epoch": 0.20867253562381574, + "grad_norm": 381.75238037109375, + "learning_rate": 3.863128744453152e-05, + "loss": 78.9992, + "step": 51650 + }, + { + "epoch": 0.20871293688918338, + 
"grad_norm": 511.2264404296875, + "learning_rate": 3.863027196599596e-05, + "loss": 109.8095, + "step": 51660 + }, + { + "epoch": 0.208753338154551, + "grad_norm": 1152.7122802734375, + "learning_rate": 3.8629256124253675e-05, + "loss": 117.9183, + "step": 51670 + }, + { + "epoch": 0.20879373941991863, + "grad_norm": 1969.9962158203125, + "learning_rate": 3.8628239919324435e-05, + "loss": 74.0447, + "step": 51680 + }, + { + "epoch": 0.20883414068528627, + "grad_norm": 912.0073852539062, + "learning_rate": 3.862722335122808e-05, + "loss": 117.4194, + "step": 51690 + }, + { + "epoch": 0.20887454195065389, + "grad_norm": 1044.0916748046875, + "learning_rate": 3.862620641998441e-05, + "loss": 102.5931, + "step": 51700 + }, + { + "epoch": 0.20891494321602153, + "grad_norm": 774.9388427734375, + "learning_rate": 3.862518912561326e-05, + "loss": 88.5834, + "step": 51710 + }, + { + "epoch": 0.20895534448138917, + "grad_norm": 313.0509948730469, + "learning_rate": 3.862417146813445e-05, + "loss": 72.5891, + "step": 51720 + }, + { + "epoch": 0.20899574574675678, + "grad_norm": 584.10205078125, + "learning_rate": 3.862315344756784e-05, + "loss": 152.6032, + "step": 51730 + }, + { + "epoch": 0.20903614701212442, + "grad_norm": 418.42529296875, + "learning_rate": 3.862213506393326e-05, + "loss": 105.9709, + "step": 51740 + }, + { + "epoch": 0.20907654827749206, + "grad_norm": 982.24365234375, + "learning_rate": 3.862111631725057e-05, + "loss": 123.7015, + "step": 51750 + }, + { + "epoch": 0.20911694954285967, + "grad_norm": 546.85107421875, + "learning_rate": 3.862009720753963e-05, + "loss": 141.8278, + "step": 51760 + }, + { + "epoch": 0.2091573508082273, + "grad_norm": 686.5492553710938, + "learning_rate": 3.8619077734820314e-05, + "loss": 86.8984, + "step": 51770 + }, + { + "epoch": 0.20919775207359495, + "grad_norm": 930.6064453125, + "learning_rate": 3.861805789911248e-05, + "loss": 169.7111, + "step": 51780 + }, + { + "epoch": 0.20923815333896256, + "grad_norm": 792.90673828125, + "learning_rate": 3.8617037700436034e-05, + "loss": 79.8294, + "step": 51790 + }, + { + "epoch": 0.2092785546043302, + "grad_norm": 657.9528198242188, + "learning_rate": 3.861601713881085e-05, + "loss": 79.0626, + "step": 51800 + }, + { + "epoch": 0.20931895586969784, + "grad_norm": 1208.543701171875, + "learning_rate": 3.8614996214256826e-05, + "loss": 110.3382, + "step": 51810 + }, + { + "epoch": 0.20935935713506548, + "grad_norm": 731.6337280273438, + "learning_rate": 3.861397492679387e-05, + "loss": 85.1332, + "step": 51820 + }, + { + "epoch": 0.2093997584004331, + "grad_norm": 1248.1378173828125, + "learning_rate": 3.861295327644189e-05, + "loss": 96.5375, + "step": 51830 + }, + { + "epoch": 0.20944015966580073, + "grad_norm": 934.6705932617188, + "learning_rate": 3.8611931263220794e-05, + "loss": 105.9522, + "step": 51840 + }, + { + "epoch": 0.20948056093116837, + "grad_norm": 1036.827880859375, + "learning_rate": 3.861090888715052e-05, + "loss": 119.8157, + "step": 51850 + }, + { + "epoch": 0.209520962196536, + "grad_norm": 966.8258666992188, + "learning_rate": 3.8609886148251006e-05, + "loss": 166.5316, + "step": 51860 + }, + { + "epoch": 0.20956136346190363, + "grad_norm": 891.9923095703125, + "learning_rate": 3.8608863046542164e-05, + "loss": 78.9604, + "step": 51870 + }, + { + "epoch": 0.20960176472727127, + "grad_norm": 627.9998779296875, + "learning_rate": 3.860783958204397e-05, + "loss": 113.6222, + "step": 51880 + }, + { + "epoch": 0.20964216599263888, + "grad_norm": 472.5683288574219, + "learning_rate": 
3.860681575477636e-05, + "loss": 80.3333, + "step": 51890 + }, + { + "epoch": 0.20968256725800652, + "grad_norm": 335.07757568359375, + "learning_rate": 3.8605791564759296e-05, + "loss": 80.0421, + "step": 51900 + }, + { + "epoch": 0.20972296852337416, + "grad_norm": 1141.6392822265625, + "learning_rate": 3.8604767012012746e-05, + "loss": 114.9431, + "step": 51910 + }, + { + "epoch": 0.20976336978874177, + "grad_norm": 843.5435180664062, + "learning_rate": 3.8603742096556687e-05, + "loss": 141.0085, + "step": 51920 + }, + { + "epoch": 0.2098037710541094, + "grad_norm": 656.4926147460938, + "learning_rate": 3.8602716818411093e-05, + "loss": 97.4436, + "step": 51930 + }, + { + "epoch": 0.20984417231947705, + "grad_norm": 726.24072265625, + "learning_rate": 3.8601691177595964e-05, + "loss": 95.2998, + "step": 51940 + }, + { + "epoch": 0.20988457358484466, + "grad_norm": 1842.2354736328125, + "learning_rate": 3.860066517413129e-05, + "loss": 113.0804, + "step": 51950 + }, + { + "epoch": 0.2099249748502123, + "grad_norm": 719.6060791015625, + "learning_rate": 3.859963880803706e-05, + "loss": 79.0089, + "step": 51960 + }, + { + "epoch": 0.20996537611557994, + "grad_norm": 333.9413146972656, + "learning_rate": 3.859861207933331e-05, + "loss": 89.66, + "step": 51970 + }, + { + "epoch": 0.21000577738094758, + "grad_norm": 898.9905395507812, + "learning_rate": 3.8597584988040034e-05, + "loss": 122.6174, + "step": 51980 + }, + { + "epoch": 0.2100461786463152, + "grad_norm": 1054.6978759765625, + "learning_rate": 3.859655753417726e-05, + "loss": 111.2412, + "step": 51990 + }, + { + "epoch": 0.21008657991168284, + "grad_norm": 810.895263671875, + "learning_rate": 3.859552971776503e-05, + "loss": 118.1001, + "step": 52000 + }, + { + "epoch": 0.21012698117705048, + "grad_norm": 1131.67626953125, + "learning_rate": 3.8594501538823374e-05, + "loss": 75.2654, + "step": 52010 + }, + { + "epoch": 0.2101673824424181, + "grad_norm": 715.561767578125, + "learning_rate": 3.8593472997372336e-05, + "loss": 107.7704, + "step": 52020 + }, + { + "epoch": 0.21020778370778573, + "grad_norm": 1144.4610595703125, + "learning_rate": 3.8592444093431976e-05, + "loss": 74.9168, + "step": 52030 + }, + { + "epoch": 0.21024818497315337, + "grad_norm": 912.3102416992188, + "learning_rate": 3.859141482702233e-05, + "loss": 115.1821, + "step": 52040 + }, + { + "epoch": 0.21028858623852098, + "grad_norm": 516.47265625, + "learning_rate": 3.8590385198163495e-05, + "loss": 87.2147, + "step": 52050 + }, + { + "epoch": 0.21032898750388862, + "grad_norm": 1425.9791259765625, + "learning_rate": 3.858935520687553e-05, + "loss": 102.2372, + "step": 52060 + }, + { + "epoch": 0.21036938876925626, + "grad_norm": 661.391845703125, + "learning_rate": 3.858832485317851e-05, + "loss": 127.1126, + "step": 52070 + }, + { + "epoch": 0.21040979003462387, + "grad_norm": 473.6487121582031, + "learning_rate": 3.8587294137092526e-05, + "loss": 88.9348, + "step": 52080 + }, + { + "epoch": 0.2104501912999915, + "grad_norm": 903.9165649414062, + "learning_rate": 3.858626305863767e-05, + "loss": 109.6857, + "step": 52090 + }, + { + "epoch": 0.21049059256535915, + "grad_norm": 1055.830810546875, + "learning_rate": 3.8585231617834054e-05, + "loss": 123.2931, + "step": 52100 + }, + { + "epoch": 0.21053099383072676, + "grad_norm": 1105.235107421875, + "learning_rate": 3.8584199814701774e-05, + "loss": 89.4342, + "step": 52110 + }, + { + "epoch": 0.2105713950960944, + "grad_norm": 1118.4744873046875, + "learning_rate": 3.8583167649260956e-05, + "loss": 127.9554, + 
"step": 52120 + }, + { + "epoch": 0.21061179636146204, + "grad_norm": 513.7301025390625, + "learning_rate": 3.858213512153171e-05, + "loss": 183.9393, + "step": 52130 + }, + { + "epoch": 0.21065219762682968, + "grad_norm": 854.0028076171875, + "learning_rate": 3.858110223153418e-05, + "loss": 59.6559, + "step": 52140 + }, + { + "epoch": 0.2106925988921973, + "grad_norm": 528.820068359375, + "learning_rate": 3.8580068979288495e-05, + "loss": 104.9786, + "step": 52150 + }, + { + "epoch": 0.21073300015756494, + "grad_norm": 810.1767578125, + "learning_rate": 3.8579035364814793e-05, + "loss": 122.0594, + "step": 52160 + }, + { + "epoch": 0.21077340142293258, + "grad_norm": 569.7324829101562, + "learning_rate": 3.857800138813324e-05, + "loss": 131.7921, + "step": 52170 + }, + { + "epoch": 0.2108138026883002, + "grad_norm": 914.1824340820312, + "learning_rate": 3.857696704926398e-05, + "loss": 90.7004, + "step": 52180 + }, + { + "epoch": 0.21085420395366783, + "grad_norm": 1238.8817138671875, + "learning_rate": 3.857593234822718e-05, + "loss": 119.4266, + "step": 52190 + }, + { + "epoch": 0.21089460521903547, + "grad_norm": 1287.376953125, + "learning_rate": 3.857489728504303e-05, + "loss": 94.6866, + "step": 52200 + }, + { + "epoch": 0.21093500648440308, + "grad_norm": 346.40997314453125, + "learning_rate": 3.857386185973168e-05, + "loss": 142.1526, + "step": 52210 + }, + { + "epoch": 0.21097540774977072, + "grad_norm": 889.7410278320312, + "learning_rate": 3.857282607231334e-05, + "loss": 94.0721, + "step": 52220 + }, + { + "epoch": 0.21101580901513836, + "grad_norm": 491.09967041015625, + "learning_rate": 3.857178992280819e-05, + "loss": 116.4351, + "step": 52230 + }, + { + "epoch": 0.21105621028050597, + "grad_norm": 608.4370727539062, + "learning_rate": 3.857075341123643e-05, + "loss": 82.9319, + "step": 52240 + }, + { + "epoch": 0.2110966115458736, + "grad_norm": 669.2770385742188, + "learning_rate": 3.856971653761827e-05, + "loss": 106.9901, + "step": 52250 + }, + { + "epoch": 0.21113701281124125, + "grad_norm": 1251.7674560546875, + "learning_rate": 3.856867930197393e-05, + "loss": 128.5489, + "step": 52260 + }, + { + "epoch": 0.21117741407660887, + "grad_norm": 698.3843994140625, + "learning_rate": 3.8567641704323636e-05, + "loss": 114.8987, + "step": 52270 + }, + { + "epoch": 0.2112178153419765, + "grad_norm": 535.988037109375, + "learning_rate": 3.8566603744687595e-05, + "loss": 99.4671, + "step": 52280 + }, + { + "epoch": 0.21125821660734415, + "grad_norm": 635.9625854492188, + "learning_rate": 3.856556542308606e-05, + "loss": 169.3537, + "step": 52290 + }, + { + "epoch": 0.21129861787271179, + "grad_norm": 553.920654296875, + "learning_rate": 3.8564526739539266e-05, + "loss": 99.1198, + "step": 52300 + }, + { + "epoch": 0.2113390191380794, + "grad_norm": 517.5501098632812, + "learning_rate": 3.856348769406747e-05, + "loss": 74.6461, + "step": 52310 + }, + { + "epoch": 0.21137942040344704, + "grad_norm": 1410.1646728515625, + "learning_rate": 3.856244828669092e-05, + "loss": 139.3621, + "step": 52320 + }, + { + "epoch": 0.21141982166881468, + "grad_norm": 480.0911560058594, + "learning_rate": 3.856140851742989e-05, + "loss": 66.3464, + "step": 52330 + }, + { + "epoch": 0.2114602229341823, + "grad_norm": 1078.4588623046875, + "learning_rate": 3.856036838630464e-05, + "loss": 147.4885, + "step": 52340 + }, + { + "epoch": 0.21150062419954993, + "grad_norm": 298.8399963378906, + "learning_rate": 3.855932789333546e-05, + "loss": 130.5147, + "step": 52350 + }, + { + "epoch": 
0.21154102546491757, + "grad_norm": 806.4171752929688, + "learning_rate": 3.855828703854262e-05, + "loss": 127.6185, + "step": 52360 + }, + { + "epoch": 0.21158142673028518, + "grad_norm": 737.4076538085938, + "learning_rate": 3.8557245821946414e-05, + "loss": 105.2953, + "step": 52370 + }, + { + "epoch": 0.21162182799565282, + "grad_norm": 904.2188720703125, + "learning_rate": 3.8556204243567156e-05, + "loss": 168.2607, + "step": 52380 + }, + { + "epoch": 0.21166222926102046, + "grad_norm": 714.0831298828125, + "learning_rate": 3.855516230342514e-05, + "loss": 85.8001, + "step": 52390 + }, + { + "epoch": 0.21170263052638807, + "grad_norm": 972.1199340820312, + "learning_rate": 3.8554120001540684e-05, + "loss": 83.7086, + "step": 52400 + }, + { + "epoch": 0.21174303179175571, + "grad_norm": 2245.96044921875, + "learning_rate": 3.8553077337934114e-05, + "loss": 147.9068, + "step": 52410 + }, + { + "epoch": 0.21178343305712335, + "grad_norm": 344.1960144042969, + "learning_rate": 3.855203431262574e-05, + "loss": 84.0372, + "step": 52420 + }, + { + "epoch": 0.21182383432249097, + "grad_norm": 611.7655029296875, + "learning_rate": 3.85509909256359e-05, + "loss": 150.8013, + "step": 52430 + }, + { + "epoch": 0.2118642355878586, + "grad_norm": 1171.38623046875, + "learning_rate": 3.854994717698495e-05, + "loss": 133.929, + "step": 52440 + }, + { + "epoch": 0.21190463685322625, + "grad_norm": 1088.0391845703125, + "learning_rate": 3.8548903066693234e-05, + "loss": 82.4372, + "step": 52450 + }, + { + "epoch": 0.2119450381185939, + "grad_norm": 811.6929931640625, + "learning_rate": 3.854785859478109e-05, + "loss": 95.6484, + "step": 52460 + }, + { + "epoch": 0.2119854393839615, + "grad_norm": 565.797607421875, + "learning_rate": 3.85468137612689e-05, + "loss": 65.4899, + "step": 52470 + }, + { + "epoch": 0.21202584064932914, + "grad_norm": 509.7371520996094, + "learning_rate": 3.8545768566177034e-05, + "loss": 79.9028, + "step": 52480 + }, + { + "epoch": 0.21206624191469678, + "grad_norm": 1354.4044189453125, + "learning_rate": 3.8544723009525855e-05, + "loss": 106.8125, + "step": 52490 + }, + { + "epoch": 0.2121066431800644, + "grad_norm": 649.8846435546875, + "learning_rate": 3.854367709133575e-05, + "loss": 80.2912, + "step": 52500 + }, + { + "epoch": 0.21214704444543203, + "grad_norm": 1109.665771484375, + "learning_rate": 3.854263081162712e-05, + "loss": 125.2893, + "step": 52510 + }, + { + "epoch": 0.21218744571079967, + "grad_norm": 811.7406616210938, + "learning_rate": 3.854158417042035e-05, + "loss": 120.9456, + "step": 52520 + }, + { + "epoch": 0.21222784697616728, + "grad_norm": 688.89404296875, + "learning_rate": 3.854053716773586e-05, + "loss": 92.9888, + "step": 52530 + }, + { + "epoch": 0.21226824824153492, + "grad_norm": 856.7677612304688, + "learning_rate": 3.8539489803594044e-05, + "loss": 83.4437, + "step": 52540 + }, + { + "epoch": 0.21230864950690256, + "grad_norm": 623.6085205078125, + "learning_rate": 3.853844207801533e-05, + "loss": 95.2033, + "step": 52550 + }, + { + "epoch": 0.21234905077227018, + "grad_norm": 1209.919677734375, + "learning_rate": 3.853739399102014e-05, + "loss": 115.9854, + "step": 52560 + }, + { + "epoch": 0.21238945203763782, + "grad_norm": 995.747802734375, + "learning_rate": 3.8536345542628925e-05, + "loss": 93.9572, + "step": 52570 + }, + { + "epoch": 0.21242985330300546, + "grad_norm": 1574.4676513671875, + "learning_rate": 3.85352967328621e-05, + "loss": 121.6029, + "step": 52580 + }, + { + "epoch": 0.21247025456837307, + "grad_norm": 
416.8481750488281, + "learning_rate": 3.8534247561740124e-05, + "loss": 99.7631, + "step": 52590 + }, + { + "epoch": 0.2125106558337407, + "grad_norm": 659.5416259765625, + "learning_rate": 3.853319802928345e-05, + "loss": 104.323, + "step": 52600 + }, + { + "epoch": 0.21255105709910835, + "grad_norm": 1452.5733642578125, + "learning_rate": 3.853214813551254e-05, + "loss": 111.4744, + "step": 52610 + }, + { + "epoch": 0.212591458364476, + "grad_norm": 754.7325439453125, + "learning_rate": 3.8531097880447854e-05, + "loss": 89.0483, + "step": 52620 + }, + { + "epoch": 0.2126318596298436, + "grad_norm": 1597.6492919921875, + "learning_rate": 3.853004726410988e-05, + "loss": 136.0863, + "step": 52630 + }, + { + "epoch": 0.21267226089521124, + "grad_norm": 1151.273681640625, + "learning_rate": 3.852899628651909e-05, + "loss": 120.7912, + "step": 52640 + }, + { + "epoch": 0.21271266216057888, + "grad_norm": 257.0396423339844, + "learning_rate": 3.852794494769599e-05, + "loss": 96.1703, + "step": 52650 + }, + { + "epoch": 0.2127530634259465, + "grad_norm": 1389.708740234375, + "learning_rate": 3.8526893247661056e-05, + "loss": 103.4866, + "step": 52660 + }, + { + "epoch": 0.21279346469131413, + "grad_norm": 1026.8753662109375, + "learning_rate": 3.85258411864348e-05, + "loss": 122.804, + "step": 52670 + }, + { + "epoch": 0.21283386595668177, + "grad_norm": 525.7338256835938, + "learning_rate": 3.852478876403773e-05, + "loss": 130.1317, + "step": 52680 + }, + { + "epoch": 0.21287426722204938, + "grad_norm": 1094.2755126953125, + "learning_rate": 3.8523735980490375e-05, + "loss": 114.5118, + "step": 52690 + }, + { + "epoch": 0.21291466848741702, + "grad_norm": 1490.2103271484375, + "learning_rate": 3.8522682835813244e-05, + "loss": 94.5273, + "step": 52700 + }, + { + "epoch": 0.21295506975278466, + "grad_norm": 491.453369140625, + "learning_rate": 3.852162933002687e-05, + "loss": 77.7721, + "step": 52710 + }, + { + "epoch": 0.21299547101815228, + "grad_norm": 638.9677734375, + "learning_rate": 3.8520575463151805e-05, + "loss": 71.4721, + "step": 52720 + }, + { + "epoch": 0.21303587228351992, + "grad_norm": 1119.8577880859375, + "learning_rate": 3.851952123520859e-05, + "loss": 112.6437, + "step": 52730 + }, + { + "epoch": 0.21307627354888756, + "grad_norm": 1442.1292724609375, + "learning_rate": 3.851846664621776e-05, + "loss": 79.7571, + "step": 52740 + }, + { + "epoch": 0.21311667481425517, + "grad_norm": 691.775634765625, + "learning_rate": 3.85174116961999e-05, + "loss": 139.3773, + "step": 52750 + }, + { + "epoch": 0.2131570760796228, + "grad_norm": 1109.8311767578125, + "learning_rate": 3.851635638517556e-05, + "loss": 80.4588, + "step": 52760 + }, + { + "epoch": 0.21319747734499045, + "grad_norm": 605.345458984375, + "learning_rate": 3.851530071316532e-05, + "loss": 106.7823, + "step": 52770 + }, + { + "epoch": 0.2132378786103581, + "grad_norm": 882.8757934570312, + "learning_rate": 3.851424468018976e-05, + "loss": 94.4526, + "step": 52780 + }, + { + "epoch": 0.2132782798757257, + "grad_norm": 780.5964965820312, + "learning_rate": 3.851318828626947e-05, + "loss": 198.7649, + "step": 52790 + }, + { + "epoch": 0.21331868114109334, + "grad_norm": 457.7998046875, + "learning_rate": 3.851213153142505e-05, + "loss": 77.6022, + "step": 52800 + }, + { + "epoch": 0.21335908240646098, + "grad_norm": 1136.515380859375, + "learning_rate": 3.851107441567708e-05, + "loss": 92.675, + "step": 52810 + }, + { + "epoch": 0.2133994836718286, + "grad_norm": 1148.6451416015625, + "learning_rate": 
3.85100169390462e-05, + "loss": 94.7596, + "step": 52820 + }, + { + "epoch": 0.21343988493719623, + "grad_norm": 952.776123046875, + "learning_rate": 3.8508959101553e-05, + "loss": 109.1033, + "step": 52830 + }, + { + "epoch": 0.21348028620256387, + "grad_norm": 705.25927734375, + "learning_rate": 3.850790090321812e-05, + "loss": 88.8572, + "step": 52840 + }, + { + "epoch": 0.21352068746793149, + "grad_norm": 687.5982055664062, + "learning_rate": 3.850684234406217e-05, + "loss": 110.7515, + "step": 52850 + }, + { + "epoch": 0.21356108873329913, + "grad_norm": 741.2318115234375, + "learning_rate": 3.850578342410581e-05, + "loss": 66.0446, + "step": 52860 + }, + { + "epoch": 0.21360148999866677, + "grad_norm": 1406.8551025390625, + "learning_rate": 3.8504724143369674e-05, + "loss": 66.521, + "step": 52870 + }, + { + "epoch": 0.21364189126403438, + "grad_norm": 626.2271728515625, + "learning_rate": 3.850366450187442e-05, + "loss": 119.2516, + "step": 52880 + }, + { + "epoch": 0.21368229252940202, + "grad_norm": 872.215087890625, + "learning_rate": 3.850260449964069e-05, + "loss": 109.6687, + "step": 52890 + }, + { + "epoch": 0.21372269379476966, + "grad_norm": 526.4197387695312, + "learning_rate": 3.850154413668916e-05, + "loss": 107.8513, + "step": 52900 + }, + { + "epoch": 0.21376309506013727, + "grad_norm": 707.72607421875, + "learning_rate": 3.8500483413040514e-05, + "loss": 87.9285, + "step": 52910 + }, + { + "epoch": 0.2138034963255049, + "grad_norm": 883.9945678710938, + "learning_rate": 3.849942232871541e-05, + "loss": 129.1004, + "step": 52920 + }, + { + "epoch": 0.21384389759087255, + "grad_norm": 693.5299072265625, + "learning_rate": 3.849836088373454e-05, + "loss": 92.2962, + "step": 52930 + }, + { + "epoch": 0.2138842988562402, + "grad_norm": 612.9338989257812, + "learning_rate": 3.849729907811861e-05, + "loss": 109.8112, + "step": 52940 + }, + { + "epoch": 0.2139247001216078, + "grad_norm": 847.2424926757812, + "learning_rate": 3.8496236911888306e-05, + "loss": 97.3556, + "step": 52950 + }, + { + "epoch": 0.21396510138697544, + "grad_norm": 1805.6605224609375, + "learning_rate": 3.8495174385064335e-05, + "loss": 126.527, + "step": 52960 + }, + { + "epoch": 0.21400550265234308, + "grad_norm": 577.7283325195312, + "learning_rate": 3.849411149766743e-05, + "loss": 118.4838, + "step": 52970 + }, + { + "epoch": 0.2140459039177107, + "grad_norm": 1355.56640625, + "learning_rate": 3.849304824971829e-05, + "loss": 88.0504, + "step": 52980 + }, + { + "epoch": 0.21408630518307833, + "grad_norm": 642.6680297851562, + "learning_rate": 3.849198464123766e-05, + "loss": 162.2041, + "step": 52990 + }, + { + "epoch": 0.21412670644844597, + "grad_norm": 734.218505859375, + "learning_rate": 3.849092067224627e-05, + "loss": 76.0011, + "step": 53000 + }, + { + "epoch": 0.2141671077138136, + "grad_norm": 1401.511962890625, + "learning_rate": 3.848985634276486e-05, + "loss": 117.9591, + "step": 53010 + }, + { + "epoch": 0.21420750897918123, + "grad_norm": 1131.876953125, + "learning_rate": 3.848879165281417e-05, + "loss": 83.0302, + "step": 53020 + }, + { + "epoch": 0.21424791024454887, + "grad_norm": 755.670166015625, + "learning_rate": 3.848772660241499e-05, + "loss": 107.5068, + "step": 53030 + }, + { + "epoch": 0.21428831150991648, + "grad_norm": 739.3159790039062, + "learning_rate": 3.848666119158804e-05, + "loss": 79.4875, + "step": 53040 + }, + { + "epoch": 0.21432871277528412, + "grad_norm": 1317.7801513671875, + "learning_rate": 3.848559542035412e-05, + "loss": 91.0646, + "step": 53050 + }, + 
{ + "epoch": 0.21436911404065176, + "grad_norm": 1127.0994873046875, + "learning_rate": 3.848452928873401e-05, + "loss": 133.0553, + "step": 53060 + }, + { + "epoch": 0.21440951530601937, + "grad_norm": 830.6521606445312, + "learning_rate": 3.848346279674847e-05, + "loss": 131.7083, + "step": 53070 + }, + { + "epoch": 0.214449916571387, + "grad_norm": 736.0692749023438, + "learning_rate": 3.848239594441831e-05, + "loss": 124.4132, + "step": 53080 + }, + { + "epoch": 0.21449031783675465, + "grad_norm": 1535.660888671875, + "learning_rate": 3.848132873176434e-05, + "loss": 120.6561, + "step": 53090 + }, + { + "epoch": 0.2145307191021223, + "grad_norm": 810.8010864257812, + "learning_rate": 3.848026115880734e-05, + "loss": 71.7073, + "step": 53100 + }, + { + "epoch": 0.2145711203674899, + "grad_norm": 710.4933471679688, + "learning_rate": 3.847919322556814e-05, + "loss": 135.1957, + "step": 53110 + }, + { + "epoch": 0.21461152163285754, + "grad_norm": 1442.503173828125, + "learning_rate": 3.847812493206754e-05, + "loss": 104.4677, + "step": 53120 + }, + { + "epoch": 0.21465192289822518, + "grad_norm": 1448.3548583984375, + "learning_rate": 3.84770562783264e-05, + "loss": 105.9044, + "step": 53130 + }, + { + "epoch": 0.2146923241635928, + "grad_norm": 733.7570190429688, + "learning_rate": 3.847598726436553e-05, + "loss": 82.1757, + "step": 53140 + }, + { + "epoch": 0.21473272542896044, + "grad_norm": 969.9849853515625, + "learning_rate": 3.847491789020576e-05, + "loss": 208.6913, + "step": 53150 + }, + { + "epoch": 0.21477312669432808, + "grad_norm": 630.7406005859375, + "learning_rate": 3.8473848155867976e-05, + "loss": 111.4094, + "step": 53160 + }, + { + "epoch": 0.2148135279596957, + "grad_norm": 742.5579223632812, + "learning_rate": 3.8472778061373e-05, + "loss": 102.9299, + "step": 53170 + }, + { + "epoch": 0.21485392922506333, + "grad_norm": 1555.4393310546875, + "learning_rate": 3.8471707606741706e-05, + "loss": 92.9988, + "step": 53180 + }, + { + "epoch": 0.21489433049043097, + "grad_norm": 848.40087890625, + "learning_rate": 3.8470636791994965e-05, + "loss": 98.8991, + "step": 53190 + }, + { + "epoch": 0.21493473175579858, + "grad_norm": 1134.3018798828125, + "learning_rate": 3.8469565617153646e-05, + "loss": 124.3966, + "step": 53200 + }, + { + "epoch": 0.21497513302116622, + "grad_norm": 1249.229248046875, + "learning_rate": 3.846849408223865e-05, + "loss": 77.4994, + "step": 53210 + }, + { + "epoch": 0.21501553428653386, + "grad_norm": 1212.0462646484375, + "learning_rate": 3.846742218727084e-05, + "loss": 103.1711, + "step": 53220 + }, + { + "epoch": 0.21505593555190147, + "grad_norm": 990.0776977539062, + "learning_rate": 3.846634993227113e-05, + "loss": 128.1203, + "step": 53230 + }, + { + "epoch": 0.2150963368172691, + "grad_norm": 693.29248046875, + "learning_rate": 3.846527731726042e-05, + "loss": 78.3947, + "step": 53240 + }, + { + "epoch": 0.21513673808263675, + "grad_norm": 859.3899536132812, + "learning_rate": 3.846420434225962e-05, + "loss": 85.8838, + "step": 53250 + }, + { + "epoch": 0.2151771393480044, + "grad_norm": 1442.988525390625, + "learning_rate": 3.846313100728965e-05, + "loss": 128.3282, + "step": 53260 + }, + { + "epoch": 0.215217540613372, + "grad_norm": 1069.2225341796875, + "learning_rate": 3.846205731237144e-05, + "loss": 78.2289, + "step": 53270 + }, + { + "epoch": 0.21525794187873964, + "grad_norm": 727.5737915039062, + "learning_rate": 3.846098325752592e-05, + "loss": 97.7968, + "step": 53280 + }, + { + "epoch": 0.21529834314410728, + "grad_norm": 
701.9346313476562, + "learning_rate": 3.845990884277402e-05, + "loss": 102.3193, + "step": 53290 + }, + { + "epoch": 0.2153387444094749, + "grad_norm": 766.0593872070312, + "learning_rate": 3.8458834068136704e-05, + "loss": 90.3599, + "step": 53300 + }, + { + "epoch": 0.21537914567484254, + "grad_norm": 3409.10009765625, + "learning_rate": 3.8457758933634905e-05, + "loss": 165.382, + "step": 53310 + }, + { + "epoch": 0.21541954694021018, + "grad_norm": 462.9276428222656, + "learning_rate": 3.84566834392896e-05, + "loss": 84.2569, + "step": 53320 + }, + { + "epoch": 0.2154599482055778, + "grad_norm": 705.4737548828125, + "learning_rate": 3.8455607585121745e-05, + "loss": 121.436, + "step": 53330 + }, + { + "epoch": 0.21550034947094543, + "grad_norm": 531.6485595703125, + "learning_rate": 3.8454531371152315e-05, + "loss": 81.8277, + "step": 53340 + }, + { + "epoch": 0.21554075073631307, + "grad_norm": 928.2781372070312, + "learning_rate": 3.84534547974023e-05, + "loss": 145.5113, + "step": 53350 + }, + { + "epoch": 0.21558115200168068, + "grad_norm": 1030.874267578125, + "learning_rate": 3.8452377863892686e-05, + "loss": 88.9576, + "step": 53360 + }, + { + "epoch": 0.21562155326704832, + "grad_norm": 1026.483154296875, + "learning_rate": 3.845130057064446e-05, + "loss": 96.9796, + "step": 53370 + }, + { + "epoch": 0.21566195453241596, + "grad_norm": 620.7993774414062, + "learning_rate": 3.8450222917678626e-05, + "loss": 105.1409, + "step": 53380 + }, + { + "epoch": 0.21570235579778357, + "grad_norm": 651.0725708007812, + "learning_rate": 3.84491449050162e-05, + "loss": 114.6284, + "step": 53390 + }, + { + "epoch": 0.2157427570631512, + "grad_norm": 430.5691833496094, + "learning_rate": 3.8448066532678206e-05, + "loss": 91.8052, + "step": 53400 + }, + { + "epoch": 0.21578315832851885, + "grad_norm": 1831.330078125, + "learning_rate": 3.8446987800685646e-05, + "loss": 163.5094, + "step": 53410 + }, + { + "epoch": 0.2158235595938865, + "grad_norm": 844.5257568359375, + "learning_rate": 3.844590870905957e-05, + "loss": 113.5751, + "step": 53420 + }, + { + "epoch": 0.2158639608592541, + "grad_norm": 971.8028564453125, + "learning_rate": 3.8444829257821e-05, + "loss": 137.6763, + "step": 53430 + }, + { + "epoch": 0.21590436212462175, + "grad_norm": 986.920654296875, + "learning_rate": 3.844374944699099e-05, + "loss": 69.5061, + "step": 53440 + }, + { + "epoch": 0.21594476338998939, + "grad_norm": 359.1644592285156, + "learning_rate": 3.844266927659059e-05, + "loss": 100.7511, + "step": 53450 + }, + { + "epoch": 0.215985164655357, + "grad_norm": 505.7735595703125, + "learning_rate": 3.8441588746640856e-05, + "loss": 74.6089, + "step": 53460 + }, + { + "epoch": 0.21602556592072464, + "grad_norm": 693.46240234375, + "learning_rate": 3.8440507857162856e-05, + "loss": 82.2816, + "step": 53470 + }, + { + "epoch": 0.21606596718609228, + "grad_norm": 634.1375732421875, + "learning_rate": 3.843942660817767e-05, + "loss": 98.636, + "step": 53480 + }, + { + "epoch": 0.2161063684514599, + "grad_norm": 1153.658203125, + "learning_rate": 3.8438344999706355e-05, + "loss": 124.9, + "step": 53490 + }, + { + "epoch": 0.21614676971682753, + "grad_norm": 590.3118896484375, + "learning_rate": 3.8437263031770015e-05, + "loss": 86.0672, + "step": 53500 + }, + { + "epoch": 0.21618717098219517, + "grad_norm": 701.5838623046875, + "learning_rate": 3.843618070438974e-05, + "loss": 120.0804, + "step": 53510 + }, + { + "epoch": 0.21622757224756278, + "grad_norm": 658.0077514648438, + "learning_rate": 3.8435098017586635e-05, + 
"loss": 83.5979, + "step": 53520 + }, + { + "epoch": 0.21626797351293042, + "grad_norm": 508.51666259765625, + "learning_rate": 3.84340149713818e-05, + "loss": 125.0046, + "step": 53530 + }, + { + "epoch": 0.21630837477829806, + "grad_norm": 795.531982421875, + "learning_rate": 3.843293156579635e-05, + "loss": 106.2928, + "step": 53540 + }, + { + "epoch": 0.21634877604366567, + "grad_norm": 629.8718872070312, + "learning_rate": 3.843184780085142e-05, + "loss": 115.3543, + "step": 53550 + }, + { + "epoch": 0.21638917730903331, + "grad_norm": 919.7099609375, + "learning_rate": 3.843076367656811e-05, + "loss": 110.5907, + "step": 53560 + }, + { + "epoch": 0.21642957857440095, + "grad_norm": 663.2372436523438, + "learning_rate": 3.842967919296759e-05, + "loss": 78.242, + "step": 53570 + }, + { + "epoch": 0.2164699798397686, + "grad_norm": 639.2510986328125, + "learning_rate": 3.842859435007098e-05, + "loss": 84.1427, + "step": 53580 + }, + { + "epoch": 0.2165103811051362, + "grad_norm": 713.604248046875, + "learning_rate": 3.8427509147899436e-05, + "loss": 108.712, + "step": 53590 + }, + { + "epoch": 0.21655078237050385, + "grad_norm": 895.99755859375, + "learning_rate": 3.842642358647411e-05, + "loss": 96.2259, + "step": 53600 + }, + { + "epoch": 0.2165911836358715, + "grad_norm": 1623.079833984375, + "learning_rate": 3.842533766581618e-05, + "loss": 113.5395, + "step": 53610 + }, + { + "epoch": 0.2166315849012391, + "grad_norm": 479.018798828125, + "learning_rate": 3.842425138594679e-05, + "loss": 86.2147, + "step": 53620 + }, + { + "epoch": 0.21667198616660674, + "grad_norm": 746.0004272460938, + "learning_rate": 3.842316474688715e-05, + "loss": 198.0614, + "step": 53630 + }, + { + "epoch": 0.21671238743197438, + "grad_norm": 795.5638427734375, + "learning_rate": 3.8422077748658415e-05, + "loss": 65.3402, + "step": 53640 + }, + { + "epoch": 0.216752788697342, + "grad_norm": 666.8555908203125, + "learning_rate": 3.842099039128179e-05, + "loss": 98.3733, + "step": 53650 + }, + { + "epoch": 0.21679318996270963, + "grad_norm": 1627.993896484375, + "learning_rate": 3.8419902674778484e-05, + "loss": 114.0325, + "step": 53660 + }, + { + "epoch": 0.21683359122807727, + "grad_norm": 783.4403076171875, + "learning_rate": 3.8418814599169684e-05, + "loss": 102.4001, + "step": 53670 + }, + { + "epoch": 0.21687399249344488, + "grad_norm": 868.920654296875, + "learning_rate": 3.841772616447662e-05, + "loss": 89.4975, + "step": 53680 + }, + { + "epoch": 0.21691439375881252, + "grad_norm": 866.7809448242188, + "learning_rate": 3.84166373707205e-05, + "loss": 82.5924, + "step": 53690 + }, + { + "epoch": 0.21695479502418016, + "grad_norm": 902.5274047851562, + "learning_rate": 3.8415548217922546e-05, + "loss": 127.5404, + "step": 53700 + }, + { + "epoch": 0.21699519628954778, + "grad_norm": 856.0499877929688, + "learning_rate": 3.8414458706104e-05, + "loss": 107.8265, + "step": 53710 + }, + { + "epoch": 0.21703559755491542, + "grad_norm": 1142.2034912109375, + "learning_rate": 3.8413368835286104e-05, + "loss": 138.4438, + "step": 53720 + }, + { + "epoch": 0.21707599882028306, + "grad_norm": 1046.224609375, + "learning_rate": 3.84122786054901e-05, + "loss": 123.6185, + "step": 53730 + }, + { + "epoch": 0.2171164000856507, + "grad_norm": 754.1229858398438, + "learning_rate": 3.8411188016737245e-05, + "loss": 111.4583, + "step": 53740 + }, + { + "epoch": 0.2171568013510183, + "grad_norm": 1016.8790893554688, + "learning_rate": 3.841009706904881e-05, + "loss": 88.5139, + "step": 53750 + }, + { + "epoch": 
0.21719720261638595, + "grad_norm": 631.764892578125, + "learning_rate": 3.8409005762446046e-05, + "loss": 81.1774, + "step": 53760 + }, + { + "epoch": 0.2172376038817536, + "grad_norm": 1074.966552734375, + "learning_rate": 3.840791409695024e-05, + "loss": 117.7015, + "step": 53770 + }, + { + "epoch": 0.2172780051471212, + "grad_norm": 943.3117065429688, + "learning_rate": 3.840682207258267e-05, + "loss": 87.8956, + "step": 53780 + }, + { + "epoch": 0.21731840641248884, + "grad_norm": 851.1962280273438, + "learning_rate": 3.8405729689364625e-05, + "loss": 115.5596, + "step": 53790 + }, + { + "epoch": 0.21735880767785648, + "grad_norm": 1076.39892578125, + "learning_rate": 3.840463694731741e-05, + "loss": 114.9321, + "step": 53800 + }, + { + "epoch": 0.2173992089432241, + "grad_norm": 595.8029174804688, + "learning_rate": 3.840354384646232e-05, + "loss": 138.3591, + "step": 53810 + }, + { + "epoch": 0.21743961020859173, + "grad_norm": 900.909912109375, + "learning_rate": 3.840245038682067e-05, + "loss": 81.8543, + "step": 53820 + }, + { + "epoch": 0.21748001147395937, + "grad_norm": 602.6289672851562, + "learning_rate": 3.8401356568413766e-05, + "loss": 112.4752, + "step": 53830 + }, + { + "epoch": 0.21752041273932698, + "grad_norm": 950.2291870117188, + "learning_rate": 3.840026239126295e-05, + "loss": 187.1066, + "step": 53840 + }, + { + "epoch": 0.21756081400469462, + "grad_norm": 638.8729858398438, + "learning_rate": 3.839916785538955e-05, + "loss": 148.2975, + "step": 53850 + }, + { + "epoch": 0.21760121527006226, + "grad_norm": 518.1243286132812, + "learning_rate": 3.839807296081489e-05, + "loss": 66.4836, + "step": 53860 + }, + { + "epoch": 0.21764161653542988, + "grad_norm": 522.3984985351562, + "learning_rate": 3.839697770756033e-05, + "loss": 57.4947, + "step": 53870 + }, + { + "epoch": 0.21768201780079752, + "grad_norm": 1058.81201171875, + "learning_rate": 3.839588209564721e-05, + "loss": 115.2527, + "step": 53880 + }, + { + "epoch": 0.21772241906616516, + "grad_norm": 382.9750671386719, + "learning_rate": 3.839478612509691e-05, + "loss": 86.6743, + "step": 53890 + }, + { + "epoch": 0.2177628203315328, + "grad_norm": 1144.360595703125, + "learning_rate": 3.839368979593079e-05, + "loss": 102.4853, + "step": 53900 + }, + { + "epoch": 0.2178032215969004, + "grad_norm": 610.3412475585938, + "learning_rate": 3.83925931081702e-05, + "loss": 59.0362, + "step": 53910 + }, + { + "epoch": 0.21784362286226805, + "grad_norm": 752.2454833984375, + "learning_rate": 3.8391496061836545e-05, + "loss": 84.6537, + "step": 53920 + }, + { + "epoch": 0.2178840241276357, + "grad_norm": 628.65234375, + "learning_rate": 3.8390398656951204e-05, + "loss": 84.1721, + "step": 53930 + }, + { + "epoch": 0.2179244253930033, + "grad_norm": 3009.22900390625, + "learning_rate": 3.838930089353558e-05, + "loss": 129.9984, + "step": 53940 + }, + { + "epoch": 0.21796482665837094, + "grad_norm": 480.97430419921875, + "learning_rate": 3.838820277161106e-05, + "loss": 79.4925, + "step": 53950 + }, + { + "epoch": 0.21800522792373858, + "grad_norm": 751.2764892578125, + "learning_rate": 3.8387104291199066e-05, + "loss": 71.9811, + "step": 53960 + }, + { + "epoch": 0.2180456291891062, + "grad_norm": 452.3200378417969, + "learning_rate": 3.8386005452321e-05, + "loss": 74.7782, + "step": 53970 + }, + { + "epoch": 0.21808603045447383, + "grad_norm": 756.34521484375, + "learning_rate": 3.83849062549983e-05, + "loss": 122.5561, + "step": 53980 + }, + { + "epoch": 0.21812643171984147, + "grad_norm": 1283.9202880859375, + 
"learning_rate": 3.838380669925238e-05, + "loss": 85.6037, + "step": 53990 + }, + { + "epoch": 0.21816683298520909, + "grad_norm": 975.7578125, + "learning_rate": 3.838270678510469e-05, + "loss": 81.059, + "step": 54000 + }, + { + "epoch": 0.21820723425057673, + "grad_norm": 889.5814208984375, + "learning_rate": 3.8381606512576664e-05, + "loss": 119.0333, + "step": 54010 + }, + { + "epoch": 0.21824763551594437, + "grad_norm": 766.3934936523438, + "learning_rate": 3.838050588168976e-05, + "loss": 102.7243, + "step": 54020 + }, + { + "epoch": 0.21828803678131198, + "grad_norm": 1146.227783203125, + "learning_rate": 3.837940489246543e-05, + "loss": 87.2841, + "step": 54030 + }, + { + "epoch": 0.21832843804667962, + "grad_norm": 1154.3778076171875, + "learning_rate": 3.8378303544925134e-05, + "loss": 80.3675, + "step": 54040 + }, + { + "epoch": 0.21836883931204726, + "grad_norm": 719.3204956054688, + "learning_rate": 3.837720183909035e-05, + "loss": 123.2549, + "step": 54050 + }, + { + "epoch": 0.2184092405774149, + "grad_norm": 576.6984252929688, + "learning_rate": 3.8376099774982553e-05, + "loss": 110.8605, + "step": 54060 + }, + { + "epoch": 0.2184496418427825, + "grad_norm": 760.093505859375, + "learning_rate": 3.8374997352623234e-05, + "loss": 102.9069, + "step": 54070 + }, + { + "epoch": 0.21849004310815015, + "grad_norm": 770.3103637695312, + "learning_rate": 3.837389457203388e-05, + "loss": 99.087, + "step": 54080 + }, + { + "epoch": 0.2185304443735178, + "grad_norm": 970.0531616210938, + "learning_rate": 3.8372791433236e-05, + "loss": 121.4278, + "step": 54090 + }, + { + "epoch": 0.2185708456388854, + "grad_norm": 815.7464599609375, + "learning_rate": 3.8371687936251085e-05, + "loss": 92.3814, + "step": 54100 + }, + { + "epoch": 0.21861124690425304, + "grad_norm": 583.1131591796875, + "learning_rate": 3.837058408110066e-05, + "loss": 115.1073, + "step": 54110 + }, + { + "epoch": 0.21865164816962068, + "grad_norm": 487.0845031738281, + "learning_rate": 3.836947986780623e-05, + "loss": 189.3706, + "step": 54120 + }, + { + "epoch": 0.2186920494349883, + "grad_norm": 1206.000244140625, + "learning_rate": 3.836837529638934e-05, + "loss": 148.6398, + "step": 54130 + }, + { + "epoch": 0.21873245070035593, + "grad_norm": 633.65185546875, + "learning_rate": 3.836727036687151e-05, + "loss": 97.7975, + "step": 54140 + }, + { + "epoch": 0.21877285196572357, + "grad_norm": 667.3596801757812, + "learning_rate": 3.83661650792743e-05, + "loss": 104.047, + "step": 54150 + }, + { + "epoch": 0.2188132532310912, + "grad_norm": 983.1024169921875, + "learning_rate": 3.8365059433619236e-05, + "loss": 88.5237, + "step": 54160 + }, + { + "epoch": 0.21885365449645883, + "grad_norm": 635.7048950195312, + "learning_rate": 3.836395342992789e-05, + "loss": 76.7203, + "step": 54170 + }, + { + "epoch": 0.21889405576182647, + "grad_norm": 508.2273254394531, + "learning_rate": 3.8362847068221816e-05, + "loss": 100.3717, + "step": 54180 + }, + { + "epoch": 0.21893445702719408, + "grad_norm": 949.3069458007812, + "learning_rate": 3.836174034852258e-05, + "loss": 120.2033, + "step": 54190 + }, + { + "epoch": 0.21897485829256172, + "grad_norm": 492.22955322265625, + "learning_rate": 3.8360633270851765e-05, + "loss": 83.3284, + "step": 54200 + }, + { + "epoch": 0.21901525955792936, + "grad_norm": 761.6959838867188, + "learning_rate": 3.835952583523095e-05, + "loss": 89.2281, + "step": 54210 + }, + { + "epoch": 0.219055660823297, + "grad_norm": 1410.0286865234375, + "learning_rate": 3.835841804168174e-05, + "loss": 93.5119, 
+ "step": 54220 + }, + { + "epoch": 0.2190960620886646, + "grad_norm": 1807.4326171875, + "learning_rate": 3.8357309890225696e-05, + "loss": 130.1427, + "step": 54230 + }, + { + "epoch": 0.21913646335403225, + "grad_norm": 512.2711181640625, + "learning_rate": 3.8356201380884465e-05, + "loss": 82.8554, + "step": 54240 + }, + { + "epoch": 0.2191768646193999, + "grad_norm": 1023.811279296875, + "learning_rate": 3.835509251367963e-05, + "loss": 137.6365, + "step": 54250 + }, + { + "epoch": 0.2192172658847675, + "grad_norm": 1408.8125, + "learning_rate": 3.835398328863281e-05, + "loss": 96.5144, + "step": 54260 + }, + { + "epoch": 0.21925766715013514, + "grad_norm": 952.6309814453125, + "learning_rate": 3.835287370576564e-05, + "loss": 102.7014, + "step": 54270 + }, + { + "epoch": 0.21929806841550278, + "grad_norm": 663.3465576171875, + "learning_rate": 3.8351763765099755e-05, + "loss": 127.2183, + "step": 54280 + }, + { + "epoch": 0.2193384696808704, + "grad_norm": 544.1553955078125, + "learning_rate": 3.835065346665679e-05, + "loss": 102.8417, + "step": 54290 + }, + { + "epoch": 0.21937887094623804, + "grad_norm": 1049.561279296875, + "learning_rate": 3.8349542810458374e-05, + "loss": 83.7194, + "step": 54300 + }, + { + "epoch": 0.21941927221160568, + "grad_norm": 308.5175476074219, + "learning_rate": 3.834843179652618e-05, + "loss": 98.6339, + "step": 54310 + }, + { + "epoch": 0.2194596734769733, + "grad_norm": 854.31396484375, + "learning_rate": 3.834732042488186e-05, + "loss": 72.7972, + "step": 54320 + }, + { + "epoch": 0.21950007474234093, + "grad_norm": 753.4955444335938, + "learning_rate": 3.834620869554709e-05, + "loss": 91.7339, + "step": 54330 + }, + { + "epoch": 0.21954047600770857, + "grad_norm": 971.181884765625, + "learning_rate": 3.834509660854353e-05, + "loss": 86.6772, + "step": 54340 + }, + { + "epoch": 0.21958087727307618, + "grad_norm": 928.732177734375, + "learning_rate": 3.834398416389287e-05, + "loss": 92.5086, + "step": 54350 + }, + { + "epoch": 0.21962127853844382, + "grad_norm": 932.6981811523438, + "learning_rate": 3.834287136161679e-05, + "loss": 121.2294, + "step": 54360 + }, + { + "epoch": 0.21966167980381146, + "grad_norm": 645.329345703125, + "learning_rate": 3.8341758201736995e-05, + "loss": 107.3622, + "step": 54370 + }, + { + "epoch": 0.2197020810691791, + "grad_norm": 760.1530151367188, + "learning_rate": 3.8340644684275184e-05, + "loss": 130.2834, + "step": 54380 + }, + { + "epoch": 0.2197424823345467, + "grad_norm": 843.722900390625, + "learning_rate": 3.833953080925305e-05, + "loss": 59.3238, + "step": 54390 + }, + { + "epoch": 0.21978288359991435, + "grad_norm": 586.7412109375, + "learning_rate": 3.8338416576692335e-05, + "loss": 71.9199, + "step": 54400 + }, + { + "epoch": 0.219823284865282, + "grad_norm": 1171.0694580078125, + "learning_rate": 3.833730198661473e-05, + "loss": 120.2158, + "step": 54410 + }, + { + "epoch": 0.2198636861306496, + "grad_norm": 1557.15283203125, + "learning_rate": 3.8336187039042e-05, + "loss": 122.4784, + "step": 54420 + }, + { + "epoch": 0.21990408739601724, + "grad_norm": 1529.3203125, + "learning_rate": 3.833507173399586e-05, + "loss": 101.9275, + "step": 54430 + }, + { + "epoch": 0.21994448866138488, + "grad_norm": 501.3184814453125, + "learning_rate": 3.8333956071498047e-05, + "loss": 108.6041, + "step": 54440 + }, + { + "epoch": 0.2199848899267525, + "grad_norm": 1145.056884765625, + "learning_rate": 3.833284005157033e-05, + "loss": 90.1059, + "step": 54450 + }, + { + "epoch": 0.22002529119212014, + "grad_norm": 
813.9520263671875, + "learning_rate": 3.833172367423445e-05, + "loss": 109.6603, + "step": 54460 + }, + { + "epoch": 0.22006569245748778, + "grad_norm": 680.5341186523438, + "learning_rate": 3.833060693951219e-05, + "loss": 72.7033, + "step": 54470 + }, + { + "epoch": 0.2201060937228554, + "grad_norm": 866.5415649414062, + "learning_rate": 3.83294898474253e-05, + "loss": 116.4384, + "step": 54480 + }, + { + "epoch": 0.22014649498822303, + "grad_norm": 2035.0565185546875, + "learning_rate": 3.832837239799558e-05, + "loss": 197.5343, + "step": 54490 + }, + { + "epoch": 0.22018689625359067, + "grad_norm": 581.3812255859375, + "learning_rate": 3.8327254591244795e-05, + "loss": 91.3056, + "step": 54500 + }, + { + "epoch": 0.22022729751895828, + "grad_norm": 484.7955627441406, + "learning_rate": 3.832613642719475e-05, + "loss": 53.4161, + "step": 54510 + }, + { + "epoch": 0.22026769878432592, + "grad_norm": 1182.396240234375, + "learning_rate": 3.832501790586724e-05, + "loss": 119.244, + "step": 54520 + }, + { + "epoch": 0.22030810004969356, + "grad_norm": 994.3511962890625, + "learning_rate": 3.832389902728407e-05, + "loss": 134.9546, + "step": 54530 + }, + { + "epoch": 0.2203485013150612, + "grad_norm": 476.15716552734375, + "learning_rate": 3.832277979146706e-05, + "loss": 60.2438, + "step": 54540 + }, + { + "epoch": 0.2203889025804288, + "grad_norm": 1074.7857666015625, + "learning_rate": 3.8321660198438013e-05, + "loss": 113.015, + "step": 54550 + }, + { + "epoch": 0.22042930384579645, + "grad_norm": 1133.215087890625, + "learning_rate": 3.832054024821877e-05, + "loss": 133.5108, + "step": 54560 + }, + { + "epoch": 0.2204697051111641, + "grad_norm": 1013.3253173828125, + "learning_rate": 3.831941994083118e-05, + "loss": 77.2305, + "step": 54570 + }, + { + "epoch": 0.2205101063765317, + "grad_norm": 508.3328552246094, + "learning_rate": 3.831829927629705e-05, + "loss": 97.8195, + "step": 54580 + }, + { + "epoch": 0.22055050764189935, + "grad_norm": 950.2012939453125, + "learning_rate": 3.831717825463825e-05, + "loss": 150.5083, + "step": 54590 + }, + { + "epoch": 0.22059090890726699, + "grad_norm": 839.1326293945312, + "learning_rate": 3.831605687587663e-05, + "loss": 139.3696, + "step": 54600 + }, + { + "epoch": 0.2206313101726346, + "grad_norm": 758.6932983398438, + "learning_rate": 3.831493514003405e-05, + "loss": 75.044, + "step": 54610 + }, + { + "epoch": 0.22067171143800224, + "grad_norm": 1253.0968017578125, + "learning_rate": 3.8313813047132384e-05, + "loss": 100.2923, + "step": 54620 + }, + { + "epoch": 0.22071211270336988, + "grad_norm": 1439.7862548828125, + "learning_rate": 3.8312690597193494e-05, + "loss": 101.8751, + "step": 54630 + }, + { + "epoch": 0.2207525139687375, + "grad_norm": 1169.7767333984375, + "learning_rate": 3.8311567790239284e-05, + "loss": 133.2136, + "step": 54640 + }, + { + "epoch": 0.22079291523410513, + "grad_norm": 413.7186584472656, + "learning_rate": 3.831044462629163e-05, + "loss": 115.3029, + "step": 54650 + }, + { + "epoch": 0.22083331649947277, + "grad_norm": 905.2556762695312, + "learning_rate": 3.830932110537242e-05, + "loss": 109.2695, + "step": 54660 + }, + { + "epoch": 0.22087371776484038, + "grad_norm": 631.7772827148438, + "learning_rate": 3.830819722750358e-05, + "loss": 153.5299, + "step": 54670 + }, + { + "epoch": 0.22091411903020802, + "grad_norm": 1027.359619140625, + "learning_rate": 3.830707299270701e-05, + "loss": 91.3771, + "step": 54680 + }, + { + "epoch": 0.22095452029557566, + "grad_norm": 810.7000732421875, + "learning_rate": 
3.830594840100463e-05, + "loss": 96.6715, + "step": 54690 + }, + { + "epoch": 0.22099492156094327, + "grad_norm": 972.98291015625, + "learning_rate": 3.830482345241835e-05, + "loss": 82.1826, + "step": 54700 + }, + { + "epoch": 0.22103532282631091, + "grad_norm": 1382.8974609375, + "learning_rate": 3.830369814697013e-05, + "loss": 126.1696, + "step": 54710 + }, + { + "epoch": 0.22107572409167855, + "grad_norm": 833.8832397460938, + "learning_rate": 3.830257248468187e-05, + "loss": 94.7859, + "step": 54720 + }, + { + "epoch": 0.2211161253570462, + "grad_norm": 1924.7059326171875, + "learning_rate": 3.8301446465575554e-05, + "loss": 106.4743, + "step": 54730 + }, + { + "epoch": 0.2211565266224138, + "grad_norm": 348.6738586425781, + "learning_rate": 3.8300320089673106e-05, + "loss": 71.0374, + "step": 54740 + }, + { + "epoch": 0.22119692788778145, + "grad_norm": 636.140380859375, + "learning_rate": 3.82991933569965e-05, + "loss": 90.2757, + "step": 54750 + }, + { + "epoch": 0.2212373291531491, + "grad_norm": 1410.787841796875, + "learning_rate": 3.82980662675677e-05, + "loss": 151.8761, + "step": 54760 + }, + { + "epoch": 0.2212777304185167, + "grad_norm": 975.005859375, + "learning_rate": 3.829693882140867e-05, + "loss": 127.304, + "step": 54770 + }, + { + "epoch": 0.22131813168388434, + "grad_norm": 747.5069580078125, + "learning_rate": 3.8295811018541406e-05, + "loss": 72.7137, + "step": 54780 + }, + { + "epoch": 0.22135853294925198, + "grad_norm": 639.4030151367188, + "learning_rate": 3.829468285898789e-05, + "loss": 105.5334, + "step": 54790 + }, + { + "epoch": 0.2213989342146196, + "grad_norm": 907.65283203125, + "learning_rate": 3.82935543427701e-05, + "loss": 94.1844, + "step": 54800 + }, + { + "epoch": 0.22143933547998723, + "grad_norm": 952.908203125, + "learning_rate": 3.829242546991006e-05, + "loss": 93.5801, + "step": 54810 + }, + { + "epoch": 0.22147973674535487, + "grad_norm": 896.0051879882812, + "learning_rate": 3.8291296240429766e-05, + "loss": 103.2496, + "step": 54820 + }, + { + "epoch": 0.22152013801072248, + "grad_norm": 696.0478515625, + "learning_rate": 3.8290166654351236e-05, + "loss": 76.4237, + "step": 54830 + }, + { + "epoch": 0.22156053927609012, + "grad_norm": 430.4982604980469, + "learning_rate": 3.828903671169649e-05, + "loss": 110.573, + "step": 54840 + }, + { + "epoch": 0.22160094054145776, + "grad_norm": 937.9089965820312, + "learning_rate": 3.8287906412487555e-05, + "loss": 130.462, + "step": 54850 + }, + { + "epoch": 0.22164134180682538, + "grad_norm": 753.573486328125, + "learning_rate": 3.828677575674647e-05, + "loss": 78.0791, + "step": 54860 + }, + { + "epoch": 0.22168174307219302, + "grad_norm": 1420.5155029296875, + "learning_rate": 3.828564474449527e-05, + "loss": 115.7697, + "step": 54870 + }, + { + "epoch": 0.22172214433756066, + "grad_norm": 594.8196411132812, + "learning_rate": 3.8284513375756024e-05, + "loss": 73.9741, + "step": 54880 + }, + { + "epoch": 0.2217625456029283, + "grad_norm": 635.4664306640625, + "learning_rate": 3.828338165055077e-05, + "loss": 112.5728, + "step": 54890 + }, + { + "epoch": 0.2218029468682959, + "grad_norm": 1149.6103515625, + "learning_rate": 3.828224956890158e-05, + "loss": 98.5603, + "step": 54900 + }, + { + "epoch": 0.22184334813366355, + "grad_norm": 1111.482177734375, + "learning_rate": 3.828111713083052e-05, + "loss": 90.5046, + "step": 54910 + }, + { + "epoch": 0.2218837493990312, + "grad_norm": 777.1270141601562, + "learning_rate": 3.827998433635967e-05, + "loss": 89.8713, + "step": 54920 + }, + { + 
"epoch": 0.2219241506643988, + "grad_norm": 1125.58935546875, + "learning_rate": 3.8278851185511115e-05, + "loss": 145.5318, + "step": 54930 + }, + { + "epoch": 0.22196455192976644, + "grad_norm": 926.93603515625, + "learning_rate": 3.8277717678306946e-05, + "loss": 127.1147, + "step": 54940 + }, + { + "epoch": 0.22200495319513408, + "grad_norm": 829.6301879882812, + "learning_rate": 3.8276583814769265e-05, + "loss": 67.1728, + "step": 54950 + }, + { + "epoch": 0.2220453544605017, + "grad_norm": 640.4507446289062, + "learning_rate": 3.827544959492017e-05, + "loss": 100.9623, + "step": 54960 + }, + { + "epoch": 0.22208575572586933, + "grad_norm": 977.3050537109375, + "learning_rate": 3.827431501878177e-05, + "loss": 117.248, + "step": 54970 + }, + { + "epoch": 0.22212615699123697, + "grad_norm": 788.4990234375, + "learning_rate": 3.8273180086376195e-05, + "loss": 120.7264, + "step": 54980 + }, + { + "epoch": 0.22216655825660458, + "grad_norm": 1648.5985107421875, + "learning_rate": 3.827204479772557e-05, + "loss": 73.7668, + "step": 54990 + }, + { + "epoch": 0.22220695952197222, + "grad_norm": 1311.59375, + "learning_rate": 3.827090915285202e-05, + "loss": 172.5847, + "step": 55000 + }, + { + "epoch": 0.22224736078733986, + "grad_norm": 1032.01611328125, + "learning_rate": 3.826977315177769e-05, + "loss": 72.0989, + "step": 55010 + }, + { + "epoch": 0.22228776205270748, + "grad_norm": 1169.3946533203125, + "learning_rate": 3.8268636794524724e-05, + "loss": 113.9003, + "step": 55020 + }, + { + "epoch": 0.22232816331807512, + "grad_norm": 1220.8397216796875, + "learning_rate": 3.826750008111529e-05, + "loss": 178.7197, + "step": 55030 + }, + { + "epoch": 0.22236856458344276, + "grad_norm": 690.1135864257812, + "learning_rate": 3.826636301157152e-05, + "loss": 85.4977, + "step": 55040 + }, + { + "epoch": 0.2224089658488104, + "grad_norm": 1117.2713623046875, + "learning_rate": 3.8265225585915616e-05, + "loss": 88.4073, + "step": 55050 + }, + { + "epoch": 0.222449367114178, + "grad_norm": 2682.635498046875, + "learning_rate": 3.826408780416973e-05, + "loss": 107.6712, + "step": 55060 + }, + { + "epoch": 0.22248976837954565, + "grad_norm": 603.7355346679688, + "learning_rate": 3.8262949666356046e-05, + "loss": 60.4101, + "step": 55070 + }, + { + "epoch": 0.2225301696449133, + "grad_norm": 650.5640258789062, + "learning_rate": 3.826181117249676e-05, + "loss": 79.3109, + "step": 55080 + }, + { + "epoch": 0.2225705709102809, + "grad_norm": 810.7201538085938, + "learning_rate": 3.8260672322614054e-05, + "loss": 109.1662, + "step": 55090 + }, + { + "epoch": 0.22261097217564854, + "grad_norm": 905.609619140625, + "learning_rate": 3.825953311673015e-05, + "loss": 107.925, + "step": 55100 + }, + { + "epoch": 0.22265137344101618, + "grad_norm": 619.0980224609375, + "learning_rate": 3.825839355486724e-05, + "loss": 102.35, + "step": 55110 + }, + { + "epoch": 0.2226917747063838, + "grad_norm": 544.830810546875, + "learning_rate": 3.825725363704755e-05, + "loss": 107.2402, + "step": 55120 + }, + { + "epoch": 0.22273217597175143, + "grad_norm": 1526.8023681640625, + "learning_rate": 3.82561133632933e-05, + "loss": 114.0663, + "step": 55130 + }, + { + "epoch": 0.22277257723711907, + "grad_norm": 707.8275756835938, + "learning_rate": 3.825497273362673e-05, + "loss": 91.076, + "step": 55140 + }, + { + "epoch": 0.22281297850248669, + "grad_norm": 747.1464233398438, + "learning_rate": 3.825383174807006e-05, + "loss": 116.8466, + "step": 55150 + }, + { + "epoch": 0.22285337976785433, + "grad_norm": 
937.8566284179688, + "learning_rate": 3.825269040664554e-05, + "loss": 162.3864, + "step": 55160 + }, + { + "epoch": 0.22289378103322197, + "grad_norm": 515.2022705078125, + "learning_rate": 3.825154870937543e-05, + "loss": 81.0418, + "step": 55170 + }, + { + "epoch": 0.22293418229858958, + "grad_norm": 1711.75634765625, + "learning_rate": 3.825040665628198e-05, + "loss": 103.2414, + "step": 55180 + }, + { + "epoch": 0.22297458356395722, + "grad_norm": 778.2398681640625, + "learning_rate": 3.824926424738745e-05, + "loss": 141.0379, + "step": 55190 + }, + { + "epoch": 0.22301498482932486, + "grad_norm": 674.637451171875, + "learning_rate": 3.824812148271413e-05, + "loss": 80.6995, + "step": 55200 + }, + { + "epoch": 0.2230553860946925, + "grad_norm": 848.3941040039062, + "learning_rate": 3.824697836228428e-05, + "loss": 89.436, + "step": 55210 + }, + { + "epoch": 0.2230957873600601, + "grad_norm": 545.8349609375, + "learning_rate": 3.824583488612019e-05, + "loss": 90.9351, + "step": 55220 + }, + { + "epoch": 0.22313618862542775, + "grad_norm": 1059.5601806640625, + "learning_rate": 3.824469105424416e-05, + "loss": 100.1431, + "step": 55230 + }, + { + "epoch": 0.2231765898907954, + "grad_norm": 464.3710632324219, + "learning_rate": 3.824354686667848e-05, + "loss": 91.6817, + "step": 55240 + }, + { + "epoch": 0.223216991156163, + "grad_norm": 991.6779174804688, + "learning_rate": 3.8242402323445464e-05, + "loss": 95.8331, + "step": 55250 + }, + { + "epoch": 0.22325739242153064, + "grad_norm": 648.117431640625, + "learning_rate": 3.824125742456742e-05, + "loss": 82.7466, + "step": 55260 + }, + { + "epoch": 0.22329779368689828, + "grad_norm": 570.035888671875, + "learning_rate": 3.824011217006668e-05, + "loss": 129.7278, + "step": 55270 + }, + { + "epoch": 0.2233381949522659, + "grad_norm": 612.2469482421875, + "learning_rate": 3.823896655996556e-05, + "loss": 121.0202, + "step": 55280 + }, + { + "epoch": 0.22337859621763353, + "grad_norm": 824.7863159179688, + "learning_rate": 3.823782059428639e-05, + "loss": 97.3133, + "step": 55290 + }, + { + "epoch": 0.22341899748300117, + "grad_norm": 1290.49169921875, + "learning_rate": 3.823667427305152e-05, + "loss": 113.6475, + "step": 55300 + }, + { + "epoch": 0.2234593987483688, + "grad_norm": 666.7347412109375, + "learning_rate": 3.8235527596283294e-05, + "loss": 100.5432, + "step": 55310 + }, + { + "epoch": 0.22349980001373643, + "grad_norm": 1224.248779296875, + "learning_rate": 3.823438056400408e-05, + "loss": 119.4734, + "step": 55320 + }, + { + "epoch": 0.22354020127910407, + "grad_norm": 675.4926147460938, + "learning_rate": 3.823323317623622e-05, + "loss": 69.7473, + "step": 55330 + }, + { + "epoch": 0.22358060254447168, + "grad_norm": 1385.3265380859375, + "learning_rate": 3.823208543300209e-05, + "loss": 133.8401, + "step": 55340 + }, + { + "epoch": 0.22362100380983932, + "grad_norm": 1751.1173095703125, + "learning_rate": 3.8230937334324075e-05, + "loss": 120.7242, + "step": 55350 + }, + { + "epoch": 0.22366140507520696, + "grad_norm": 1001.9264526367188, + "learning_rate": 3.822978888022455e-05, + "loss": 129.3128, + "step": 55360 + }, + { + "epoch": 0.2237018063405746, + "grad_norm": 597.7722778320312, + "learning_rate": 3.82286400707259e-05, + "loss": 94.234, + "step": 55370 + }, + { + "epoch": 0.2237422076059422, + "grad_norm": 900.214111328125, + "learning_rate": 3.8227490905850534e-05, + "loss": 109.3518, + "step": 55380 + }, + { + "epoch": 0.22378260887130985, + "grad_norm": 660.6973266601562, + "learning_rate": 
3.8226341385620845e-05, + "loss": 102.7326, + "step": 55390 + }, + { + "epoch": 0.2238230101366775, + "grad_norm": 721.3206176757812, + "learning_rate": 3.822519151005925e-05, + "loss": 114.6839, + "step": 55400 + }, + { + "epoch": 0.2238634114020451, + "grad_norm": 493.4786376953125, + "learning_rate": 3.822404127918816e-05, + "loss": 120.0313, + "step": 55410 + }, + { + "epoch": 0.22390381266741274, + "grad_norm": 630.815673828125, + "learning_rate": 3.8222890693030007e-05, + "loss": 136.0426, + "step": 55420 + }, + { + "epoch": 0.22394421393278038, + "grad_norm": 439.9546813964844, + "learning_rate": 3.8221739751607205e-05, + "loss": 102.1026, + "step": 55430 + }, + { + "epoch": 0.223984615198148, + "grad_norm": 734.9124145507812, + "learning_rate": 3.822058845494222e-05, + "loss": 85.8745, + "step": 55440 + }, + { + "epoch": 0.22402501646351564, + "grad_norm": 732.6204833984375, + "learning_rate": 3.8219436803057484e-05, + "loss": 105.0916, + "step": 55450 + }, + { + "epoch": 0.22406541772888328, + "grad_norm": 283.3540954589844, + "learning_rate": 3.821828479597543e-05, + "loss": 59.1957, + "step": 55460 + }, + { + "epoch": 0.2241058189942509, + "grad_norm": 431.41839599609375, + "learning_rate": 3.8217132433718556e-05, + "loss": 125.1538, + "step": 55470 + }, + { + "epoch": 0.22414622025961853, + "grad_norm": 682.907958984375, + "learning_rate": 3.821597971630929e-05, + "loss": 106.2006, + "step": 55480 + }, + { + "epoch": 0.22418662152498617, + "grad_norm": 765.4852905273438, + "learning_rate": 3.821482664377013e-05, + "loss": 89.8252, + "step": 55490 + }, + { + "epoch": 0.22422702279035378, + "grad_norm": 4025.2099609375, + "learning_rate": 3.821367321612354e-05, + "loss": 139.9191, + "step": 55500 + }, + { + "epoch": 0.22426742405572142, + "grad_norm": 1293.9072265625, + "learning_rate": 3.821251943339202e-05, + "loss": 108.7704, + "step": 55510 + }, + { + "epoch": 0.22430782532108906, + "grad_norm": 515.4328002929688, + "learning_rate": 3.821136529559806e-05, + "loss": 112.0151, + "step": 55520 + }, + { + "epoch": 0.2243482265864567, + "grad_norm": 703.8489990234375, + "learning_rate": 3.821021080276415e-05, + "loss": 86.5409, + "step": 55530 + }, + { + "epoch": 0.2243886278518243, + "grad_norm": 494.3564147949219, + "learning_rate": 3.8209055954912815e-05, + "loss": 112.1109, + "step": 55540 + }, + { + "epoch": 0.22442902911719195, + "grad_norm": 649.3157958984375, + "learning_rate": 3.820790075206655e-05, + "loss": 127.8423, + "step": 55550 + }, + { + "epoch": 0.2244694303825596, + "grad_norm": 624.2719116210938, + "learning_rate": 3.820674519424789e-05, + "loss": 78.8339, + "step": 55560 + }, + { + "epoch": 0.2245098316479272, + "grad_norm": 921.0108642578125, + "learning_rate": 3.820558928147935e-05, + "loss": 101.9858, + "step": 55570 + }, + { + "epoch": 0.22455023291329484, + "grad_norm": 907.411865234375, + "learning_rate": 3.820443301378348e-05, + "loss": 53.907, + "step": 55580 + }, + { + "epoch": 0.22459063417866248, + "grad_norm": 682.5847778320312, + "learning_rate": 3.820327639118282e-05, + "loss": 119.4029, + "step": 55590 + }, + { + "epoch": 0.2246310354440301, + "grad_norm": 1129.36083984375, + "learning_rate": 3.8202119413699914e-05, + "loss": 72.0605, + "step": 55600 + }, + { + "epoch": 0.22467143670939774, + "grad_norm": 1446.9840087890625, + "learning_rate": 3.820096208135732e-05, + "loss": 128.2112, + "step": 55610 + }, + { + "epoch": 0.22471183797476538, + "grad_norm": 691.3804321289062, + "learning_rate": 3.8199804394177594e-05, + "loss": 70.8323, + "step": 
55620 + }, + { + "epoch": 0.224752239240133, + "grad_norm": 705.3590087890625, + "learning_rate": 3.819864635218332e-05, + "loss": 115.2171, + "step": 55630 + }, + { + "epoch": 0.22479264050550063, + "grad_norm": 588.4963989257812, + "learning_rate": 3.819748795539706e-05, + "loss": 118.0868, + "step": 55640 + }, + { + "epoch": 0.22483304177086827, + "grad_norm": 741.12255859375, + "learning_rate": 3.8196329203841404e-05, + "loss": 146.4526, + "step": 55650 + }, + { + "epoch": 0.22487344303623588, + "grad_norm": 832.9857177734375, + "learning_rate": 3.819517009753894e-05, + "loss": 108.9431, + "step": 55660 + }, + { + "epoch": 0.22491384430160352, + "grad_norm": 1198.30078125, + "learning_rate": 3.8194010636512274e-05, + "loss": 85.857, + "step": 55670 + }, + { + "epoch": 0.22495424556697116, + "grad_norm": 443.41448974609375, + "learning_rate": 3.8192850820784e-05, + "loss": 96.9895, + "step": 55680 + }, + { + "epoch": 0.2249946468323388, + "grad_norm": 912.131103515625, + "learning_rate": 3.8191690650376734e-05, + "loss": 139.702, + "step": 55690 + }, + { + "epoch": 0.2250350480977064, + "grad_norm": 1615.9251708984375, + "learning_rate": 3.819053012531309e-05, + "loss": 126.4906, + "step": 55700 + }, + { + "epoch": 0.22507544936307405, + "grad_norm": 1126.037109375, + "learning_rate": 3.8189369245615695e-05, + "loss": 130.0721, + "step": 55710 + }, + { + "epoch": 0.2251158506284417, + "grad_norm": 817.4421997070312, + "learning_rate": 3.818820801130719e-05, + "loss": 101.9801, + "step": 55720 + }, + { + "epoch": 0.2251562518938093, + "grad_norm": 853.7957763671875, + "learning_rate": 3.81870464224102e-05, + "loss": 94.6266, + "step": 55730 + }, + { + "epoch": 0.22519665315917695, + "grad_norm": 703.4712524414062, + "learning_rate": 3.818588447894738e-05, + "loss": 107.9467, + "step": 55740 + }, + { + "epoch": 0.22523705442454459, + "grad_norm": 1948.759521484375, + "learning_rate": 3.8184722180941376e-05, + "loss": 115.7349, + "step": 55750 + }, + { + "epoch": 0.2252774556899122, + "grad_norm": 875.17724609375, + "learning_rate": 3.818355952841485e-05, + "loss": 104.6729, + "step": 55760 + }, + { + "epoch": 0.22531785695527984, + "grad_norm": 539.0001831054688, + "learning_rate": 3.818239652139047e-05, + "loss": 88.5361, + "step": 55770 + }, + { + "epoch": 0.22535825822064748, + "grad_norm": 332.4164123535156, + "learning_rate": 3.818123315989091e-05, + "loss": 122.1441, + "step": 55780 + }, + { + "epoch": 0.2253986594860151, + "grad_norm": 398.97808837890625, + "learning_rate": 3.818006944393885e-05, + "loss": 114.4421, + "step": 55790 + }, + { + "epoch": 0.22543906075138273, + "grad_norm": 480.5595397949219, + "learning_rate": 3.817890537355698e-05, + "loss": 80.7767, + "step": 55800 + }, + { + "epoch": 0.22547946201675037, + "grad_norm": 438.9368591308594, + "learning_rate": 3.817774094876798e-05, + "loss": 85.5584, + "step": 55810 + }, + { + "epoch": 0.22551986328211798, + "grad_norm": 486.41021728515625, + "learning_rate": 3.8176576169594574e-05, + "loss": 114.937, + "step": 55820 + }, + { + "epoch": 0.22556026454748562, + "grad_norm": 820.587890625, + "learning_rate": 3.817541103605945e-05, + "loss": 122.3118, + "step": 55830 + }, + { + "epoch": 0.22560066581285326, + "grad_norm": 898.918212890625, + "learning_rate": 3.817424554818533e-05, + "loss": 80.9702, + "step": 55840 + }, + { + "epoch": 0.2256410670782209, + "grad_norm": 734.275390625, + "learning_rate": 3.817307970599494e-05, + "loss": 87.5143, + "step": 55850 + }, + { + "epoch": 0.22568146834358851, + "grad_norm": 
865.2683715820312, + "learning_rate": 3.817191350951101e-05, + "loss": 105.2953, + "step": 55860 + }, + { + "epoch": 0.22572186960895615, + "grad_norm": 1428.0037841796875, + "learning_rate": 3.817074695875626e-05, + "loss": 113.229, + "step": 55870 + }, + { + "epoch": 0.2257622708743238, + "grad_norm": 512.0474243164062, + "learning_rate": 3.816958005375344e-05, + "loss": 128.9295, + "step": 55880 + }, + { + "epoch": 0.2258026721396914, + "grad_norm": 1637.209228515625, + "learning_rate": 3.816841279452532e-05, + "loss": 126.9019, + "step": 55890 + }, + { + "epoch": 0.22584307340505905, + "grad_norm": 1556.1134033203125, + "learning_rate": 3.816724518109463e-05, + "loss": 90.5246, + "step": 55900 + }, + { + "epoch": 0.2258834746704267, + "grad_norm": 514.300537109375, + "learning_rate": 3.8166077213484135e-05, + "loss": 76.1302, + "step": 55910 + }, + { + "epoch": 0.2259238759357943, + "grad_norm": 1368.739990234375, + "learning_rate": 3.816490889171662e-05, + "loss": 94.5712, + "step": 55920 + }, + { + "epoch": 0.22596427720116194, + "grad_norm": 1132.2021484375, + "learning_rate": 3.816374021581485e-05, + "loss": 123.4543, + "step": 55930 + }, + { + "epoch": 0.22600467846652958, + "grad_norm": 552.1500854492188, + "learning_rate": 3.816257118580161e-05, + "loss": 109.8899, + "step": 55940 + }, + { + "epoch": 0.2260450797318972, + "grad_norm": 956.635498046875, + "learning_rate": 3.816140180169971e-05, + "loss": 101.0549, + "step": 55950 + }, + { + "epoch": 0.22608548099726483, + "grad_norm": 709.8731689453125, + "learning_rate": 3.816023206353192e-05, + "loss": 112.3765, + "step": 55960 + }, + { + "epoch": 0.22612588226263247, + "grad_norm": 527.2216186523438, + "learning_rate": 3.815906197132106e-05, + "loss": 76.0223, + "step": 55970 + }, + { + "epoch": 0.22616628352800008, + "grad_norm": 1809.823974609375, + "learning_rate": 3.815789152508993e-05, + "loss": 79.81, + "step": 55980 + }, + { + "epoch": 0.22620668479336772, + "grad_norm": 963.7578125, + "learning_rate": 3.815672072486136e-05, + "loss": 112.547, + "step": 55990 + }, + { + "epoch": 0.22624708605873536, + "grad_norm": 317.4357604980469, + "learning_rate": 3.815554957065818e-05, + "loss": 82.7273, + "step": 56000 + }, + { + "epoch": 0.226287487324103, + "grad_norm": 549.1414794921875, + "learning_rate": 3.8154378062503207e-05, + "loss": 113.8832, + "step": 56010 + }, + { + "epoch": 0.22632788858947062, + "grad_norm": 687.55029296875, + "learning_rate": 3.815320620041929e-05, + "loss": 82.6664, + "step": 56020 + }, + { + "epoch": 0.22636828985483826, + "grad_norm": 1230.7120361328125, + "learning_rate": 3.815203398442927e-05, + "loss": 88.2701, + "step": 56030 + }, + { + "epoch": 0.2264086911202059, + "grad_norm": 1268.81298828125, + "learning_rate": 3.8150861414556e-05, + "loss": 86.3638, + "step": 56040 + }, + { + "epoch": 0.2264490923855735, + "grad_norm": 527.530517578125, + "learning_rate": 3.814968849082234e-05, + "loss": 106.4566, + "step": 56050 + }, + { + "epoch": 0.22648949365094115, + "grad_norm": 1075.1612548828125, + "learning_rate": 3.8148515213251166e-05, + "loss": 115.1653, + "step": 56060 + }, + { + "epoch": 0.2265298949163088, + "grad_norm": 414.3692932128906, + "learning_rate": 3.814734158186534e-05, + "loss": 86.9755, + "step": 56070 + }, + { + "epoch": 0.2265702961816764, + "grad_norm": 699.307373046875, + "learning_rate": 3.8146167596687746e-05, + "loss": 106.0024, + "step": 56080 + }, + { + "epoch": 0.22661069744704404, + "grad_norm": 829.2940673828125, + "learning_rate": 3.814499325774128e-05, + 
"loss": 98.3182, + "step": 56090 + }, + { + "epoch": 0.22665109871241168, + "grad_norm": 871.7859497070312, + "learning_rate": 3.814381856504882e-05, + "loss": 93.5845, + "step": 56100 + }, + { + "epoch": 0.2266914999777793, + "grad_norm": 730.6956176757812, + "learning_rate": 3.814264351863328e-05, + "loss": 61.4111, + "step": 56110 + }, + { + "epoch": 0.22673190124314693, + "grad_norm": 429.155029296875, + "learning_rate": 3.814146811851757e-05, + "loss": 74.4641, + "step": 56120 + }, + { + "epoch": 0.22677230250851457, + "grad_norm": 847.5466918945312, + "learning_rate": 3.814029236472459e-05, + "loss": 78.5706, + "step": 56130 + }, + { + "epoch": 0.22681270377388218, + "grad_norm": 1300.9559326171875, + "learning_rate": 3.813911625727727e-05, + "loss": 94.0009, + "step": 56140 + }, + { + "epoch": 0.22685310503924982, + "grad_norm": 1070.789306640625, + "learning_rate": 3.813793979619854e-05, + "loss": 70.8665, + "step": 56150 + }, + { + "epoch": 0.22689350630461746, + "grad_norm": 657.74658203125, + "learning_rate": 3.813676298151135e-05, + "loss": 128.043, + "step": 56160 + }, + { + "epoch": 0.2269339075699851, + "grad_norm": 1185.731201171875, + "learning_rate": 3.8135585813238616e-05, + "loss": 129.8655, + "step": 56170 + }, + { + "epoch": 0.22697430883535272, + "grad_norm": 739.6994018554688, + "learning_rate": 3.81344082914033e-05, + "loss": 79.084, + "step": 56180 + }, + { + "epoch": 0.22701471010072036, + "grad_norm": 1271.3480224609375, + "learning_rate": 3.8133230416028355e-05, + "loss": 95.1999, + "step": 56190 + }, + { + "epoch": 0.227055111366088, + "grad_norm": 1311.511962890625, + "learning_rate": 3.813205218713676e-05, + "loss": 140.4144, + "step": 56200 + }, + { + "epoch": 0.2270955126314556, + "grad_norm": 454.7865295410156, + "learning_rate": 3.813087360475146e-05, + "loss": 62.7374, + "step": 56210 + }, + { + "epoch": 0.22713591389682325, + "grad_norm": 1077.718994140625, + "learning_rate": 3.812969466889545e-05, + "loss": 85.4546, + "step": 56220 + }, + { + "epoch": 0.2271763151621909, + "grad_norm": 2175.621826171875, + "learning_rate": 3.812851537959171e-05, + "loss": 100.4553, + "step": 56230 + }, + { + "epoch": 0.2272167164275585, + "grad_norm": 1417.822509765625, + "learning_rate": 3.8127335736863227e-05, + "loss": 105.6238, + "step": 56240 + }, + { + "epoch": 0.22725711769292614, + "grad_norm": 442.9526062011719, + "learning_rate": 3.812615574073301e-05, + "loss": 76.5183, + "step": 56250 + }, + { + "epoch": 0.22729751895829378, + "grad_norm": 1707.21533203125, + "learning_rate": 3.812497539122404e-05, + "loss": 131.6317, + "step": 56260 + }, + { + "epoch": 0.2273379202236614, + "grad_norm": 783.8552856445312, + "learning_rate": 3.812379468835935e-05, + "loss": 80.0517, + "step": 56270 + }, + { + "epoch": 0.22737832148902903, + "grad_norm": 1992.5341796875, + "learning_rate": 3.812261363216195e-05, + "loss": 108.6731, + "step": 56280 + }, + { + "epoch": 0.22741872275439667, + "grad_norm": 679.7243041992188, + "learning_rate": 3.812143222265487e-05, + "loss": 116.8327, + "step": 56290 + }, + { + "epoch": 0.22745912401976429, + "grad_norm": 714.0056762695312, + "learning_rate": 3.8120250459861144e-05, + "loss": 77.7708, + "step": 56300 + }, + { + "epoch": 0.22749952528513193, + "grad_norm": 1190.2813720703125, + "learning_rate": 3.81190683438038e-05, + "loss": 141.524, + "step": 56310 + }, + { + "epoch": 0.22753992655049957, + "grad_norm": 837.7686157226562, + "learning_rate": 3.811788587450589e-05, + "loss": 84.9918, + "step": 56320 + }, + { + "epoch": 
0.2275803278158672, + "grad_norm": 535.9450073242188, + "learning_rate": 3.8116703051990464e-05, + "loss": 98.8781, + "step": 56330 + }, + { + "epoch": 0.22762072908123482, + "grad_norm": 1504.6859130859375, + "learning_rate": 3.811551987628059e-05, + "loss": 72.0623, + "step": 56340 + }, + { + "epoch": 0.22766113034660246, + "grad_norm": 750.7572021484375, + "learning_rate": 3.811433634739933e-05, + "loss": 62.2339, + "step": 56350 + }, + { + "epoch": 0.2277015316119701, + "grad_norm": 723.6486206054688, + "learning_rate": 3.8113152465369756e-05, + "loss": 83.9102, + "step": 56360 + }, + { + "epoch": 0.2277419328773377, + "grad_norm": 1265.8897705078125, + "learning_rate": 3.8111968230214945e-05, + "loss": 89.5198, + "step": 56370 + }, + { + "epoch": 0.22778233414270535, + "grad_norm": 859.3117065429688, + "learning_rate": 3.811078364195799e-05, + "loss": 78.4007, + "step": 56380 + }, + { + "epoch": 0.227822735408073, + "grad_norm": 871.0656127929688, + "learning_rate": 3.810959870062199e-05, + "loss": 105.4606, + "step": 56390 + }, + { + "epoch": 0.2278631366734406, + "grad_norm": 872.7322998046875, + "learning_rate": 3.810841340623004e-05, + "loss": 157.768, + "step": 56400 + }, + { + "epoch": 0.22790353793880824, + "grad_norm": 639.68359375, + "learning_rate": 3.810722775880524e-05, + "loss": 87.8456, + "step": 56410 + }, + { + "epoch": 0.22794393920417588, + "grad_norm": 637.336669921875, + "learning_rate": 3.810604175837072e-05, + "loss": 117.3123, + "step": 56420 + }, + { + "epoch": 0.2279843404695435, + "grad_norm": 1182.2080078125, + "learning_rate": 3.8104855404949585e-05, + "loss": 81.7558, + "step": 56430 + }, + { + "epoch": 0.22802474173491113, + "grad_norm": 807.4472045898438, + "learning_rate": 3.810366869856498e-05, + "loss": 118.6587, + "step": 56440 + }, + { + "epoch": 0.22806514300027877, + "grad_norm": 1060.9754638671875, + "learning_rate": 3.8102481639240035e-05, + "loss": 97.9204, + "step": 56450 + }, + { + "epoch": 0.2281055442656464, + "grad_norm": 769.6349487304688, + "learning_rate": 3.810129422699789e-05, + "loss": 126.8758, + "step": 56460 + }, + { + "epoch": 0.22814594553101403, + "grad_norm": 1039.654541015625, + "learning_rate": 3.810010646186169e-05, + "loss": 126.8552, + "step": 56470 + }, + { + "epoch": 0.22818634679638167, + "grad_norm": 1282.518798828125, + "learning_rate": 3.80989183438546e-05, + "loss": 101.5466, + "step": 56480 + }, + { + "epoch": 0.2282267480617493, + "grad_norm": 485.8934631347656, + "learning_rate": 3.809772987299978e-05, + "loss": 157.8158, + "step": 56490 + }, + { + "epoch": 0.22826714932711692, + "grad_norm": 640.9640502929688, + "learning_rate": 3.809654104932039e-05, + "loss": 153.7198, + "step": 56500 + }, + { + "epoch": 0.22830755059248456, + "grad_norm": 544.7276000976562, + "learning_rate": 3.809535187283962e-05, + "loss": 107.4445, + "step": 56510 + }, + { + "epoch": 0.2283479518578522, + "grad_norm": 468.20379638671875, + "learning_rate": 3.8094162343580654e-05, + "loss": 125.4887, + "step": 56520 + }, + { + "epoch": 0.2283883531232198, + "grad_norm": 1096.58203125, + "learning_rate": 3.809297246156668e-05, + "loss": 99.9516, + "step": 56530 + }, + { + "epoch": 0.22842875438858745, + "grad_norm": 573.7517700195312, + "learning_rate": 3.8091782226820886e-05, + "loss": 124.1552, + "step": 56540 + }, + { + "epoch": 0.2284691556539551, + "grad_norm": 758.5760498046875, + "learning_rate": 3.809059163936648e-05, + "loss": 67.5945, + "step": 56550 + }, + { + "epoch": 0.2285095569193227, + "grad_norm": 1296.6275634765625, + 
"learning_rate": 3.808940069922669e-05, + "loss": 126.3181, + "step": 56560 + }, + { + "epoch": 0.22854995818469034, + "grad_norm": 575.740966796875, + "learning_rate": 3.808820940642471e-05, + "loss": 77.8614, + "step": 56570 + }, + { + "epoch": 0.22859035945005798, + "grad_norm": 624.5924682617188, + "learning_rate": 3.8087017760983774e-05, + "loss": 89.8068, + "step": 56580 + }, + { + "epoch": 0.2286307607154256, + "grad_norm": 650.568359375, + "learning_rate": 3.808582576292712e-05, + "loss": 107.7232, + "step": 56590 + }, + { + "epoch": 0.22867116198079324, + "grad_norm": 1033.136474609375, + "learning_rate": 3.8084633412277974e-05, + "loss": 89.8193, + "step": 56600 + }, + { + "epoch": 0.22871156324616088, + "grad_norm": 761.9685668945312, + "learning_rate": 3.808344070905959e-05, + "loss": 51.0237, + "step": 56610 + }, + { + "epoch": 0.2287519645115285, + "grad_norm": 1210.3616943359375, + "learning_rate": 3.808224765329523e-05, + "loss": 146.9132, + "step": 56620 + }, + { + "epoch": 0.22879236577689613, + "grad_norm": 1028.709228515625, + "learning_rate": 3.808105424500814e-05, + "loss": 158.2257, + "step": 56630 + }, + { + "epoch": 0.22883276704226377, + "grad_norm": 894.2236938476562, + "learning_rate": 3.807986048422158e-05, + "loss": 106.8958, + "step": 56640 + }, + { + "epoch": 0.2288731683076314, + "grad_norm": 1025.10498046875, + "learning_rate": 3.807866637095884e-05, + "loss": 120.4186, + "step": 56650 + }, + { + "epoch": 0.22891356957299902, + "grad_norm": 675.10791015625, + "learning_rate": 3.807747190524318e-05, + "loss": 114.8852, + "step": 56660 + }, + { + "epoch": 0.22895397083836666, + "grad_norm": 693.18212890625, + "learning_rate": 3.8076277087097915e-05, + "loss": 78.5636, + "step": 56670 + }, + { + "epoch": 0.2289943721037343, + "grad_norm": 1115.8829345703125, + "learning_rate": 3.807508191654632e-05, + "loss": 129.0966, + "step": 56680 + }, + { + "epoch": 0.2290347733691019, + "grad_norm": 277.85791015625, + "learning_rate": 3.807388639361168e-05, + "loss": 86.6932, + "step": 56690 + }, + { + "epoch": 0.22907517463446955, + "grad_norm": 1218.150634765625, + "learning_rate": 3.8072690518317334e-05, + "loss": 74.8243, + "step": 56700 + }, + { + "epoch": 0.2291155758998372, + "grad_norm": 805.25146484375, + "learning_rate": 3.807149429068658e-05, + "loss": 88.1177, + "step": 56710 + }, + { + "epoch": 0.2291559771652048, + "grad_norm": 1168.702880859375, + "learning_rate": 3.807029771074274e-05, + "loss": 124.6714, + "step": 56720 + }, + { + "epoch": 0.22919637843057244, + "grad_norm": 1125.881591796875, + "learning_rate": 3.806910077850914e-05, + "loss": 109.5174, + "step": 56730 + }, + { + "epoch": 0.22923677969594008, + "grad_norm": 509.4538879394531, + "learning_rate": 3.806790349400912e-05, + "loss": 82.0062, + "step": 56740 + }, + { + "epoch": 0.2292771809613077, + "grad_norm": 1841.9864501953125, + "learning_rate": 3.806670585726602e-05, + "loss": 118.7667, + "step": 56750 + }, + { + "epoch": 0.22931758222667534, + "grad_norm": 899.99365234375, + "learning_rate": 3.806550786830319e-05, + "loss": 124.0399, + "step": 56760 + }, + { + "epoch": 0.22935798349204298, + "grad_norm": 411.6260681152344, + "learning_rate": 3.806430952714398e-05, + "loss": 82.055, + "step": 56770 + }, + { + "epoch": 0.2293983847574106, + "grad_norm": 692.7327270507812, + "learning_rate": 3.806311083381175e-05, + "loss": 122.9506, + "step": 56780 + }, + { + "epoch": 0.22943878602277823, + "grad_norm": 962.8499145507812, + "learning_rate": 3.806191178832989e-05, + "loss": 72.3307, + 
"step": 56790 + }, + { + "epoch": 0.22947918728814587, + "grad_norm": 852.694580078125, + "learning_rate": 3.806071239072175e-05, + "loss": 69.1236, + "step": 56800 + }, + { + "epoch": 0.2295195885535135, + "grad_norm": 1051.81591796875, + "learning_rate": 3.8059512641010726e-05, + "loss": 119.8456, + "step": 56810 + }, + { + "epoch": 0.22955998981888112, + "grad_norm": 1180.85107421875, + "learning_rate": 3.80583125392202e-05, + "loss": 82.298, + "step": 56820 + }, + { + "epoch": 0.22960039108424876, + "grad_norm": 668.0035400390625, + "learning_rate": 3.805711208537358e-05, + "loss": 83.2489, + "step": 56830 + }, + { + "epoch": 0.2296407923496164, + "grad_norm": 807.3283081054688, + "learning_rate": 3.805591127949426e-05, + "loss": 112.8339, + "step": 56840 + }, + { + "epoch": 0.229681193614984, + "grad_norm": 932.2955322265625, + "learning_rate": 3.805471012160566e-05, + "loss": 121.7063, + "step": 56850 + }, + { + "epoch": 0.22972159488035165, + "grad_norm": 603.8250122070312, + "learning_rate": 3.8053508611731185e-05, + "loss": 124.868, + "step": 56860 + }, + { + "epoch": 0.2297619961457193, + "grad_norm": 814.9713745117188, + "learning_rate": 3.805230674989427e-05, + "loss": 108.9946, + "step": 56870 + }, + { + "epoch": 0.2298023974110869, + "grad_norm": 908.1614379882812, + "learning_rate": 3.805110453611834e-05, + "loss": 113.9732, + "step": 56880 + }, + { + "epoch": 0.22984279867645455, + "grad_norm": 1009.3179931640625, + "learning_rate": 3.804990197042683e-05, + "loss": 110.5536, + "step": 56890 + }, + { + "epoch": 0.22988319994182219, + "grad_norm": 974.9014282226562, + "learning_rate": 3.804869905284319e-05, + "loss": 81.4192, + "step": 56900 + }, + { + "epoch": 0.2299236012071898, + "grad_norm": 729.4176025390625, + "learning_rate": 3.8047495783390874e-05, + "loss": 106.3683, + "step": 56910 + }, + { + "epoch": 0.22996400247255744, + "grad_norm": 435.5649108886719, + "learning_rate": 3.804629216209333e-05, + "loss": 94.5071, + "step": 56920 + }, + { + "epoch": 0.23000440373792508, + "grad_norm": 744.3433227539062, + "learning_rate": 3.8045088188974026e-05, + "loss": 152.0293, + "step": 56930 + }, + { + "epoch": 0.2300448050032927, + "grad_norm": 1294.310791015625, + "learning_rate": 3.804388386405645e-05, + "loss": 124.8272, + "step": 56940 + }, + { + "epoch": 0.23008520626866033, + "grad_norm": 712.5318603515625, + "learning_rate": 3.804267918736406e-05, + "loss": 101.3534, + "step": 56950 + }, + { + "epoch": 0.23012560753402797, + "grad_norm": 1186.490234375, + "learning_rate": 3.8041474158920356e-05, + "loss": 120.1226, + "step": 56960 + }, + { + "epoch": 0.2301660087993956, + "grad_norm": 614.9903564453125, + "learning_rate": 3.804026877874882e-05, + "loss": 114.0646, + "step": 56970 + }, + { + "epoch": 0.23020641006476322, + "grad_norm": 903.8734741210938, + "learning_rate": 3.803906304687296e-05, + "loss": 105.403, + "step": 56980 + }, + { + "epoch": 0.23024681133013086, + "grad_norm": 434.8164978027344, + "learning_rate": 3.803785696331627e-05, + "loss": 110.2086, + "step": 56990 + }, + { + "epoch": 0.2302872125954985, + "grad_norm": 454.1942138671875, + "learning_rate": 3.803665052810228e-05, + "loss": 78.5141, + "step": 57000 + }, + { + "epoch": 0.23032761386086611, + "grad_norm": 826.4489135742188, + "learning_rate": 3.803544374125449e-05, + "loss": 77.3587, + "step": 57010 + }, + { + "epoch": 0.23036801512623375, + "grad_norm": 670.23583984375, + "learning_rate": 3.803423660279646e-05, + "loss": 94.7174, + "step": 57020 + }, + { + "epoch": 0.2304084163916014, + 
"grad_norm": 788.89501953125, + "learning_rate": 3.803302911275169e-05, + "loss": 122.1704, + "step": 57030 + }, + { + "epoch": 0.230448817656969, + "grad_norm": 1461.5723876953125, + "learning_rate": 3.803182127114374e-05, + "loss": 73.6563, + "step": 57040 + }, + { + "epoch": 0.23048921892233665, + "grad_norm": 532.5935668945312, + "learning_rate": 3.803061307799614e-05, + "loss": 106.8059, + "step": 57050 + }, + { + "epoch": 0.2305296201877043, + "grad_norm": 449.291748046875, + "learning_rate": 3.802940453333246e-05, + "loss": 111.5942, + "step": 57060 + }, + { + "epoch": 0.2305700214530719, + "grad_norm": 707.6343383789062, + "learning_rate": 3.802819563717626e-05, + "loss": 147.069, + "step": 57070 + }, + { + "epoch": 0.23061042271843954, + "grad_norm": 962.5892333984375, + "learning_rate": 3.8026986389551105e-05, + "loss": 110.4691, + "step": 57080 + }, + { + "epoch": 0.23065082398380718, + "grad_norm": 375.018798828125, + "learning_rate": 3.802577679048057e-05, + "loss": 64.0588, + "step": 57090 + }, + { + "epoch": 0.2306912252491748, + "grad_norm": 572.849609375, + "learning_rate": 3.802456683998823e-05, + "loss": 70.7066, + "step": 57100 + }, + { + "epoch": 0.23073162651454243, + "grad_norm": 1206.847900390625, + "learning_rate": 3.802335653809768e-05, + "loss": 108.3018, + "step": 57110 + }, + { + "epoch": 0.23077202777991007, + "grad_norm": 1384.28564453125, + "learning_rate": 3.802214588483252e-05, + "loss": 116.4599, + "step": 57120 + }, + { + "epoch": 0.2308124290452777, + "grad_norm": 381.70574951171875, + "learning_rate": 3.8020934880216344e-05, + "loss": 111.3915, + "step": 57130 + }, + { + "epoch": 0.23085283031064532, + "grad_norm": 970.7710571289062, + "learning_rate": 3.801972352427277e-05, + "loss": 135.0401, + "step": 57140 + }, + { + "epoch": 0.23089323157601296, + "grad_norm": 553.1760864257812, + "learning_rate": 3.801851181702541e-05, + "loss": 89.0234, + "step": 57150 + }, + { + "epoch": 0.2309336328413806, + "grad_norm": 1294.71826171875, + "learning_rate": 3.8017299758497875e-05, + "loss": 99.812, + "step": 57160 + }, + { + "epoch": 0.23097403410674822, + "grad_norm": 885.74755859375, + "learning_rate": 3.801608734871381e-05, + "loss": 77.9519, + "step": 57170 + }, + { + "epoch": 0.23101443537211586, + "grad_norm": 860.6478881835938, + "learning_rate": 3.8014874587696846e-05, + "loss": 120.5781, + "step": 57180 + }, + { + "epoch": 0.2310548366374835, + "grad_norm": 836.8544311523438, + "learning_rate": 3.8013661475470634e-05, + "loss": 85.4121, + "step": 57190 + }, + { + "epoch": 0.2310952379028511, + "grad_norm": 1417.952880859375, + "learning_rate": 3.801244801205881e-05, + "loss": 123.9357, + "step": 57200 + }, + { + "epoch": 0.23113563916821875, + "grad_norm": 1695.4693603515625, + "learning_rate": 3.801123419748504e-05, + "loss": 144.4365, + "step": 57210 + }, + { + "epoch": 0.2311760404335864, + "grad_norm": 1155.4124755859375, + "learning_rate": 3.801002003177299e-05, + "loss": 93.3574, + "step": 57220 + }, + { + "epoch": 0.231216441698954, + "grad_norm": 1515.8018798828125, + "learning_rate": 3.800880551494632e-05, + "loss": 114.6759, + "step": 57230 + }, + { + "epoch": 0.23125684296432164, + "grad_norm": 636.5213012695312, + "learning_rate": 3.800759064702873e-05, + "loss": 126.6744, + "step": 57240 + }, + { + "epoch": 0.23129724422968928, + "grad_norm": 444.4427490234375, + "learning_rate": 3.800637542804387e-05, + "loss": 83.217, + "step": 57250 + }, + { + "epoch": 0.2313376454950569, + "grad_norm": 741.2324829101562, + "learning_rate": 
3.800515985801546e-05, + "loss": 72.103, + "step": 57260 + }, + { + "epoch": 0.23137804676042453, + "grad_norm": 540.2615966796875, + "learning_rate": 3.800394393696719e-05, + "loss": 126.7831, + "step": 57270 + }, + { + "epoch": 0.23141844802579217, + "grad_norm": 864.940185546875, + "learning_rate": 3.800272766492277e-05, + "loss": 80.342, + "step": 57280 + }, + { + "epoch": 0.2314588492911598, + "grad_norm": 1458.8101806640625, + "learning_rate": 3.800151104190589e-05, + "loss": 118.1697, + "step": 57290 + }, + { + "epoch": 0.23149925055652743, + "grad_norm": 1241.495849609375, + "learning_rate": 3.8000294067940295e-05, + "loss": 124.1844, + "step": 57300 + }, + { + "epoch": 0.23153965182189506, + "grad_norm": 396.0081481933594, + "learning_rate": 3.79990767430497e-05, + "loss": 132.7233, + "step": 57310 + }, + { + "epoch": 0.2315800530872627, + "grad_norm": 1773.5672607421875, + "learning_rate": 3.799785906725784e-05, + "loss": 147.039, + "step": 57320 + }, + { + "epoch": 0.23162045435263032, + "grad_norm": 1712.5682373046875, + "learning_rate": 3.7996641040588443e-05, + "loss": 126.1013, + "step": 57330 + }, + { + "epoch": 0.23166085561799796, + "grad_norm": 643.7217407226562, + "learning_rate": 3.7995422663065264e-05, + "loss": 96.8093, + "step": 57340 + }, + { + "epoch": 0.2317012568833656, + "grad_norm": 702.2538452148438, + "learning_rate": 3.799420393471206e-05, + "loss": 125.1134, + "step": 57350 + }, + { + "epoch": 0.2317416581487332, + "grad_norm": 3458.546630859375, + "learning_rate": 3.799298485555259e-05, + "loss": 100.4862, + "step": 57360 + }, + { + "epoch": 0.23178205941410085, + "grad_norm": 645.4619140625, + "learning_rate": 3.799176542561061e-05, + "loss": 100.295, + "step": 57370 + }, + { + "epoch": 0.2318224606794685, + "grad_norm": 674.1900634765625, + "learning_rate": 3.7990545644909905e-05, + "loss": 81.6741, + "step": 57380 + }, + { + "epoch": 0.2318628619448361, + "grad_norm": 911.9677734375, + "learning_rate": 3.798932551347424e-05, + "loss": 109.944, + "step": 57390 + }, + { + "epoch": 0.23190326321020374, + "grad_norm": 841.6908569335938, + "learning_rate": 3.798810503132742e-05, + "loss": 98.041, + "step": 57400 + }, + { + "epoch": 0.23194366447557138, + "grad_norm": 995.7146606445312, + "learning_rate": 3.798688419849324e-05, + "loss": 122.2726, + "step": 57410 + }, + { + "epoch": 0.231984065740939, + "grad_norm": 368.1296691894531, + "learning_rate": 3.798566301499548e-05, + "loss": 136.8573, + "step": 57420 + }, + { + "epoch": 0.23202446700630663, + "grad_norm": 812.0774536132812, + "learning_rate": 3.798444148085796e-05, + "loss": 70.9371, + "step": 57430 + }, + { + "epoch": 0.23206486827167427, + "grad_norm": 390.5767517089844, + "learning_rate": 3.79832195961045e-05, + "loss": 109.8272, + "step": 57440 + }, + { + "epoch": 0.23210526953704191, + "grad_norm": 997.0044555664062, + "learning_rate": 3.7981997360758917e-05, + "loss": 113.3395, + "step": 57450 + }, + { + "epoch": 0.23214567080240953, + "grad_norm": 986.1064453125, + "learning_rate": 3.7980774774845035e-05, + "loss": 104.7502, + "step": 57460 + }, + { + "epoch": 0.23218607206777717, + "grad_norm": 737.5077514648438, + "learning_rate": 3.797955183838669e-05, + "loss": 144.5562, + "step": 57470 + }, + { + "epoch": 0.2322264733331448, + "grad_norm": 724.896484375, + "learning_rate": 3.797832855140773e-05, + "loss": 96.0681, + "step": 57480 + }, + { + "epoch": 0.23226687459851242, + "grad_norm": 574.3898315429688, + "learning_rate": 3.797710491393199e-05, + "loss": 98.7946, + "step": 57490 + }, + 
{ + "epoch": 0.23230727586388006, + "grad_norm": 789.36865234375, + "learning_rate": 3.7975880925983345e-05, + "loss": 112.934, + "step": 57500 + }, + { + "epoch": 0.2323476771292477, + "grad_norm": 567.2487182617188, + "learning_rate": 3.7974656587585645e-05, + "loss": 112.838, + "step": 57510 + }, + { + "epoch": 0.2323880783946153, + "grad_norm": 833.6903076171875, + "learning_rate": 3.7973431898762757e-05, + "loss": 108.917, + "step": 57520 + }, + { + "epoch": 0.23242847965998295, + "grad_norm": 569.2434692382812, + "learning_rate": 3.797220685953856e-05, + "loss": 93.1947, + "step": 57530 + }, + { + "epoch": 0.2324688809253506, + "grad_norm": 688.5921630859375, + "learning_rate": 3.7970981469936936e-05, + "loss": 93.3327, + "step": 57540 + }, + { + "epoch": 0.2325092821907182, + "grad_norm": 899.4078979492188, + "learning_rate": 3.7969755729981776e-05, + "loss": 80.3051, + "step": 57550 + }, + { + "epoch": 0.23254968345608584, + "grad_norm": 1015.1488037109375, + "learning_rate": 3.796852963969699e-05, + "loss": 116.4375, + "step": 57560 + }, + { + "epoch": 0.23259008472145348, + "grad_norm": 765.8465576171875, + "learning_rate": 3.796730319910645e-05, + "loss": 125.0767, + "step": 57570 + }, + { + "epoch": 0.2326304859868211, + "grad_norm": 849.9150390625, + "learning_rate": 3.796607640823409e-05, + "loss": 132.4627, + "step": 57580 + }, + { + "epoch": 0.23267088725218874, + "grad_norm": 826.0771484375, + "learning_rate": 3.7964849267103824e-05, + "loss": 86.093, + "step": 57590 + }, + { + "epoch": 0.23271128851755638, + "grad_norm": 597.1438598632812, + "learning_rate": 3.796362177573957e-05, + "loss": 112.3205, + "step": 57600 + }, + { + "epoch": 0.23275168978292402, + "grad_norm": 1425.2646484375, + "learning_rate": 3.796239393416526e-05, + "loss": 114.0667, + "step": 57610 + }, + { + "epoch": 0.23279209104829163, + "grad_norm": 576.45703125, + "learning_rate": 3.796116574240484e-05, + "loss": 132.2212, + "step": 57620 + }, + { + "epoch": 0.23283249231365927, + "grad_norm": 947.598388671875, + "learning_rate": 3.795993720048224e-05, + "loss": 89.9369, + "step": 57630 + }, + { + "epoch": 0.2328728935790269, + "grad_norm": 1345.5723876953125, + "learning_rate": 3.795870830842142e-05, + "loss": 96.0626, + "step": 57640 + }, + { + "epoch": 0.23291329484439452, + "grad_norm": 602.86376953125, + "learning_rate": 3.795747906624634e-05, + "loss": 102.8041, + "step": 57650 + }, + { + "epoch": 0.23295369610976216, + "grad_norm": 852.7508544921875, + "learning_rate": 3.795624947398096e-05, + "loss": 131.7493, + "step": 57660 + }, + { + "epoch": 0.2329940973751298, + "grad_norm": 1139.660400390625, + "learning_rate": 3.795501953164924e-05, + "loss": 95.1869, + "step": 57670 + }, + { + "epoch": 0.2330344986404974, + "grad_norm": 666.6486206054688, + "learning_rate": 3.795378923927518e-05, + "loss": 147.149, + "step": 57680 + }, + { + "epoch": 0.23307489990586505, + "grad_norm": 933.6730346679688, + "learning_rate": 3.795255859688276e-05, + "loss": 115.6546, + "step": 57690 + }, + { + "epoch": 0.2331153011712327, + "grad_norm": 735.5430908203125, + "learning_rate": 3.7951327604495957e-05, + "loss": 80.773, + "step": 57700 + }, + { + "epoch": 0.2331557024366003, + "grad_norm": 698.564208984375, + "learning_rate": 3.7950096262138784e-05, + "loss": 66.2171, + "step": 57710 + }, + { + "epoch": 0.23319610370196794, + "grad_norm": 778.54296875, + "learning_rate": 3.794886456983524e-05, + "loss": 132.7007, + "step": 57720 + }, + { + "epoch": 0.23323650496733558, + "grad_norm": 831.33251953125, + 
"learning_rate": 3.7947632527609345e-05, + "loss": 113.6544, + "step": 57730 + }, + { + "epoch": 0.2332769062327032, + "grad_norm": 3942.43896484375, + "learning_rate": 3.7946400135485115e-05, + "loss": 98.7463, + "step": 57740 + }, + { + "epoch": 0.23331730749807084, + "grad_norm": 1609.7852783203125, + "learning_rate": 3.794516739348657e-05, + "loss": 93.2245, + "step": 57750 + }, + { + "epoch": 0.23335770876343848, + "grad_norm": 693.8519897460938, + "learning_rate": 3.794393430163776e-05, + "loss": 76.7485, + "step": 57760 + }, + { + "epoch": 0.2333981100288061, + "grad_norm": 331.87078857421875, + "learning_rate": 3.7942700859962694e-05, + "loss": 118.4316, + "step": 57770 + }, + { + "epoch": 0.23343851129417373, + "grad_norm": 1329.51611328125, + "learning_rate": 3.794146706848545e-05, + "loss": 136.9642, + "step": 57780 + }, + { + "epoch": 0.23347891255954137, + "grad_norm": 834.9077758789062, + "learning_rate": 3.794023292723006e-05, + "loss": 107.1519, + "step": 57790 + }, + { + "epoch": 0.233519313824909, + "grad_norm": 789.717041015625, + "learning_rate": 3.7938998436220604e-05, + "loss": 181.8444, + "step": 57800 + }, + { + "epoch": 0.23355971509027662, + "grad_norm": 2342.259521484375, + "learning_rate": 3.793776359548113e-05, + "loss": 111.2926, + "step": 57810 + }, + { + "epoch": 0.23360011635564426, + "grad_norm": 1226.922607421875, + "learning_rate": 3.793652840503572e-05, + "loss": 95.3619, + "step": 57820 + }, + { + "epoch": 0.2336405176210119, + "grad_norm": 1017.34814453125, + "learning_rate": 3.793529286490846e-05, + "loss": 103.705, + "step": 57830 + }, + { + "epoch": 0.2336809188863795, + "grad_norm": 479.9931335449219, + "learning_rate": 3.7934056975123424e-05, + "loss": 91.5169, + "step": 57840 + }, + { + "epoch": 0.23372132015174715, + "grad_norm": 639.6784057617188, + "learning_rate": 3.793282073570472e-05, + "loss": 125.1747, + "step": 57850 + }, + { + "epoch": 0.2337617214171148, + "grad_norm": 1132.715087890625, + "learning_rate": 3.7931584146676444e-05, + "loss": 141.3777, + "step": 57860 + }, + { + "epoch": 0.2338021226824824, + "grad_norm": 1520.7171630859375, + "learning_rate": 3.793034720806269e-05, + "loss": 81.1108, + "step": 57870 + }, + { + "epoch": 0.23384252394785005, + "grad_norm": 794.2672119140625, + "learning_rate": 3.792910991988761e-05, + "loss": 97.3528, + "step": 57880 + }, + { + "epoch": 0.23388292521321769, + "grad_norm": 1095.7169189453125, + "learning_rate": 3.792787228217529e-05, + "loss": 114.455, + "step": 57890 + }, + { + "epoch": 0.2339233264785853, + "grad_norm": 1223.0452880859375, + "learning_rate": 3.792663429494986e-05, + "loss": 100.9564, + "step": 57900 + }, + { + "epoch": 0.23396372774395294, + "grad_norm": 1370.2698974609375, + "learning_rate": 3.792539595823548e-05, + "loss": 93.1722, + "step": 57910 + }, + { + "epoch": 0.23400412900932058, + "grad_norm": 578.9678344726562, + "learning_rate": 3.792415727205627e-05, + "loss": 99.8347, + "step": 57920 + }, + { + "epoch": 0.2340445302746882, + "grad_norm": 1447.6973876953125, + "learning_rate": 3.7922918236436384e-05, + "loss": 140.3301, + "step": 57930 + }, + { + "epoch": 0.23408493154005583, + "grad_norm": 1327.2806396484375, + "learning_rate": 3.7921678851399984e-05, + "loss": 74.3396, + "step": 57940 + }, + { + "epoch": 0.23412533280542347, + "grad_norm": 296.90301513671875, + "learning_rate": 3.792043911697122e-05, + "loss": 82.5662, + "step": 57950 + }, + { + "epoch": 0.2341657340707911, + "grad_norm": 1020.02880859375, + "learning_rate": 3.791919903317428e-05, + 
"loss": 79.5906, + "step": 57960 + }, + { + "epoch": 0.23420613533615872, + "grad_norm": 1108.0133056640625, + "learning_rate": 3.7917958600033326e-05, + "loss": 108.672, + "step": 57970 + }, + { + "epoch": 0.23424653660152636, + "grad_norm": 1015.9360961914062, + "learning_rate": 3.791671781757254e-05, + "loss": 65.5053, + "step": 57980 + }, + { + "epoch": 0.234286937866894, + "grad_norm": 392.6874694824219, + "learning_rate": 3.7915476685816124e-05, + "loss": 94.2783, + "step": 57990 + }, + { + "epoch": 0.23432733913226161, + "grad_norm": 1077.373779296875, + "learning_rate": 3.791423520478826e-05, + "loss": 108.0826, + "step": 58000 + }, + { + "epoch": 0.23436774039762925, + "grad_norm": 825.9427490234375, + "learning_rate": 3.791299337451316e-05, + "loss": 90.763, + "step": 58010 + }, + { + "epoch": 0.2344081416629969, + "grad_norm": 3807.073486328125, + "learning_rate": 3.791175119501503e-05, + "loss": 135.3612, + "step": 58020 + }, + { + "epoch": 0.2344485429283645, + "grad_norm": 974.1555786132812, + "learning_rate": 3.791050866631809e-05, + "loss": 79.3657, + "step": 58030 + }, + { + "epoch": 0.23448894419373215, + "grad_norm": 967.0960083007812, + "learning_rate": 3.790926578844657e-05, + "loss": 138.4739, + "step": 58040 + }, + { + "epoch": 0.2345293454590998, + "grad_norm": 448.822509765625, + "learning_rate": 3.790802256142468e-05, + "loss": 86.7168, + "step": 58050 + }, + { + "epoch": 0.2345697467244674, + "grad_norm": 532.562744140625, + "learning_rate": 3.790677898527668e-05, + "loss": 136.999, + "step": 58060 + }, + { + "epoch": 0.23461014798983504, + "grad_norm": 929.1100463867188, + "learning_rate": 3.79055350600268e-05, + "loss": 116.6686, + "step": 58070 + }, + { + "epoch": 0.23465054925520268, + "grad_norm": 629.6224975585938, + "learning_rate": 3.79042907856993e-05, + "loss": 108.6816, + "step": 58080 + }, + { + "epoch": 0.2346909505205703, + "grad_norm": 734.6692504882812, + "learning_rate": 3.790304616231843e-05, + "loss": 113.1525, + "step": 58090 + }, + { + "epoch": 0.23473135178593793, + "grad_norm": 340.5950012207031, + "learning_rate": 3.790180118990845e-05, + "loss": 134.4205, + "step": 58100 + }, + { + "epoch": 0.23477175305130557, + "grad_norm": 510.8604736328125, + "learning_rate": 3.7900555868493656e-05, + "loss": 59.1877, + "step": 58110 + }, + { + "epoch": 0.2348121543166732, + "grad_norm": 530.1917724609375, + "learning_rate": 3.78993101980983e-05, + "loss": 64.9851, + "step": 58120 + }, + { + "epoch": 0.23485255558204082, + "grad_norm": 1019.1861572265625, + "learning_rate": 3.789806417874668e-05, + "loss": 73.9086, + "step": 58130 + }, + { + "epoch": 0.23489295684740846, + "grad_norm": 653.8428955078125, + "learning_rate": 3.789681781046308e-05, + "loss": 103.3189, + "step": 58140 + }, + { + "epoch": 0.2349333581127761, + "grad_norm": 4421.1162109375, + "learning_rate": 3.789557109327181e-05, + "loss": 98.5552, + "step": 58150 + }, + { + "epoch": 0.23497375937814372, + "grad_norm": 640.8590087890625, + "learning_rate": 3.789432402719716e-05, + "loss": 118.8453, + "step": 58160 + }, + { + "epoch": 0.23501416064351136, + "grad_norm": 885.3309326171875, + "learning_rate": 3.7893076612263454e-05, + "loss": 115.1265, + "step": 58170 + }, + { + "epoch": 0.235054561908879, + "grad_norm": 596.861083984375, + "learning_rate": 3.7891828848495006e-05, + "loss": 68.9423, + "step": 58180 + }, + { + "epoch": 0.2350949631742466, + "grad_norm": 456.4228515625, + "learning_rate": 3.789058073591615e-05, + "loss": 91.6257, + "step": 58190 + }, + { + "epoch": 
0.23513536443961425, + "grad_norm": 1138.335205078125, + "learning_rate": 3.78893322745512e-05, + "loss": 100.3203, + "step": 58200 + }, + { + "epoch": 0.2351757657049819, + "grad_norm": 1008.3480224609375, + "learning_rate": 3.7888083464424513e-05, + "loss": 129.1233, + "step": 58210 + }, + { + "epoch": 0.2352161669703495, + "grad_norm": 623.0090942382812, + "learning_rate": 3.788683430556043e-05, + "loss": 80.1154, + "step": 58220 + }, + { + "epoch": 0.23525656823571714, + "grad_norm": 1090.668212890625, + "learning_rate": 3.78855847979833e-05, + "loss": 94.6625, + "step": 58230 + }, + { + "epoch": 0.23529696950108478, + "grad_norm": 1210.515869140625, + "learning_rate": 3.7884334941717494e-05, + "loss": 149.0509, + "step": 58240 + }, + { + "epoch": 0.2353373707664524, + "grad_norm": 3735.24951171875, + "learning_rate": 3.788308473678737e-05, + "loss": 203.884, + "step": 58250 + }, + { + "epoch": 0.23537777203182003, + "grad_norm": 625.9381713867188, + "learning_rate": 3.78818341832173e-05, + "loss": 113.444, + "step": 58260 + }, + { + "epoch": 0.23541817329718767, + "grad_norm": 939.540283203125, + "learning_rate": 3.788058328103166e-05, + "loss": 132.2816, + "step": 58270 + }, + { + "epoch": 0.2354585745625553, + "grad_norm": 678.6011352539062, + "learning_rate": 3.787933203025485e-05, + "loss": 84.1426, + "step": 58280 + }, + { + "epoch": 0.23549897582792292, + "grad_norm": 875.9268798828125, + "learning_rate": 3.787808043091126e-05, + "loss": 107.2938, + "step": 58290 + }, + { + "epoch": 0.23553937709329056, + "grad_norm": 382.9146728515625, + "learning_rate": 3.787682848302528e-05, + "loss": 117.6348, + "step": 58300 + }, + { + "epoch": 0.2355797783586582, + "grad_norm": 804.398681640625, + "learning_rate": 3.787557618662133e-05, + "loss": 84.1675, + "step": 58310 + }, + { + "epoch": 0.23562017962402582, + "grad_norm": 874.1199951171875, + "learning_rate": 3.787432354172381e-05, + "loss": 90.8841, + "step": 58320 + }, + { + "epoch": 0.23566058088939346, + "grad_norm": 2398.533203125, + "learning_rate": 3.787307054835716e-05, + "loss": 131.9245, + "step": 58330 + }, + { + "epoch": 0.2357009821547611, + "grad_norm": 900.5089111328125, + "learning_rate": 3.7871817206545805e-05, + "loss": 105.0597, + "step": 58340 + }, + { + "epoch": 0.2357413834201287, + "grad_norm": 644.414306640625, + "learning_rate": 3.787056351631416e-05, + "loss": 81.7388, + "step": 58350 + }, + { + "epoch": 0.23578178468549635, + "grad_norm": 2379.84765625, + "learning_rate": 3.786930947768668e-05, + "loss": 140.1207, + "step": 58360 + }, + { + "epoch": 0.235822185950864, + "grad_norm": 455.5780944824219, + "learning_rate": 3.7868055090687814e-05, + "loss": 100.8639, + "step": 58370 + }, + { + "epoch": 0.2358625872162316, + "grad_norm": 705.1060180664062, + "learning_rate": 3.786680035534202e-05, + "loss": 109.694, + "step": 58380 + }, + { + "epoch": 0.23590298848159924, + "grad_norm": 690.2679443359375, + "learning_rate": 3.7865545271673744e-05, + "loss": 99.1022, + "step": 58390 + }, + { + "epoch": 0.23594338974696688, + "grad_norm": 546.4279174804688, + "learning_rate": 3.786428983970748e-05, + "loss": 135.0689, + "step": 58400 + }, + { + "epoch": 0.2359837910123345, + "grad_norm": 1364.4224853515625, + "learning_rate": 3.7863034059467676e-05, + "loss": 112.7331, + "step": 58410 + }, + { + "epoch": 0.23602419227770213, + "grad_norm": 513.878662109375, + "learning_rate": 3.786177793097883e-05, + "loss": 97.0626, + "step": 58420 + }, + { + "epoch": 0.23606459354306977, + "grad_norm": 1283.0843505859375, + 
"learning_rate": 3.786052145426543e-05, + "loss": 88.0088, + "step": 58430 + }, + { + "epoch": 0.2361049948084374, + "grad_norm": 806.3046875, + "learning_rate": 3.7859264629351965e-05, + "loss": 89.4536, + "step": 58440 + }, + { + "epoch": 0.23614539607380503, + "grad_norm": 385.24212646484375, + "learning_rate": 3.785800745626294e-05, + "loss": 93.0923, + "step": 58450 + }, + { + "epoch": 0.23618579733917267, + "grad_norm": 953.452880859375, + "learning_rate": 3.785674993502287e-05, + "loss": 123.3629, + "step": 58460 + }, + { + "epoch": 0.2362261986045403, + "grad_norm": 502.5683898925781, + "learning_rate": 3.785549206565626e-05, + "loss": 82.1018, + "step": 58470 + }, + { + "epoch": 0.23626659986990792, + "grad_norm": 459.7453918457031, + "learning_rate": 3.785423384818765e-05, + "loss": 94.2425, + "step": 58480 + }, + { + "epoch": 0.23630700113527556, + "grad_norm": 609.497314453125, + "learning_rate": 3.7852975282641555e-05, + "loss": 87.4015, + "step": 58490 + }, + { + "epoch": 0.2363474024006432, + "grad_norm": 946.9102783203125, + "learning_rate": 3.785171636904252e-05, + "loss": 82.7589, + "step": 58500 + }, + { + "epoch": 0.2363878036660108, + "grad_norm": 523.8385620117188, + "learning_rate": 3.785045710741507e-05, + "loss": 91.8581, + "step": 58510 + }, + { + "epoch": 0.23642820493137845, + "grad_norm": 1096.3548583984375, + "learning_rate": 3.7849197497783775e-05, + "loss": 96.3965, + "step": 58520 + }, + { + "epoch": 0.2364686061967461, + "grad_norm": 427.93646240234375, + "learning_rate": 3.784793754017319e-05, + "loss": 116.6613, + "step": 58530 + }, + { + "epoch": 0.2365090074621137, + "grad_norm": 2993.38134765625, + "learning_rate": 3.7846677234607874e-05, + "loss": 151.5205, + "step": 58540 + }, + { + "epoch": 0.23654940872748134, + "grad_norm": 1457.5404052734375, + "learning_rate": 3.7845416581112394e-05, + "loss": 93.9705, + "step": 58550 + }, + { + "epoch": 0.23658980999284898, + "grad_norm": 2880.7373046875, + "learning_rate": 3.784415557971133e-05, + "loss": 122.5074, + "step": 58560 + }, + { + "epoch": 0.2366302112582166, + "grad_norm": 1264.4603271484375, + "learning_rate": 3.784289423042927e-05, + "loss": 126.1034, + "step": 58570 + }, + { + "epoch": 0.23667061252358423, + "grad_norm": 819.107666015625, + "learning_rate": 3.784163253329079e-05, + "loss": 141.1911, + "step": 58580 + }, + { + "epoch": 0.23671101378895187, + "grad_norm": 1476.96337890625, + "learning_rate": 3.7840370488320514e-05, + "loss": 173.6309, + "step": 58590 + }, + { + "epoch": 0.23675141505431951, + "grad_norm": 730.6476440429688, + "learning_rate": 3.7839108095543016e-05, + "loss": 90.9459, + "step": 58600 + }, + { + "epoch": 0.23679181631968713, + "grad_norm": 625.1728515625, + "learning_rate": 3.783784535498293e-05, + "loss": 107.2876, + "step": 58610 + }, + { + "epoch": 0.23683221758505477, + "grad_norm": 1709.8636474609375, + "learning_rate": 3.7836582266664864e-05, + "loss": 150.1779, + "step": 58620 + }, + { + "epoch": 0.2368726188504224, + "grad_norm": 850.3084106445312, + "learning_rate": 3.783531883061345e-05, + "loss": 111.2487, + "step": 58630 + }, + { + "epoch": 0.23691302011579002, + "grad_norm": 641.0333862304688, + "learning_rate": 3.7834055046853297e-05, + "loss": 98.71, + "step": 58640 + }, + { + "epoch": 0.23695342138115766, + "grad_norm": 1203.37158203125, + "learning_rate": 3.783279091540907e-05, + "loss": 96.2756, + "step": 58650 + }, + { + "epoch": 0.2369938226465253, + "grad_norm": 637.991943359375, + "learning_rate": 3.783152643630541e-05, + "loss": 79.1743, + 
"step": 58660 + }, + { + "epoch": 0.2370342239118929, + "grad_norm": 650.8229370117188, + "learning_rate": 3.783026160956695e-05, + "loss": 77.7432, + "step": 58670 + }, + { + "epoch": 0.23707462517726055, + "grad_norm": 629.7841186523438, + "learning_rate": 3.782899643521836e-05, + "loss": 84.37, + "step": 58680 + }, + { + "epoch": 0.2371150264426282, + "grad_norm": 712.90283203125, + "learning_rate": 3.782773091328431e-05, + "loss": 111.5241, + "step": 58690 + }, + { + "epoch": 0.2371554277079958, + "grad_norm": 1006.4022216796875, + "learning_rate": 3.782646504378947e-05, + "loss": 68.0544, + "step": 58700 + }, + { + "epoch": 0.23719582897336344, + "grad_norm": 958.9357299804688, + "learning_rate": 3.782519882675851e-05, + "loss": 98.0294, + "step": 58710 + }, + { + "epoch": 0.23723623023873108, + "grad_norm": 480.5754089355469, + "learning_rate": 3.782393226221613e-05, + "loss": 89.1497, + "step": 58720 + }, + { + "epoch": 0.2372766315040987, + "grad_norm": 1631.68115234375, + "learning_rate": 3.7822665350187006e-05, + "loss": 170.6472, + "step": 58730 + }, + { + "epoch": 0.23731703276946634, + "grad_norm": 1339.2601318359375, + "learning_rate": 3.782139809069585e-05, + "loss": 124.5563, + "step": 58740 + }, + { + "epoch": 0.23735743403483398, + "grad_norm": 1041.58740234375, + "learning_rate": 3.782013048376736e-05, + "loss": 101.57, + "step": 58750 + }, + { + "epoch": 0.23739783530020162, + "grad_norm": 797.3617553710938, + "learning_rate": 3.7818862529426255e-05, + "loss": 113.0389, + "step": 58760 + }, + { + "epoch": 0.23743823656556923, + "grad_norm": 501.02606201171875, + "learning_rate": 3.781759422769725e-05, + "loss": 105.2435, + "step": 58770 + }, + { + "epoch": 0.23747863783093687, + "grad_norm": 406.3040466308594, + "learning_rate": 3.781632557860507e-05, + "loss": 72.2338, + "step": 58780 + }, + { + "epoch": 0.2375190390963045, + "grad_norm": 1033.3238525390625, + "learning_rate": 3.7815056582174455e-05, + "loss": 109.3837, + "step": 58790 + }, + { + "epoch": 0.23755944036167212, + "grad_norm": 995.8871459960938, + "learning_rate": 3.781378723843014e-05, + "loss": 103.584, + "step": 58800 + }, + { + "epoch": 0.23759984162703976, + "grad_norm": 801.3675537109375, + "learning_rate": 3.781251754739686e-05, + "loss": 115.4915, + "step": 58810 + }, + { + "epoch": 0.2376402428924074, + "grad_norm": 598.1197509765625, + "learning_rate": 3.781124750909939e-05, + "loss": 66.8253, + "step": 58820 + }, + { + "epoch": 0.237680644157775, + "grad_norm": 695.3208618164062, + "learning_rate": 3.7809977123562486e-05, + "loss": 112.4289, + "step": 58830 + }, + { + "epoch": 0.23772104542314265, + "grad_norm": 971.5728149414062, + "learning_rate": 3.78087063908109e-05, + "loss": 110.4719, + "step": 58840 + }, + { + "epoch": 0.2377614466885103, + "grad_norm": 167.75857543945312, + "learning_rate": 3.7807435310869415e-05, + "loss": 123.7762, + "step": 58850 + }, + { + "epoch": 0.2378018479538779, + "grad_norm": 532.1731567382812, + "learning_rate": 3.780616388376281e-05, + "loss": 100.5701, + "step": 58860 + }, + { + "epoch": 0.23784224921924554, + "grad_norm": 979.2057495117188, + "learning_rate": 3.780489210951588e-05, + "loss": 99.1844, + "step": 58870 + }, + { + "epoch": 0.23788265048461318, + "grad_norm": 341.1992492675781, + "learning_rate": 3.7803619988153404e-05, + "loss": 109.5135, + "step": 58880 + }, + { + "epoch": 0.2379230517499808, + "grad_norm": 1180.691650390625, + "learning_rate": 3.780234751970019e-05, + "loss": 67.1738, + "step": 58890 + }, + { + "epoch": 0.23796345301534844, 
+ "grad_norm": 1115.95458984375, + "learning_rate": 3.7801074704181046e-05, + "loss": 132.5123, + "step": 58900 + }, + { + "epoch": 0.23800385428071608, + "grad_norm": 1498.4993896484375, + "learning_rate": 3.77998015416208e-05, + "loss": 124.042, + "step": 58910 + }, + { + "epoch": 0.23804425554608372, + "grad_norm": 475.79608154296875, + "learning_rate": 3.779852803204424e-05, + "loss": 66.2962, + "step": 58920 + }, + { + "epoch": 0.23808465681145133, + "grad_norm": 747.5648193359375, + "learning_rate": 3.779725417547622e-05, + "loss": 115.3903, + "step": 58930 + }, + { + "epoch": 0.23812505807681897, + "grad_norm": 663.6346435546875, + "learning_rate": 3.7795979971941573e-05, + "loss": 84.0251, + "step": 58940 + }, + { + "epoch": 0.2381654593421866, + "grad_norm": 663.1929321289062, + "learning_rate": 3.779470542146513e-05, + "loss": 93.6917, + "step": 58950 + }, + { + "epoch": 0.23820586060755422, + "grad_norm": 770.202392578125, + "learning_rate": 3.779343052407174e-05, + "loss": 74.3521, + "step": 58960 + }, + { + "epoch": 0.23824626187292186, + "grad_norm": 555.599365234375, + "learning_rate": 3.779215527978626e-05, + "loss": 91.2006, + "step": 58970 + }, + { + "epoch": 0.2382866631382895, + "grad_norm": 642.787109375, + "learning_rate": 3.7790879688633565e-05, + "loss": 103.0893, + "step": 58980 + }, + { + "epoch": 0.2383270644036571, + "grad_norm": 676.2999877929688, + "learning_rate": 3.77896037506385e-05, + "loss": 175.0371, + "step": 58990 + }, + { + "epoch": 0.23836746566902475, + "grad_norm": 942.9210205078125, + "learning_rate": 3.778832746582596e-05, + "loss": 125.6891, + "step": 59000 + }, + { + "epoch": 0.2384078669343924, + "grad_norm": 471.7799987792969, + "learning_rate": 3.778705083422081e-05, + "loss": 95.173, + "step": 59010 + }, + { + "epoch": 0.23844826819976, + "grad_norm": 679.1378173828125, + "learning_rate": 3.7785773855847944e-05, + "loss": 94.8684, + "step": 59020 + }, + { + "epoch": 0.23848866946512765, + "grad_norm": 1433.923095703125, + "learning_rate": 3.7784496530732264e-05, + "loss": 85.4767, + "step": 59030 + }, + { + "epoch": 0.23852907073049529, + "grad_norm": 570.0117797851562, + "learning_rate": 3.778321885889867e-05, + "loss": 103.5534, + "step": 59040 + }, + { + "epoch": 0.2385694719958629, + "grad_norm": 1195.59033203125, + "learning_rate": 3.778194084037207e-05, + "loss": 102.9608, + "step": 59050 + }, + { + "epoch": 0.23860987326123054, + "grad_norm": 735.2047729492188, + "learning_rate": 3.778066247517737e-05, + "loss": 84.5705, + "step": 59060 + }, + { + "epoch": 0.23865027452659818, + "grad_norm": 337.5326232910156, + "learning_rate": 3.7779383763339505e-05, + "loss": 67.6904, + "step": 59070 + }, + { + "epoch": 0.23869067579196582, + "grad_norm": 474.41253662109375, + "learning_rate": 3.7778104704883405e-05, + "loss": 132.3674, + "step": 59080 + }, + { + "epoch": 0.23873107705733343, + "grad_norm": 498.4506530761719, + "learning_rate": 3.7776825299834e-05, + "loss": 81.5151, + "step": 59090 + }, + { + "epoch": 0.23877147832270107, + "grad_norm": 1089.6246337890625, + "learning_rate": 3.777554554821623e-05, + "loss": 124.9036, + "step": 59100 + }, + { + "epoch": 0.2388118795880687, + "grad_norm": 690.4824829101562, + "learning_rate": 3.777426545005505e-05, + "loss": 77.0556, + "step": 59110 + }, + { + "epoch": 0.23885228085343632, + "grad_norm": 316.0060729980469, + "learning_rate": 3.777298500537542e-05, + "loss": 129.0566, + "step": 59120 + }, + { + "epoch": 0.23889268211880396, + "grad_norm": 575.944091796875, + "learning_rate": 
3.7771704214202287e-05, + "loss": 110.401, + "step": 59130 + }, + { + "epoch": 0.2389330833841716, + "grad_norm": 986.3673095703125, + "learning_rate": 3.7770423076560635e-05, + "loss": 85.8204, + "step": 59140 + }, + { + "epoch": 0.23897348464953921, + "grad_norm": 541.3056640625, + "learning_rate": 3.776914159247544e-05, + "loss": 100.5992, + "step": 59150 + }, + { + "epoch": 0.23901388591490685, + "grad_norm": 486.48236083984375, + "learning_rate": 3.776785976197168e-05, + "loss": 86.1922, + "step": 59160 + }, + { + "epoch": 0.2390542871802745, + "grad_norm": 416.504150390625, + "learning_rate": 3.776657758507434e-05, + "loss": 87.3624, + "step": 59170 + }, + { + "epoch": 0.2390946884456421, + "grad_norm": 1117.2928466796875, + "learning_rate": 3.776529506180843e-05, + "loss": 51.453, + "step": 59180 + }, + { + "epoch": 0.23913508971100975, + "grad_norm": 1148.307861328125, + "learning_rate": 3.776401219219894e-05, + "loss": 116.4751, + "step": 59190 + }, + { + "epoch": 0.2391754909763774, + "grad_norm": 617.6036376953125, + "learning_rate": 3.7762728976270897e-05, + "loss": 94.2632, + "step": 59200 + }, + { + "epoch": 0.239215892241745, + "grad_norm": 1331.6995849609375, + "learning_rate": 3.77614454140493e-05, + "loss": 89.4077, + "step": 59210 + }, + { + "epoch": 0.23925629350711264, + "grad_norm": 523.2699584960938, + "learning_rate": 3.776016150555918e-05, + "loss": 83.4866, + "step": 59220 + }, + { + "epoch": 0.23929669477248028, + "grad_norm": 1026.3717041015625, + "learning_rate": 3.775887725082557e-05, + "loss": 80.6844, + "step": 59230 + }, + { + "epoch": 0.23933709603784792, + "grad_norm": 956.5472412109375, + "learning_rate": 3.7757592649873503e-05, + "loss": 123.4441, + "step": 59240 + }, + { + "epoch": 0.23937749730321553, + "grad_norm": 579.4854125976562, + "learning_rate": 3.7756307702728026e-05, + "loss": 121.4849, + "step": 59250 + }, + { + "epoch": 0.23941789856858317, + "grad_norm": 727.5404663085938, + "learning_rate": 3.7755022409414195e-05, + "loss": 98.3299, + "step": 59260 + }, + { + "epoch": 0.2394582998339508, + "grad_norm": 880.2427978515625, + "learning_rate": 3.775373676995705e-05, + "loss": 73.5452, + "step": 59270 + }, + { + "epoch": 0.23949870109931842, + "grad_norm": 1007.5031127929688, + "learning_rate": 3.775245078438168e-05, + "loss": 109.5919, + "step": 59280 + }, + { + "epoch": 0.23953910236468606, + "grad_norm": 942.3436889648438, + "learning_rate": 3.775116445271313e-05, + "loss": 70.814, + "step": 59290 + }, + { + "epoch": 0.2395795036300537, + "grad_norm": 911.0596313476562, + "learning_rate": 3.77498777749765e-05, + "loss": 89.9307, + "step": 59300 + }, + { + "epoch": 0.23961990489542132, + "grad_norm": 440.5122985839844, + "learning_rate": 3.7748590751196854e-05, + "loss": 100.4015, + "step": 59310 + }, + { + "epoch": 0.23966030616078896, + "grad_norm": 1075.908935546875, + "learning_rate": 3.7747303381399304e-05, + "loss": 120.0661, + "step": 59320 + }, + { + "epoch": 0.2397007074261566, + "grad_norm": 3719.984375, + "learning_rate": 3.7746015665608934e-05, + "loss": 117.6895, + "step": 59330 + }, + { + "epoch": 0.2397411086915242, + "grad_norm": 1656.7984619140625, + "learning_rate": 3.774472760385085e-05, + "loss": 129.3087, + "step": 59340 + }, + { + "epoch": 0.23978150995689185, + "grad_norm": 532.5039672851562, + "learning_rate": 3.7743439196150166e-05, + "loss": 96.9342, + "step": 59350 + }, + { + "epoch": 0.2398219112222595, + "grad_norm": 623.6903076171875, + "learning_rate": 3.774215044253201e-05, + "loss": 93.4591, + "step": 59360 
+ }, + { + "epoch": 0.2398623124876271, + "grad_norm": 1447.830322265625, + "learning_rate": 3.774086134302148e-05, + "loss": 115.7442, + "step": 59370 + }, + { + "epoch": 0.23990271375299474, + "grad_norm": 883.28759765625, + "learning_rate": 3.773957189764373e-05, + "loss": 91.1345, + "step": 59380 + }, + { + "epoch": 0.23994311501836238, + "grad_norm": 1141.7835693359375, + "learning_rate": 3.77382821064239e-05, + "loss": 119.0018, + "step": 59390 + }, + { + "epoch": 0.23998351628373002, + "grad_norm": 788.7601928710938, + "learning_rate": 3.773699196938712e-05, + "loss": 122.936, + "step": 59400 + }, + { + "epoch": 0.24002391754909763, + "grad_norm": 800.6116943359375, + "learning_rate": 3.7735701486558555e-05, + "loss": 57.829, + "step": 59410 + }, + { + "epoch": 0.24006431881446527, + "grad_norm": 424.2896728515625, + "learning_rate": 3.773441065796335e-05, + "loss": 76.6666, + "step": 59420 + }, + { + "epoch": 0.2401047200798329, + "grad_norm": 892.7840576171875, + "learning_rate": 3.7733119483626694e-05, + "loss": 85.4513, + "step": 59430 + }, + { + "epoch": 0.24014512134520052, + "grad_norm": 565.1644287109375, + "learning_rate": 3.7731827963573734e-05, + "loss": 109.8706, + "step": 59440 + }, + { + "epoch": 0.24018552261056816, + "grad_norm": 668.5365600585938, + "learning_rate": 3.7730536097829655e-05, + "loss": 76.1042, + "step": 59450 + }, + { + "epoch": 0.2402259238759358, + "grad_norm": 836.4471435546875, + "learning_rate": 3.7729243886419656e-05, + "loss": 99.8065, + "step": 59460 + }, + { + "epoch": 0.24026632514130342, + "grad_norm": 451.7341613769531, + "learning_rate": 3.772795132936891e-05, + "loss": 58.2667, + "step": 59470 + }, + { + "epoch": 0.24030672640667106, + "grad_norm": 843.3948974609375, + "learning_rate": 3.7726658426702636e-05, + "loss": 130.5708, + "step": 59480 + }, + { + "epoch": 0.2403471276720387, + "grad_norm": 1024.7684326171875, + "learning_rate": 3.7725365178446024e-05, + "loss": 90.5274, + "step": 59490 + }, + { + "epoch": 0.2403875289374063, + "grad_norm": 936.1085815429688, + "learning_rate": 3.7724071584624296e-05, + "loss": 126.5645, + "step": 59500 + }, + { + "epoch": 0.24042793020277395, + "grad_norm": 588.0001831054688, + "learning_rate": 3.772277764526267e-05, + "loss": 108.2653, + "step": 59510 + }, + { + "epoch": 0.2404683314681416, + "grad_norm": 524.4647827148438, + "learning_rate": 3.772148336038636e-05, + "loss": 67.8994, + "step": 59520 + }, + { + "epoch": 0.2405087327335092, + "grad_norm": 368.9629211425781, + "learning_rate": 3.772018873002061e-05, + "loss": 99.1219, + "step": 59530 + }, + { + "epoch": 0.24054913399887684, + "grad_norm": 530.7007446289062, + "learning_rate": 3.7718893754190665e-05, + "loss": 101.7188, + "step": 59540 + }, + { + "epoch": 0.24058953526424448, + "grad_norm": 696.43115234375, + "learning_rate": 3.7717598432921766e-05, + "loss": 98.9038, + "step": 59550 + }, + { + "epoch": 0.24062993652961212, + "grad_norm": 616.9910278320312, + "learning_rate": 3.771630276623915e-05, + "loss": 64.1646, + "step": 59560 + }, + { + "epoch": 0.24067033779497973, + "grad_norm": 829.0867919921875, + "learning_rate": 3.77150067541681e-05, + "loss": 84.8281, + "step": 59570 + }, + { + "epoch": 0.24071073906034737, + "grad_norm": 307.4955139160156, + "learning_rate": 3.7713710396733866e-05, + "loss": 71.3296, + "step": 59580 + }, + { + "epoch": 0.240751140325715, + "grad_norm": 341.6563720703125, + "learning_rate": 3.771241369396174e-05, + "loss": 86.2545, + "step": 59590 + }, + { + "epoch": 0.24079154159108263, + 
"grad_norm": 764.89404296875, + "learning_rate": 3.7711116645876984e-05, + "loss": 118.327, + "step": 59600 + }, + { + "epoch": 0.24083194285645027, + "grad_norm": 759.447265625, + "learning_rate": 3.770981925250489e-05, + "loss": 129.6245, + "step": 59610 + }, + { + "epoch": 0.2408723441218179, + "grad_norm": 955.57373046875, + "learning_rate": 3.7708521513870756e-05, + "loss": 90.8778, + "step": 59620 + }, + { + "epoch": 0.24091274538718552, + "grad_norm": 286.6724853515625, + "learning_rate": 3.7707223429999874e-05, + "loss": 88.8584, + "step": 59630 + }, + { + "epoch": 0.24095314665255316, + "grad_norm": 965.59716796875, + "learning_rate": 3.770592500091755e-05, + "loss": 90.4285, + "step": 59640 + }, + { + "epoch": 0.2409935479179208, + "grad_norm": 384.2714538574219, + "learning_rate": 3.7704626226649106e-05, + "loss": 109.0705, + "step": 59650 + }, + { + "epoch": 0.2410339491832884, + "grad_norm": 515.5958251953125, + "learning_rate": 3.7703327107219866e-05, + "loss": 118.7511, + "step": 59660 + }, + { + "epoch": 0.24107435044865605, + "grad_norm": 435.495849609375, + "learning_rate": 3.770202764265514e-05, + "loss": 94.9133, + "step": 59670 + }, + { + "epoch": 0.2411147517140237, + "grad_norm": 763.4544677734375, + "learning_rate": 3.7700727832980275e-05, + "loss": 70.4356, + "step": 59680 + }, + { + "epoch": 0.2411551529793913, + "grad_norm": 384.64385986328125, + "learning_rate": 3.769942767822061e-05, + "loss": 97.882, + "step": 59690 + }, + { + "epoch": 0.24119555424475894, + "grad_norm": 851.3671264648438, + "learning_rate": 3.769812717840149e-05, + "loss": 59.9911, + "step": 59700 + }, + { + "epoch": 0.24123595551012658, + "grad_norm": 764.9544677734375, + "learning_rate": 3.7696826333548265e-05, + "loss": 108.3298, + "step": 59710 + }, + { + "epoch": 0.24127635677549422, + "grad_norm": 1363.042236328125, + "learning_rate": 3.7695525143686305e-05, + "loss": 109.0271, + "step": 59720 + }, + { + "epoch": 0.24131675804086183, + "grad_norm": 546.606201171875, + "learning_rate": 3.7694223608840966e-05, + "loss": 92.7528, + "step": 59730 + }, + { + "epoch": 0.24135715930622947, + "grad_norm": 1940.9775390625, + "learning_rate": 3.7692921729037636e-05, + "loss": 94.457, + "step": 59740 + }, + { + "epoch": 0.24139756057159711, + "grad_norm": 880.95263671875, + "learning_rate": 3.769161950430168e-05, + "loss": 81.5209, + "step": 59750 + }, + { + "epoch": 0.24143796183696473, + "grad_norm": 215.4679718017578, + "learning_rate": 3.7690316934658497e-05, + "loss": 70.5718, + "step": 59760 + }, + { + "epoch": 0.24147836310233237, + "grad_norm": 692.8617553710938, + "learning_rate": 3.768901402013348e-05, + "loss": 109.4248, + "step": 59770 + }, + { + "epoch": 0.2415187643677, + "grad_norm": 749.2047729492188, + "learning_rate": 3.7687710760752026e-05, + "loss": 121.4366, + "step": 59780 + }, + { + "epoch": 0.24155916563306762, + "grad_norm": 760.3408203125, + "learning_rate": 3.768640715653955e-05, + "loss": 105.7677, + "step": 59790 + }, + { + "epoch": 0.24159956689843526, + "grad_norm": 1907.9853515625, + "learning_rate": 3.768510320752145e-05, + "loss": 105.1958, + "step": 59800 + }, + { + "epoch": 0.2416399681638029, + "grad_norm": 1419.8045654296875, + "learning_rate": 3.768379891372316e-05, + "loss": 97.3287, + "step": 59810 + }, + { + "epoch": 0.2416803694291705, + "grad_norm": 871.6134643554688, + "learning_rate": 3.768249427517011e-05, + "loss": 52.4379, + "step": 59820 + }, + { + "epoch": 0.24172077069453815, + "grad_norm": 3427.263671875, + "learning_rate": 
3.7681189291887734e-05, + "loss": 139.908, + "step": 59830 + }, + { + "epoch": 0.2417611719599058, + "grad_norm": 3192.88330078125, + "learning_rate": 3.767988396390146e-05, + "loss": 89.1294, + "step": 59840 + }, + { + "epoch": 0.2418015732252734, + "grad_norm": 917.2242431640625, + "learning_rate": 3.7678578291236756e-05, + "loss": 103.428, + "step": 59850 + }, + { + "epoch": 0.24184197449064104, + "grad_norm": 921.170654296875, + "learning_rate": 3.767727227391906e-05, + "loss": 118.0729, + "step": 59860 + }, + { + "epoch": 0.24188237575600868, + "grad_norm": 1201.132568359375, + "learning_rate": 3.7675965911973846e-05, + "loss": 114.8766, + "step": 59870 + }, + { + "epoch": 0.24192277702137632, + "grad_norm": 821.1017456054688, + "learning_rate": 3.767465920542657e-05, + "loss": 137.0163, + "step": 59880 + }, + { + "epoch": 0.24196317828674394, + "grad_norm": 847.866455078125, + "learning_rate": 3.767335215430271e-05, + "loss": 105.8519, + "step": 59890 + }, + { + "epoch": 0.24200357955211158, + "grad_norm": 706.5931396484375, + "learning_rate": 3.767204475862777e-05, + "loss": 99.6784, + "step": 59900 + }, + { + "epoch": 0.24204398081747922, + "grad_norm": 697.3872680664062, + "learning_rate": 3.76707370184272e-05, + "loss": 98.1725, + "step": 59910 + }, + { + "epoch": 0.24208438208284683, + "grad_norm": 1155.2279052734375, + "learning_rate": 3.766942893372652e-05, + "loss": 121.1948, + "step": 59920 + }, + { + "epoch": 0.24212478334821447, + "grad_norm": 1835.845458984375, + "learning_rate": 3.766812050455123e-05, + "loss": 100.7905, + "step": 59930 + }, + { + "epoch": 0.2421651846135821, + "grad_norm": 988.8313598632812, + "learning_rate": 3.766681173092683e-05, + "loss": 149.9354, + "step": 59940 + }, + { + "epoch": 0.24220558587894972, + "grad_norm": 1659.922607421875, + "learning_rate": 3.766550261287884e-05, + "loss": 114.8634, + "step": 59950 + }, + { + "epoch": 0.24224598714431736, + "grad_norm": 750.1390991210938, + "learning_rate": 3.766419315043278e-05, + "loss": 112.8856, + "step": 59960 + }, + { + "epoch": 0.242286388409685, + "grad_norm": 643.5686645507812, + "learning_rate": 3.7662883343614184e-05, + "loss": 94.5602, + "step": 59970 + }, + { + "epoch": 0.2423267896750526, + "grad_norm": 1295.0062255859375, + "learning_rate": 3.766157319244858e-05, + "loss": 138.2557, + "step": 59980 + }, + { + "epoch": 0.24236719094042025, + "grad_norm": 484.1633605957031, + "learning_rate": 3.766026269696152e-05, + "loss": 101.604, + "step": 59990 + }, + { + "epoch": 0.2424075922057879, + "grad_norm": 695.8294677734375, + "learning_rate": 3.7658951857178544e-05, + "loss": 113.1955, + "step": 60000 + }, + { + "epoch": 0.2424479934711555, + "grad_norm": 1642.4739990234375, + "learning_rate": 3.765764067312521e-05, + "loss": 113.4252, + "step": 60010 + }, + { + "epoch": 0.24248839473652314, + "grad_norm": 945.6461791992188, + "learning_rate": 3.7656329144827076e-05, + "loss": 97.8712, + "step": 60020 + }, + { + "epoch": 0.24252879600189078, + "grad_norm": 762.181640625, + "learning_rate": 3.765501727230972e-05, + "loss": 107.5911, + "step": 60030 + }, + { + "epoch": 0.24256919726725842, + "grad_norm": 506.6683349609375, + "learning_rate": 3.765370505559871e-05, + "loss": 80.9275, + "step": 60040 + }, + { + "epoch": 0.24260959853262604, + "grad_norm": 662.3472290039062, + "learning_rate": 3.765239249471964e-05, + "loss": 87.0323, + "step": 60050 + }, + { + "epoch": 0.24264999979799368, + "grad_norm": 596.1854858398438, + "learning_rate": 3.7651079589698075e-05, + "loss": 91.9281, + "step": 
60060 + }, + { + "epoch": 0.24269040106336132, + "grad_norm": 743.2182006835938, + "learning_rate": 3.764976634055963e-05, + "loss": 108.7354, + "step": 60070 + }, + { + "epoch": 0.24273080232872893, + "grad_norm": 756.8488159179688, + "learning_rate": 3.764845274732992e-05, + "loss": 123.3724, + "step": 60080 + }, + { + "epoch": 0.24277120359409657, + "grad_norm": 1230.0062255859375, + "learning_rate": 3.7647138810034526e-05, + "loss": 104.4602, + "step": 60090 + }, + { + "epoch": 0.2428116048594642, + "grad_norm": 1227.811279296875, + "learning_rate": 3.764582452869907e-05, + "loss": 101.4176, + "step": 60100 + }, + { + "epoch": 0.24285200612483182, + "grad_norm": 1130.011474609375, + "learning_rate": 3.7644509903349186e-05, + "loss": 85.0605, + "step": 60110 + }, + { + "epoch": 0.24289240739019946, + "grad_norm": 1035.083984375, + "learning_rate": 3.7643194934010494e-05, + "loss": 109.7007, + "step": 60120 + }, + { + "epoch": 0.2429328086555671, + "grad_norm": 787.6973266601562, + "learning_rate": 3.7641879620708636e-05, + "loss": 106.005, + "step": 60130 + }, + { + "epoch": 0.2429732099209347, + "grad_norm": 679.463134765625, + "learning_rate": 3.764056396346925e-05, + "loss": 96.3705, + "step": 60140 + }, + { + "epoch": 0.24301361118630235, + "grad_norm": 1113.6385498046875, + "learning_rate": 3.763924796231799e-05, + "loss": 95.9024, + "step": 60150 + }, + { + "epoch": 0.24305401245167, + "grad_norm": 659.6593017578125, + "learning_rate": 3.763793161728051e-05, + "loss": 81.4323, + "step": 60160 + }, + { + "epoch": 0.2430944137170376, + "grad_norm": 1508.6103515625, + "learning_rate": 3.763661492838247e-05, + "loss": 103.3584, + "step": 60170 + }, + { + "epoch": 0.24313481498240525, + "grad_norm": 745.3189086914062, + "learning_rate": 3.763529789564955e-05, + "loss": 77.7978, + "step": 60180 + }, + { + "epoch": 0.24317521624777289, + "grad_norm": 761.920166015625, + "learning_rate": 3.7633980519107406e-05, + "loss": 100.5048, + "step": 60190 + }, + { + "epoch": 0.24321561751314053, + "grad_norm": 539.8789672851562, + "learning_rate": 3.763266279878174e-05, + "loss": 122.4004, + "step": 60200 + }, + { + "epoch": 0.24325601877850814, + "grad_norm": 750.6740112304688, + "learning_rate": 3.763134473469824e-05, + "loss": 76.9624, + "step": 60210 + }, + { + "epoch": 0.24329642004387578, + "grad_norm": 720.6984252929688, + "learning_rate": 3.7630026326882586e-05, + "loss": 81.0413, + "step": 60220 + }, + { + "epoch": 0.24333682130924342, + "grad_norm": 339.6478271484375, + "learning_rate": 3.76287075753605e-05, + "loss": 129.901, + "step": 60230 + }, + { + "epoch": 0.24337722257461103, + "grad_norm": 1518.2181396484375, + "learning_rate": 3.762738848015768e-05, + "loss": 105.6377, + "step": 60240 + }, + { + "epoch": 0.24341762383997867, + "grad_norm": 698.3955688476562, + "learning_rate": 3.7626069041299847e-05, + "loss": 83.3728, + "step": 60250 + }, + { + "epoch": 0.2434580251053463, + "grad_norm": 1030.1087646484375, + "learning_rate": 3.7624749258812726e-05, + "loss": 153.4841, + "step": 60260 + }, + { + "epoch": 0.24349842637071392, + "grad_norm": 699.5809326171875, + "learning_rate": 3.762342913272204e-05, + "loss": 81.6518, + "step": 60270 + }, + { + "epoch": 0.24353882763608156, + "grad_norm": 756.538818359375, + "learning_rate": 3.762210866305354e-05, + "loss": 116.947, + "step": 60280 + }, + { + "epoch": 0.2435792289014492, + "grad_norm": 484.88226318359375, + "learning_rate": 3.762078784983294e-05, + "loss": 60.6095, + "step": 60290 + }, + { + "epoch": 0.24361963016681681, + 
"grad_norm": 688.47119140625, + "learning_rate": 3.7619466693086025e-05, + "loss": 97.4146, + "step": 60300 + }, + { + "epoch": 0.24366003143218445, + "grad_norm": 687.4910888671875, + "learning_rate": 3.761814519283853e-05, + "loss": 95.3711, + "step": 60310 + }, + { + "epoch": 0.2437004326975521, + "grad_norm": 1240.6036376953125, + "learning_rate": 3.761682334911623e-05, + "loss": 66.8396, + "step": 60320 + }, + { + "epoch": 0.2437408339629197, + "grad_norm": 492.3941955566406, + "learning_rate": 3.761550116194488e-05, + "loss": 96.3623, + "step": 60330 + }, + { + "epoch": 0.24378123522828735, + "grad_norm": 576.4376220703125, + "learning_rate": 3.7614178631350274e-05, + "loss": 77.44, + "step": 60340 + }, + { + "epoch": 0.243821636493655, + "grad_norm": 581.4568481445312, + "learning_rate": 3.761285575735818e-05, + "loss": 85.7667, + "step": 60350 + }, + { + "epoch": 0.24386203775902263, + "grad_norm": 1075.5167236328125, + "learning_rate": 3.7611532539994405e-05, + "loss": 124.7682, + "step": 60360 + }, + { + "epoch": 0.24390243902439024, + "grad_norm": 829.4497680664062, + "learning_rate": 3.7610208979284724e-05, + "loss": 84.7166, + "step": 60370 + }, + { + "epoch": 0.24394284028975788, + "grad_norm": 911.6157836914062, + "learning_rate": 3.7608885075254965e-05, + "loss": 83.7194, + "step": 60380 + }, + { + "epoch": 0.24398324155512552, + "grad_norm": 681.8450317382812, + "learning_rate": 3.760756082793092e-05, + "loss": 76.5257, + "step": 60390 + }, + { + "epoch": 0.24402364282049313, + "grad_norm": 647.01416015625, + "learning_rate": 3.7606236237338406e-05, + "loss": 68.6924, + "step": 60400 + }, + { + "epoch": 0.24406404408586077, + "grad_norm": 868.0816040039062, + "learning_rate": 3.7604911303503255e-05, + "loss": 112.0904, + "step": 60410 + }, + { + "epoch": 0.2441044453512284, + "grad_norm": 965.8763427734375, + "learning_rate": 3.7603586026451296e-05, + "loss": 95.5411, + "step": 60420 + }, + { + "epoch": 0.24414484661659602, + "grad_norm": 1799.1202392578125, + "learning_rate": 3.760226040620837e-05, + "loss": 149.6658, + "step": 60430 + }, + { + "epoch": 0.24418524788196366, + "grad_norm": 823.300048828125, + "learning_rate": 3.760093444280031e-05, + "loss": 80.7894, + "step": 60440 + }, + { + "epoch": 0.2442256491473313, + "grad_norm": 721.2788696289062, + "learning_rate": 3.7599608136252975e-05, + "loss": 111.0317, + "step": 60450 + }, + { + "epoch": 0.24426605041269892, + "grad_norm": 576.7468872070312, + "learning_rate": 3.759828148659221e-05, + "loss": 101.6799, + "step": 60460 + }, + { + "epoch": 0.24430645167806656, + "grad_norm": 413.9503173828125, + "learning_rate": 3.759695449384389e-05, + "loss": 63.5257, + "step": 60470 + }, + { + "epoch": 0.2443468529434342, + "grad_norm": 1131.601806640625, + "learning_rate": 3.7595627158033895e-05, + "loss": 86.9528, + "step": 60480 + }, + { + "epoch": 0.2443872542088018, + "grad_norm": 598.8096313476562, + "learning_rate": 3.759429947918808e-05, + "loss": 122.1968, + "step": 60490 + }, + { + "epoch": 0.24442765547416945, + "grad_norm": 902.0849609375, + "learning_rate": 3.759297145733234e-05, + "loss": 85.4886, + "step": 60500 + }, + { + "epoch": 0.2444680567395371, + "grad_norm": 412.18902587890625, + "learning_rate": 3.7591643092492554e-05, + "loss": 72.1288, + "step": 60510 + }, + { + "epoch": 0.24450845800490473, + "grad_norm": 1041.95166015625, + "learning_rate": 3.759031438469464e-05, + "loss": 112.656, + "step": 60520 + }, + { + "epoch": 0.24454885927027234, + "grad_norm": 1129.0216064453125, + "learning_rate": 
3.7588985333964486e-05, + "loss": 74.794, + "step": 60530 + }, + { + "epoch": 0.24458926053563998, + "grad_norm": 825.4546508789062, + "learning_rate": 3.758765594032801e-05, + "loss": 133.8562, + "step": 60540 + }, + { + "epoch": 0.24462966180100762, + "grad_norm": 1044.1409912109375, + "learning_rate": 3.7586326203811124e-05, + "loss": 140.7445, + "step": 60550 + }, + { + "epoch": 0.24467006306637523, + "grad_norm": 839.1287231445312, + "learning_rate": 3.758499612443976e-05, + "loss": 88.4545, + "step": 60560 + }, + { + "epoch": 0.24471046433174287, + "grad_norm": 430.0978698730469, + "learning_rate": 3.758366570223984e-05, + "loss": 53.6207, + "step": 60570 + }, + { + "epoch": 0.2447508655971105, + "grad_norm": 974.6571655273438, + "learning_rate": 3.758233493723731e-05, + "loss": 93.5173, + "step": 60580 + }, + { + "epoch": 0.24479126686247812, + "grad_norm": 616.6634521484375, + "learning_rate": 3.7581003829458104e-05, + "loss": 123.1613, + "step": 60590 + }, + { + "epoch": 0.24483166812784576, + "grad_norm": 382.3331604003906, + "learning_rate": 3.757967237892818e-05, + "loss": 73.9521, + "step": 60600 + }, + { + "epoch": 0.2448720693932134, + "grad_norm": 645.8353881835938, + "learning_rate": 3.757834058567348e-05, + "loss": 84.9366, + "step": 60610 + }, + { + "epoch": 0.24491247065858102, + "grad_norm": 540.4635620117188, + "learning_rate": 3.757700844971999e-05, + "loss": 44.7494, + "step": 60620 + }, + { + "epoch": 0.24495287192394866, + "grad_norm": 474.0079650878906, + "learning_rate": 3.7575675971093674e-05, + "loss": 157.1437, + "step": 60630 + }, + { + "epoch": 0.2449932731893163, + "grad_norm": 796.4700927734375, + "learning_rate": 3.75743431498205e-05, + "loss": 119.1533, + "step": 60640 + }, + { + "epoch": 0.2450336744546839, + "grad_norm": 582.9804077148438, + "learning_rate": 3.757300998592646e-05, + "loss": 68.5942, + "step": 60650 + }, + { + "epoch": 0.24507407572005155, + "grad_norm": 582.134521484375, + "learning_rate": 3.757167647943755e-05, + "loss": 54.1289, + "step": 60660 + }, + { + "epoch": 0.2451144769854192, + "grad_norm": 750.4342041015625, + "learning_rate": 3.757034263037975e-05, + "loss": 107.3885, + "step": 60670 + }, + { + "epoch": 0.24515487825078683, + "grad_norm": 735.1005859375, + "learning_rate": 3.756900843877908e-05, + "loss": 93.072, + "step": 60680 + }, + { + "epoch": 0.24519527951615444, + "grad_norm": 1535.9774169921875, + "learning_rate": 3.756767390466154e-05, + "loss": 86.963, + "step": 60690 + }, + { + "epoch": 0.24523568078152208, + "grad_norm": 1317.6925048828125, + "learning_rate": 3.756633902805316e-05, + "loss": 133.0494, + "step": 60700 + }, + { + "epoch": 0.24527608204688972, + "grad_norm": 731.9415283203125, + "learning_rate": 3.756500380897995e-05, + "loss": 98.6184, + "step": 60710 + }, + { + "epoch": 0.24531648331225733, + "grad_norm": 981.7581787109375, + "learning_rate": 3.756366824746795e-05, + "loss": 142.0991, + "step": 60720 + }, + { + "epoch": 0.24535688457762497, + "grad_norm": 693.9589233398438, + "learning_rate": 3.75623323435432e-05, + "loss": 101.7523, + "step": 60730 + }, + { + "epoch": 0.2453972858429926, + "grad_norm": 1234.9918212890625, + "learning_rate": 3.7560996097231736e-05, + "loss": 100.7203, + "step": 60740 + }, + { + "epoch": 0.24543768710836023, + "grad_norm": 865.1453857421875, + "learning_rate": 3.755965950855961e-05, + "loss": 88.1825, + "step": 60750 + }, + { + "epoch": 0.24547808837372787, + "grad_norm": 1068.69970703125, + "learning_rate": 3.755832257755289e-05, + "loss": 81.9904, + "step": 
60760 + }, + { + "epoch": 0.2455184896390955, + "grad_norm": 1053.095947265625, + "learning_rate": 3.7556985304237625e-05, + "loss": 141.8503, + "step": 60770 + }, + { + "epoch": 0.24555889090446312, + "grad_norm": 1196.3419189453125, + "learning_rate": 3.755564768863989e-05, + "loss": 97.1368, + "step": 60780 + }, + { + "epoch": 0.24559929216983076, + "grad_norm": 773.222412109375, + "learning_rate": 3.7554309730785765e-05, + "loss": 118.5934, + "step": 60790 + }, + { + "epoch": 0.2456396934351984, + "grad_norm": 2098.968017578125, + "learning_rate": 3.7552971430701344e-05, + "loss": 100.0156, + "step": 60800 + }, + { + "epoch": 0.245680094700566, + "grad_norm": 716.4620361328125, + "learning_rate": 3.75516327884127e-05, + "loss": 80.9717, + "step": 60810 + }, + { + "epoch": 0.24572049596593365, + "grad_norm": 1017.4884033203125, + "learning_rate": 3.755029380394594e-05, + "loss": 117.0032, + "step": 60820 + }, + { + "epoch": 0.2457608972313013, + "grad_norm": 588.8272094726562, + "learning_rate": 3.7548954477327166e-05, + "loss": 134.0433, + "step": 60830 + }, + { + "epoch": 0.2458012984966689, + "grad_norm": 976.3790283203125, + "learning_rate": 3.754761480858249e-05, + "loss": 99.596, + "step": 60840 + }, + { + "epoch": 0.24584169976203654, + "grad_norm": 579.171630859375, + "learning_rate": 3.7546274797738034e-05, + "loss": 73.961, + "step": 60850 + }, + { + "epoch": 0.24588210102740418, + "grad_norm": 1049.4588623046875, + "learning_rate": 3.7544934444819915e-05, + "loss": 158.1656, + "step": 60860 + }, + { + "epoch": 0.24592250229277182, + "grad_norm": 313.2977600097656, + "learning_rate": 3.754359374985426e-05, + "loss": 116.6254, + "step": 60870 + }, + { + "epoch": 0.24596290355813943, + "grad_norm": 1421.011962890625, + "learning_rate": 3.7542252712867214e-05, + "loss": 134.4884, + "step": 60880 + }, + { + "epoch": 0.24600330482350707, + "grad_norm": 706.0517578125, + "learning_rate": 3.7540911333884926e-05, + "loss": 100.695, + "step": 60890 + }, + { + "epoch": 0.24604370608887471, + "grad_norm": 1243.3876953125, + "learning_rate": 3.7539569612933536e-05, + "loss": 137.7799, + "step": 60900 + }, + { + "epoch": 0.24608410735424233, + "grad_norm": 727.9938354492188, + "learning_rate": 3.753822755003921e-05, + "loss": 136.4738, + "step": 60910 + }, + { + "epoch": 0.24612450861960997, + "grad_norm": 982.3226318359375, + "learning_rate": 3.753688514522811e-05, + "loss": 104.4303, + "step": 60920 + }, + { + "epoch": 0.2461649098849776, + "grad_norm": 547.1356201171875, + "learning_rate": 3.75355423985264e-05, + "loss": 116.0822, + "step": 60930 + }, + { + "epoch": 0.24620531115034522, + "grad_norm": 2842.302490234375, + "learning_rate": 3.753419930996026e-05, + "loss": 112.774, + "step": 60940 + }, + { + "epoch": 0.24624571241571286, + "grad_norm": 621.8200073242188, + "learning_rate": 3.7532855879555887e-05, + "loss": 96.5125, + "step": 60950 + }, + { + "epoch": 0.2462861136810805, + "grad_norm": 562.8447265625, + "learning_rate": 3.753151210733946e-05, + "loss": 71.9968, + "step": 60960 + }, + { + "epoch": 0.2463265149464481, + "grad_norm": 792.779052734375, + "learning_rate": 3.753016799333717e-05, + "loss": 86.5457, + "step": 60970 + }, + { + "epoch": 0.24636691621181575, + "grad_norm": 673.2649536132812, + "learning_rate": 3.752882353757524e-05, + "loss": 83.9432, + "step": 60980 + }, + { + "epoch": 0.2464073174771834, + "grad_norm": 713.5126953125, + "learning_rate": 3.752747874007987e-05, + "loss": 83.6294, + "step": 60990 + }, + { + "epoch": 0.246447718742551, + "grad_norm": 
261.3819885253906, + "learning_rate": 3.7526133600877275e-05, + "loss": 82.074, + "step": 61000 + }, + { + "epoch": 0.24648812000791864, + "grad_norm": 1107.83203125, + "learning_rate": 3.7524788119993687e-05, + "loss": 177.1709, + "step": 61010 + }, + { + "epoch": 0.24652852127328628, + "grad_norm": 632.9651489257812, + "learning_rate": 3.752344229745532e-05, + "loss": 133.4072, + "step": 61020 + }, + { + "epoch": 0.24656892253865392, + "grad_norm": 840.670654296875, + "learning_rate": 3.7522096133288434e-05, + "loss": 84.6038, + "step": 61030 + }, + { + "epoch": 0.24660932380402154, + "grad_norm": 1128.57177734375, + "learning_rate": 3.752074962751926e-05, + "loss": 140.4576, + "step": 61040 + }, + { + "epoch": 0.24664972506938918, + "grad_norm": 695.4209594726562, + "learning_rate": 3.751940278017405e-05, + "loss": 66.8642, + "step": 61050 + }, + { + "epoch": 0.24669012633475682, + "grad_norm": 429.568115234375, + "learning_rate": 3.751805559127907e-05, + "loss": 75.6144, + "step": 61060 + }, + { + "epoch": 0.24673052760012443, + "grad_norm": 697.20703125, + "learning_rate": 3.751670806086058e-05, + "loss": 69.8539, + "step": 61070 + }, + { + "epoch": 0.24677092886549207, + "grad_norm": 776.1934204101562, + "learning_rate": 3.7515360188944835e-05, + "loss": 101.1446, + "step": 61080 + }, + { + "epoch": 0.2468113301308597, + "grad_norm": 740.6940307617188, + "learning_rate": 3.751401197555813e-05, + "loss": 99.3996, + "step": 61090 + }, + { + "epoch": 0.24685173139622732, + "grad_norm": 689.2658081054688, + "learning_rate": 3.751266342072675e-05, + "loss": 95.1718, + "step": 61100 + }, + { + "epoch": 0.24689213266159496, + "grad_norm": 921.92529296875, + "learning_rate": 3.751131452447697e-05, + "loss": 125.7502, + "step": 61110 + }, + { + "epoch": 0.2469325339269626, + "grad_norm": 1019.453369140625, + "learning_rate": 3.7509965286835106e-05, + "loss": 122.8944, + "step": 61120 + }, + { + "epoch": 0.2469729351923302, + "grad_norm": 613.3357543945312, + "learning_rate": 3.7508615707827456e-05, + "loss": 93.4595, + "step": 61130 + }, + { + "epoch": 0.24701333645769785, + "grad_norm": 211.00929260253906, + "learning_rate": 3.750726578748032e-05, + "loss": 98.3001, + "step": 61140 + }, + { + "epoch": 0.2470537377230655, + "grad_norm": 865.328857421875, + "learning_rate": 3.750591552582003e-05, + "loss": 169.1905, + "step": 61150 + }, + { + "epoch": 0.2470941389884331, + "grad_norm": 549.3242797851562, + "learning_rate": 3.750456492287291e-05, + "loss": 127.9316, + "step": 61160 + }, + { + "epoch": 0.24713454025380074, + "grad_norm": 555.203369140625, + "learning_rate": 3.7503213978665275e-05, + "loss": 125.311, + "step": 61170 + }, + { + "epoch": 0.24717494151916838, + "grad_norm": 659.7843627929688, + "learning_rate": 3.750186269322348e-05, + "loss": 119.1043, + "step": 61180 + }, + { + "epoch": 0.24721534278453602, + "grad_norm": 760.2193603515625, + "learning_rate": 3.750051106657386e-05, + "loss": 85.6839, + "step": 61190 + }, + { + "epoch": 0.24725574404990364, + "grad_norm": 1229.86376953125, + "learning_rate": 3.749915909874276e-05, + "loss": 113.2912, + "step": 61200 + }, + { + "epoch": 0.24729614531527128, + "grad_norm": 751.037841796875, + "learning_rate": 3.749780678975655e-05, + "loss": 85.3707, + "step": 61210 + }, + { + "epoch": 0.24733654658063892, + "grad_norm": 508.0979309082031, + "learning_rate": 3.7496454139641584e-05, + "loss": 78.9569, + "step": 61220 + }, + { + "epoch": 0.24737694784600653, + "grad_norm": 873.2188110351562, + "learning_rate": 3.749510114842424e-05, + 
"loss": 97.3054, + "step": 61230 + }, + { + "epoch": 0.24741734911137417, + "grad_norm": 475.1913146972656, + "learning_rate": 3.7493747816130887e-05, + "loss": 120.0739, + "step": 61240 + }, + { + "epoch": 0.2474577503767418, + "grad_norm": 647.6439819335938, + "learning_rate": 3.749239414278792e-05, + "loss": 116.7733, + "step": 61250 + }, + { + "epoch": 0.24749815164210942, + "grad_norm": 1656.1588134765625, + "learning_rate": 3.749104012842172e-05, + "loss": 118.9298, + "step": 61260 + }, + { + "epoch": 0.24753855290747706, + "grad_norm": 890.5503540039062, + "learning_rate": 3.748968577305869e-05, + "loss": 115.8407, + "step": 61270 + }, + { + "epoch": 0.2475789541728447, + "grad_norm": 1403.031494140625, + "learning_rate": 3.748833107672523e-05, + "loss": 100.7257, + "step": 61280 + }, + { + "epoch": 0.2476193554382123, + "grad_norm": 625.96044921875, + "learning_rate": 3.748697603944775e-05, + "loss": 106.4968, + "step": 61290 + }, + { + "epoch": 0.24765975670357995, + "grad_norm": 275.9015808105469, + "learning_rate": 3.7485620661252676e-05, + "loss": 62.4005, + "step": 61300 + }, + { + "epoch": 0.2477001579689476, + "grad_norm": 680.1389770507812, + "learning_rate": 3.748426494216643e-05, + "loss": 96.2172, + "step": 61310 + }, + { + "epoch": 0.2477405592343152, + "grad_norm": 956.2650756835938, + "learning_rate": 3.748290888221542e-05, + "loss": 112.2521, + "step": 61320 + }, + { + "epoch": 0.24778096049968285, + "grad_norm": 525.1364135742188, + "learning_rate": 3.748155248142611e-05, + "loss": 113.7486, + "step": 61330 + }, + { + "epoch": 0.24782136176505049, + "grad_norm": 597.5513916015625, + "learning_rate": 3.748019573982493e-05, + "loss": 94.5298, + "step": 61340 + }, + { + "epoch": 0.24786176303041813, + "grad_norm": 658.5269165039062, + "learning_rate": 3.747883865743834e-05, + "loss": 141.3896, + "step": 61350 + }, + { + "epoch": 0.24790216429578574, + "grad_norm": 721.6571655273438, + "learning_rate": 3.747748123429279e-05, + "loss": 65.8589, + "step": 61360 + }, + { + "epoch": 0.24794256556115338, + "grad_norm": 900.7858276367188, + "learning_rate": 3.7476123470414745e-05, + "loss": 94.124, + "step": 61370 + }, + { + "epoch": 0.24798296682652102, + "grad_norm": 1400.7100830078125, + "learning_rate": 3.747476536583068e-05, + "loss": 107.1088, + "step": 61380 + }, + { + "epoch": 0.24802336809188863, + "grad_norm": 658.3121337890625, + "learning_rate": 3.747340692056706e-05, + "loss": 128.5653, + "step": 61390 + }, + { + "epoch": 0.24806376935725627, + "grad_norm": 796.2944946289062, + "learning_rate": 3.7472048134650376e-05, + "loss": 141.5945, + "step": 61400 + }, + { + "epoch": 0.2481041706226239, + "grad_norm": 739.2470703125, + "learning_rate": 3.747068900810712e-05, + "loss": 92.3581, + "step": 61410 + }, + { + "epoch": 0.24814457188799152, + "grad_norm": 1669.0, + "learning_rate": 3.7469329540963795e-05, + "loss": 77.034, + "step": 61420 + }, + { + "epoch": 0.24818497315335916, + "grad_norm": 920.5509643554688, + "learning_rate": 3.746796973324689e-05, + "loss": 120.6616, + "step": 61430 + }, + { + "epoch": 0.2482253744187268, + "grad_norm": 1456.6912841796875, + "learning_rate": 3.746660958498292e-05, + "loss": 105.1577, + "step": 61440 + }, + { + "epoch": 0.24826577568409441, + "grad_norm": 701.8552856445312, + "learning_rate": 3.746524909619841e-05, + "loss": 92.5675, + "step": 61450 + }, + { + "epoch": 0.24830617694946205, + "grad_norm": 1342.8740234375, + "learning_rate": 3.746388826691987e-05, + "loss": 83.689, + "step": 61460 + }, + { + "epoch": 
0.2483465782148297, + "grad_norm": 1680.129638671875, + "learning_rate": 3.746252709717384e-05, + "loss": 86.3869, + "step": 61470 + }, + { + "epoch": 0.2483869794801973, + "grad_norm": 876.2835083007812, + "learning_rate": 3.746116558698686e-05, + "loss": 72.938, + "step": 61480 + }, + { + "epoch": 0.24842738074556495, + "grad_norm": 472.8554992675781, + "learning_rate": 3.745980373638546e-05, + "loss": 80.2662, + "step": 61490 + }, + { + "epoch": 0.2484677820109326, + "grad_norm": 726.5430908203125, + "learning_rate": 3.745844154539619e-05, + "loss": 149.4269, + "step": 61500 + }, + { + "epoch": 0.24850818327630023, + "grad_norm": 886.1257934570312, + "learning_rate": 3.745707901404563e-05, + "loss": 113.5898, + "step": 61510 + }, + { + "epoch": 0.24854858454166784, + "grad_norm": 1236.3870849609375, + "learning_rate": 3.7455716142360324e-05, + "loss": 127.7425, + "step": 61520 + }, + { + "epoch": 0.24858898580703548, + "grad_norm": 428.8044128417969, + "learning_rate": 3.7454352930366834e-05, + "loss": 183.9209, + "step": 61530 + }, + { + "epoch": 0.24862938707240312, + "grad_norm": 711.9517822265625, + "learning_rate": 3.7452989378091755e-05, + "loss": 117.0259, + "step": 61540 + }, + { + "epoch": 0.24866978833777073, + "grad_norm": 386.4212951660156, + "learning_rate": 3.7451625485561665e-05, + "loss": 69.9448, + "step": 61550 + }, + { + "epoch": 0.24871018960313837, + "grad_norm": 1698.6722412109375, + "learning_rate": 3.7450261252803146e-05, + "loss": 135.4981, + "step": 61560 + }, + { + "epoch": 0.248750590868506, + "grad_norm": 663.17041015625, + "learning_rate": 3.74488966798428e-05, + "loss": 67.0008, + "step": 61570 + }, + { + "epoch": 0.24879099213387362, + "grad_norm": 652.9514770507812, + "learning_rate": 3.744753176670724e-05, + "loss": 108.62, + "step": 61580 + }, + { + "epoch": 0.24883139339924126, + "grad_norm": 661.67626953125, + "learning_rate": 3.744616651342305e-05, + "loss": 112.2802, + "step": 61590 + }, + { + "epoch": 0.2488717946646089, + "grad_norm": 982.057373046875, + "learning_rate": 3.7444800920016875e-05, + "loss": 111.2677, + "step": 61600 + }, + { + "epoch": 0.24891219592997652, + "grad_norm": 953.5374145507812, + "learning_rate": 3.744343498651532e-05, + "loss": 76.0339, + "step": 61610 + }, + { + "epoch": 0.24895259719534416, + "grad_norm": 873.2127075195312, + "learning_rate": 3.744206871294502e-05, + "loss": 106.2532, + "step": 61620 + }, + { + "epoch": 0.2489929984607118, + "grad_norm": 574.3665161132812, + "learning_rate": 3.744070209933261e-05, + "loss": 105.9014, + "step": 61630 + }, + { + "epoch": 0.2490333997260794, + "grad_norm": 711.326171875, + "learning_rate": 3.743933514570473e-05, + "loss": 101.1488, + "step": 61640 + }, + { + "epoch": 0.24907380099144705, + "grad_norm": 750.7244873046875, + "learning_rate": 3.743796785208804e-05, + "loss": 96.9205, + "step": 61650 + }, + { + "epoch": 0.2491142022568147, + "grad_norm": 685.7415771484375, + "learning_rate": 3.743660021850919e-05, + "loss": 78.9617, + "step": 61660 + }, + { + "epoch": 0.24915460352218233, + "grad_norm": 873.5574951171875, + "learning_rate": 3.743523224499483e-05, + "loss": 92.8822, + "step": 61670 + }, + { + "epoch": 0.24919500478754994, + "grad_norm": 1423.2413330078125, + "learning_rate": 3.743386393157165e-05, + "loss": 84.1565, + "step": 61680 + }, + { + "epoch": 0.24923540605291758, + "grad_norm": 653.9383544921875, + "learning_rate": 3.743249527826632e-05, + "loss": 48.7502, + "step": 61690 + }, + { + "epoch": 0.24927580731828522, + "grad_norm": 339.85498046875, + 
"learning_rate": 3.7431126285105516e-05, + "loss": 171.408, + "step": 61700 + }, + { + "epoch": 0.24931620858365283, + "grad_norm": 694.3819580078125, + "learning_rate": 3.742975695211593e-05, + "loss": 111.7853, + "step": 61710 + }, + { + "epoch": 0.24935660984902047, + "grad_norm": 549.6668090820312, + "learning_rate": 3.742838727932426e-05, + "loss": 139.9367, + "step": 61720 + }, + { + "epoch": 0.2493970111143881, + "grad_norm": 343.2468566894531, + "learning_rate": 3.74270172667572e-05, + "loss": 146.8776, + "step": 61730 + }, + { + "epoch": 0.24943741237975572, + "grad_norm": 457.4078369140625, + "learning_rate": 3.742564691444147e-05, + "loss": 103.1195, + "step": 61740 + }, + { + "epoch": 0.24947781364512336, + "grad_norm": 1013.33740234375, + "learning_rate": 3.7424276222403795e-05, + "loss": 101.6543, + "step": 61750 + }, + { + "epoch": 0.249518214910491, + "grad_norm": 841.10693359375, + "learning_rate": 3.742290519067087e-05, + "loss": 122.9554, + "step": 61760 + }, + { + "epoch": 0.24955861617585862, + "grad_norm": 537.203125, + "learning_rate": 3.742153381926945e-05, + "loss": 79.4424, + "step": 61770 + }, + { + "epoch": 0.24959901744122626, + "grad_norm": 634.1030883789062, + "learning_rate": 3.742016210822624e-05, + "loss": 113.7417, + "step": 61780 + }, + { + "epoch": 0.2496394187065939, + "grad_norm": 416.3699035644531, + "learning_rate": 3.741879005756802e-05, + "loss": 136.1241, + "step": 61790 + }, + { + "epoch": 0.2496798199719615, + "grad_norm": 1251.6590576171875, + "learning_rate": 3.741741766732151e-05, + "loss": 122.564, + "step": 61800 + }, + { + "epoch": 0.24972022123732915, + "grad_norm": 792.7076416015625, + "learning_rate": 3.741604493751348e-05, + "loss": 102.8169, + "step": 61810 + }, + { + "epoch": 0.2497606225026968, + "grad_norm": 1038.4208984375, + "learning_rate": 3.741467186817068e-05, + "loss": 164.6049, + "step": 61820 + }, + { + "epoch": 0.24980102376806443, + "grad_norm": 1214.204833984375, + "learning_rate": 3.7413298459319897e-05, + "loss": 131.7073, + "step": 61830 + }, + { + "epoch": 0.24984142503343204, + "grad_norm": 833.353271484375, + "learning_rate": 3.741192471098789e-05, + "loss": 92.2617, + "step": 61840 + }, + { + "epoch": 0.24988182629879968, + "grad_norm": 434.82427978515625, + "learning_rate": 3.741055062320145e-05, + "loss": 45.6773, + "step": 61850 + }, + { + "epoch": 0.24992222756416732, + "grad_norm": 1044.83935546875, + "learning_rate": 3.740917619598736e-05, + "loss": 106.127, + "step": 61860 + }, + { + "epoch": 0.24996262882953493, + "grad_norm": 813.526611328125, + "learning_rate": 3.740780142937241e-05, + "loss": 83.1512, + "step": 61870 + }, + { + "epoch": 0.25000303009490255, + "grad_norm": 410.4111633300781, + "learning_rate": 3.7406426323383416e-05, + "loss": 114.8819, + "step": 61880 + }, + { + "epoch": 0.2500434313602702, + "grad_norm": 517.940185546875, + "learning_rate": 3.740505087804718e-05, + "loss": 67.3328, + "step": 61890 + }, + { + "epoch": 0.2500838326256378, + "grad_norm": 488.0714416503906, + "learning_rate": 3.740367509339052e-05, + "loss": 115.9478, + "step": 61900 + }, + { + "epoch": 0.25012423389100547, + "grad_norm": 1162.9671630859375, + "learning_rate": 3.740229896944025e-05, + "loss": 81.5974, + "step": 61910 + }, + { + "epoch": 0.2501646351563731, + "grad_norm": 893.83837890625, + "learning_rate": 3.74009225062232e-05, + "loss": 119.5683, + "step": 61920 + }, + { + "epoch": 0.25020503642174075, + "grad_norm": 774.9286499023438, + "learning_rate": 3.739954570376621e-05, + "loss": 71.9548, + 
"step": 61930 + }, + { + "epoch": 0.2502454376871084, + "grad_norm": 1464.76025390625, + "learning_rate": 3.7398168562096115e-05, + "loss": 161.9588, + "step": 61940 + }, + { + "epoch": 0.25028583895247597, + "grad_norm": 1013.6892700195312, + "learning_rate": 3.7396791081239766e-05, + "loss": 71.8219, + "step": 61950 + }, + { + "epoch": 0.2503262402178436, + "grad_norm": 1112.704345703125, + "learning_rate": 3.7395413261224026e-05, + "loss": 77.2744, + "step": 61960 + }, + { + "epoch": 0.25036664148321125, + "grad_norm": 651.5643920898438, + "learning_rate": 3.739403510207574e-05, + "loss": 80.9204, + "step": 61970 + }, + { + "epoch": 0.2504070427485789, + "grad_norm": 901.9815063476562, + "learning_rate": 3.7392656603821794e-05, + "loss": 85.5091, + "step": 61980 + }, + { + "epoch": 0.25044744401394653, + "grad_norm": 822.910400390625, + "learning_rate": 3.7391277766489044e-05, + "loss": 92.6805, + "step": 61990 + }, + { + "epoch": 0.25048784527931417, + "grad_norm": 888.1629638671875, + "learning_rate": 3.738989859010438e-05, + "loss": 128.3323, + "step": 62000 + }, + { + "epoch": 0.25052824654468175, + "grad_norm": 4081.058837890625, + "learning_rate": 3.73885190746947e-05, + "loss": 144.4782, + "step": 62010 + }, + { + "epoch": 0.2505686478100494, + "grad_norm": 757.1979370117188, + "learning_rate": 3.738713922028688e-05, + "loss": 75.5786, + "step": 62020 + }, + { + "epoch": 0.25060904907541703, + "grad_norm": 928.4437866210938, + "learning_rate": 3.7385759026907836e-05, + "loss": 99.6654, + "step": 62030 + }, + { + "epoch": 0.2506494503407847, + "grad_norm": 896.1304321289062, + "learning_rate": 3.738437849458446e-05, + "loss": 123.2424, + "step": 62040 + }, + { + "epoch": 0.2506898516061523, + "grad_norm": 1034.583740234375, + "learning_rate": 3.738299762334368e-05, + "loss": 137.8725, + "step": 62050 + }, + { + "epoch": 0.25073025287151995, + "grad_norm": 567.015380859375, + "learning_rate": 3.738161641321242e-05, + "loss": 89.3944, + "step": 62060 + }, + { + "epoch": 0.2507706541368876, + "grad_norm": 508.46185302734375, + "learning_rate": 3.7380234864217584e-05, + "loss": 105.066, + "step": 62070 + }, + { + "epoch": 0.2508110554022552, + "grad_norm": 634.5106811523438, + "learning_rate": 3.737885297638613e-05, + "loss": 88.7158, + "step": 62080 + }, + { + "epoch": 0.2508514566676228, + "grad_norm": 794.8460083007812, + "learning_rate": 3.737747074974498e-05, + "loss": 91.4644, + "step": 62090 + }, + { + "epoch": 0.25089185793299046, + "grad_norm": 563.3334350585938, + "learning_rate": 3.737608818432111e-05, + "loss": 83.2723, + "step": 62100 + }, + { + "epoch": 0.2509322591983581, + "grad_norm": 359.7083435058594, + "learning_rate": 3.737470528014145e-05, + "loss": 82.7567, + "step": 62110 + }, + { + "epoch": 0.25097266046372574, + "grad_norm": 618.590087890625, + "learning_rate": 3.7373322037232956e-05, + "loss": 122.4383, + "step": 62120 + }, + { + "epoch": 0.2510130617290934, + "grad_norm": 954.675537109375, + "learning_rate": 3.737193845562261e-05, + "loss": 111.8171, + "step": 62130 + }, + { + "epoch": 0.25105346299446096, + "grad_norm": 571.9029541015625, + "learning_rate": 3.737055453533738e-05, + "loss": 70.5844, + "step": 62140 + }, + { + "epoch": 0.2510938642598286, + "grad_norm": 1008.9256591796875, + "learning_rate": 3.7369170276404245e-05, + "loss": 105.918, + "step": 62150 + }, + { + "epoch": 0.25113426552519624, + "grad_norm": 869.2931518554688, + "learning_rate": 3.7367785678850196e-05, + "loss": 99.9044, + "step": 62160 + }, + { + "epoch": 0.2511746667905639, + 
"grad_norm": 895.7987060546875, + "learning_rate": 3.736640074270222e-05, + "loss": 86.2093, + "step": 62170 + }, + { + "epoch": 0.2512150680559315, + "grad_norm": 1053.09130859375, + "learning_rate": 3.736501546798732e-05, + "loss": 98.9663, + "step": 62180 + }, + { + "epoch": 0.25125546932129916, + "grad_norm": 1025.1368408203125, + "learning_rate": 3.7363629854732506e-05, + "loss": 99.1653, + "step": 62190 + }, + { + "epoch": 0.25129587058666675, + "grad_norm": 1256.1749267578125, + "learning_rate": 3.736224390296479e-05, + "loss": 110.8187, + "step": 62200 + }, + { + "epoch": 0.2513362718520344, + "grad_norm": 679.8109741210938, + "learning_rate": 3.736085761271119e-05, + "loss": 77.7323, + "step": 62210 + }, + { + "epoch": 0.251376673117402, + "grad_norm": 1115.193115234375, + "learning_rate": 3.735947098399873e-05, + "loss": 82.7968, + "step": 62220 + }, + { + "epoch": 0.25141707438276967, + "grad_norm": 863.3545532226562, + "learning_rate": 3.7358084016854445e-05, + "loss": 72.7392, + "step": 62230 + }, + { + "epoch": 0.2514574756481373, + "grad_norm": 1082.9144287109375, + "learning_rate": 3.735669671130538e-05, + "loss": 72.3309, + "step": 62240 + }, + { + "epoch": 0.25149787691350495, + "grad_norm": 1212.4620361328125, + "learning_rate": 3.735530906737857e-05, + "loss": 87.4921, + "step": 62250 + }, + { + "epoch": 0.2515382781788726, + "grad_norm": 869.3161010742188, + "learning_rate": 3.735392108510108e-05, + "loss": 94.5516, + "step": 62260 + }, + { + "epoch": 0.25157867944424017, + "grad_norm": 582.7783203125, + "learning_rate": 3.735253276449997e-05, + "loss": 133.012, + "step": 62270 + }, + { + "epoch": 0.2516190807096078, + "grad_norm": 754.9213256835938, + "learning_rate": 3.735114410560229e-05, + "loss": 84.7234, + "step": 62280 + }, + { + "epoch": 0.25165948197497545, + "grad_norm": 959.8594360351562, + "learning_rate": 3.734975510843512e-05, + "loss": 100.0778, + "step": 62290 + }, + { + "epoch": 0.2516998832403431, + "grad_norm": 429.73895263671875, + "learning_rate": 3.734836577302555e-05, + "loss": 84.2397, + "step": 62300 + }, + { + "epoch": 0.25174028450571073, + "grad_norm": 590.1376342773438, + "learning_rate": 3.734697609940066e-05, + "loss": 139.0243, + "step": 62310 + }, + { + "epoch": 0.25178068577107837, + "grad_norm": 410.11224365234375, + "learning_rate": 3.7345586087587535e-05, + "loss": 79.0262, + "step": 62320 + }, + { + "epoch": 0.25182108703644596, + "grad_norm": 1352.2822265625, + "learning_rate": 3.734419573761328e-05, + "loss": 96.0687, + "step": 62330 + }, + { + "epoch": 0.2518614883018136, + "grad_norm": 674.7533569335938, + "learning_rate": 3.7342805049504994e-05, + "loss": 139.3266, + "step": 62340 + }, + { + "epoch": 0.25190188956718124, + "grad_norm": 574.5116577148438, + "learning_rate": 3.73414140232898e-05, + "loss": 76.1198, + "step": 62350 + }, + { + "epoch": 0.2519422908325489, + "grad_norm": 343.53509521484375, + "learning_rate": 3.7340022658994816e-05, + "loss": 98.5033, + "step": 62360 + }, + { + "epoch": 0.2519826920979165, + "grad_norm": 825.7366943359375, + "learning_rate": 3.733863095664716e-05, + "loss": 73.7992, + "step": 62370 + }, + { + "epoch": 0.25202309336328416, + "grad_norm": 585.5107421875, + "learning_rate": 3.733723891627396e-05, + "loss": 75.7485, + "step": 62380 + }, + { + "epoch": 0.2520634946286518, + "grad_norm": 1346.1016845703125, + "learning_rate": 3.733584653790237e-05, + "loss": 137.5393, + "step": 62390 + }, + { + "epoch": 0.2521038958940194, + "grad_norm": 814.6492309570312, + "learning_rate": 
3.7334453821559534e-05, + "loss": 71.6008, + "step": 62400 + }, + { + "epoch": 0.252144297159387, + "grad_norm": 864.88720703125, + "learning_rate": 3.733306076727258e-05, + "loss": 100.8701, + "step": 62410 + }, + { + "epoch": 0.25218469842475466, + "grad_norm": 1060.40966796875, + "learning_rate": 3.733166737506869e-05, + "loss": 184.5341, + "step": 62420 + }, + { + "epoch": 0.2522250996901223, + "grad_norm": 422.5169372558594, + "learning_rate": 3.733027364497502e-05, + "loss": 120.4113, + "step": 62430 + }, + { + "epoch": 0.25226550095548994, + "grad_norm": 918.08740234375, + "learning_rate": 3.732887957701874e-05, + "loss": 93.6972, + "step": 62440 + }, + { + "epoch": 0.2523059022208576, + "grad_norm": 807.5408325195312, + "learning_rate": 3.732748517122704e-05, + "loss": 75.2036, + "step": 62450 + }, + { + "epoch": 0.25234630348622517, + "grad_norm": 564.2117309570312, + "learning_rate": 3.732609042762709e-05, + "loss": 77.946, + "step": 62460 + }, + { + "epoch": 0.2523867047515928, + "grad_norm": 852.6795043945312, + "learning_rate": 3.732469534624609e-05, + "loss": 129.4016, + "step": 62470 + }, + { + "epoch": 0.25242710601696045, + "grad_norm": 1167.436767578125, + "learning_rate": 3.732329992711123e-05, + "loss": 117.3296, + "step": 62480 + }, + { + "epoch": 0.2524675072823281, + "grad_norm": 977.0709838867188, + "learning_rate": 3.732190417024972e-05, + "loss": 104.5861, + "step": 62490 + }, + { + "epoch": 0.2525079085476957, + "grad_norm": 665.907958984375, + "learning_rate": 3.732050807568878e-05, + "loss": 106.6902, + "step": 62500 + }, + { + "epoch": 0.25254830981306337, + "grad_norm": 681.3965454101562, + "learning_rate": 3.731911164345561e-05, + "loss": 75.3472, + "step": 62510 + }, + { + "epoch": 0.25258871107843095, + "grad_norm": 445.883544921875, + "learning_rate": 3.731771487357744e-05, + "loss": 83.755, + "step": 62520 + }, + { + "epoch": 0.2526291123437986, + "grad_norm": 792.4898071289062, + "learning_rate": 3.731631776608151e-05, + "loss": 101.2789, + "step": 62530 + }, + { + "epoch": 0.25266951360916623, + "grad_norm": 399.0079650878906, + "learning_rate": 3.731492032099504e-05, + "loss": 133.7845, + "step": 62540 + }, + { + "epoch": 0.25270991487453387, + "grad_norm": 1023.5635375976562, + "learning_rate": 3.731352253834529e-05, + "loss": 118.8593, + "step": 62550 + }, + { + "epoch": 0.2527503161399015, + "grad_norm": 938.2078857421875, + "learning_rate": 3.73121244181595e-05, + "loss": 174.2431, + "step": 62560 + }, + { + "epoch": 0.25279071740526915, + "grad_norm": 2179.6533203125, + "learning_rate": 3.731072596046493e-05, + "loss": 108.8702, + "step": 62570 + }, + { + "epoch": 0.2528311186706368, + "grad_norm": 358.10107421875, + "learning_rate": 3.7309327165288857e-05, + "loss": 114.0688, + "step": 62580 + }, + { + "epoch": 0.2528715199360044, + "grad_norm": 767.3385009765625, + "learning_rate": 3.730792803265853e-05, + "loss": 124.743, + "step": 62590 + }, + { + "epoch": 0.252911921201372, + "grad_norm": 2259.091064453125, + "learning_rate": 3.7306528562601245e-05, + "loss": 88.174, + "step": 62600 + }, + { + "epoch": 0.25295232246673965, + "grad_norm": 1002.8915405273438, + "learning_rate": 3.7305128755144265e-05, + "loss": 146.4647, + "step": 62610 + }, + { + "epoch": 0.2529927237321073, + "grad_norm": 887.9339599609375, + "learning_rate": 3.73037286103149e-05, + "loss": 129.0282, + "step": 62620 + }, + { + "epoch": 0.25303312499747493, + "grad_norm": 450.8857116699219, + "learning_rate": 3.730232812814043e-05, + "loss": 85.7734, + "step": 62630 + }, + { 
+ "epoch": 0.2530735262628426, + "grad_norm": 728.8134765625, + "learning_rate": 3.730092730864816e-05, + "loss": 96.7616, + "step": 62640 + }, + { + "epoch": 0.25311392752821016, + "grad_norm": 1781.1455078125, + "learning_rate": 3.729952615186542e-05, + "loss": 101.8685, + "step": 62650 + }, + { + "epoch": 0.2531543287935778, + "grad_norm": 475.5879821777344, + "learning_rate": 3.72981246578195e-05, + "loss": 155.1444, + "step": 62660 + }, + { + "epoch": 0.25319473005894544, + "grad_norm": 1132.07861328125, + "learning_rate": 3.729672282653774e-05, + "loss": 101.146, + "step": 62670 + }, + { + "epoch": 0.2532351313243131, + "grad_norm": 1326.7109375, + "learning_rate": 3.729532065804746e-05, + "loss": 135.9761, + "step": 62680 + }, + { + "epoch": 0.2532755325896807, + "grad_norm": 598.1143188476562, + "learning_rate": 3.7293918152376004e-05, + "loss": 86.6694, + "step": 62690 + }, + { + "epoch": 0.25331593385504836, + "grad_norm": 936.3707275390625, + "learning_rate": 3.72925153095507e-05, + "loss": 95.7403, + "step": 62700 + }, + { + "epoch": 0.253356335120416, + "grad_norm": 824.6199951171875, + "learning_rate": 3.729111212959891e-05, + "loss": 67.4074, + "step": 62710 + }, + { + "epoch": 0.2533967363857836, + "grad_norm": 944.58203125, + "learning_rate": 3.7289708612547995e-05, + "loss": 74.48, + "step": 62720 + }, + { + "epoch": 0.2534371376511512, + "grad_norm": 800.2420654296875, + "learning_rate": 3.72883047584253e-05, + "loss": 94.5633, + "step": 62730 + }, + { + "epoch": 0.25347753891651886, + "grad_norm": 724.8945922851562, + "learning_rate": 3.728690056725821e-05, + "loss": 128.9444, + "step": 62740 + }, + { + "epoch": 0.2535179401818865, + "grad_norm": 1090.5589599609375, + "learning_rate": 3.72854960390741e-05, + "loss": 114.4577, + "step": 62750 + }, + { + "epoch": 0.25355834144725414, + "grad_norm": 466.8430480957031, + "learning_rate": 3.728409117390033e-05, + "loss": 88.3461, + "step": 62760 + }, + { + "epoch": 0.2535987427126218, + "grad_norm": 3536.67041015625, + "learning_rate": 3.7282685971764316e-05, + "loss": 241.532, + "step": 62770 + }, + { + "epoch": 0.25363914397798937, + "grad_norm": 659.9575805664062, + "learning_rate": 3.7281280432693436e-05, + "loss": 80.2921, + "step": 62780 + }, + { + "epoch": 0.253679545243357, + "grad_norm": 265.70184326171875, + "learning_rate": 3.72798745567151e-05, + "loss": 81.6745, + "step": 62790 + }, + { + "epoch": 0.25371994650872465, + "grad_norm": 1366.11181640625, + "learning_rate": 3.727846834385671e-05, + "loss": 134.0089, + "step": 62800 + }, + { + "epoch": 0.2537603477740923, + "grad_norm": 886.4556274414062, + "learning_rate": 3.727706179414568e-05, + "loss": 125.2466, + "step": 62810 + }, + { + "epoch": 0.2538007490394599, + "grad_norm": 742.3310546875, + "learning_rate": 3.7275654907609444e-05, + "loss": 70.222, + "step": 62820 + }, + { + "epoch": 0.25384115030482757, + "grad_norm": 378.6891174316406, + "learning_rate": 3.727424768427542e-05, + "loss": 100.6184, + "step": 62830 + }, + { + "epoch": 0.25388155157019515, + "grad_norm": 738.6260986328125, + "learning_rate": 3.727284012417104e-05, + "loss": 88.8644, + "step": 62840 + }, + { + "epoch": 0.2539219528355628, + "grad_norm": 691.2196655273438, + "learning_rate": 3.727143222732375e-05, + "loss": 90.8922, + "step": 62850 + }, + { + "epoch": 0.25396235410093043, + "grad_norm": 1172.6610107421875, + "learning_rate": 3.7270023993761e-05, + "loss": 142.3584, + "step": 62860 + }, + { + "epoch": 0.25400275536629807, + "grad_norm": 1382.592529296875, + "learning_rate": 
3.726861542351024e-05, + "loss": 76.1431, + "step": 62870 + }, + { + "epoch": 0.2540431566316657, + "grad_norm": 732.424560546875, + "learning_rate": 3.726720651659893e-05, + "loss": 127.9882, + "step": 62880 + }, + { + "epoch": 0.25408355789703335, + "grad_norm": 599.9231567382812, + "learning_rate": 3.726579727305454e-05, + "loss": 81.8997, + "step": 62890 + }, + { + "epoch": 0.254123959162401, + "grad_norm": 814.5032348632812, + "learning_rate": 3.726438769290454e-05, + "loss": 104.1378, + "step": 62900 + }, + { + "epoch": 0.2541643604277686, + "grad_norm": 508.2313537597656, + "learning_rate": 3.726297777617642e-05, + "loss": 123.1504, + "step": 62910 + }, + { + "epoch": 0.2542047616931362, + "grad_norm": 727.0783081054688, + "learning_rate": 3.7261567522897656e-05, + "loss": 132.8472, + "step": 62920 + }, + { + "epoch": 0.25424516295850386, + "grad_norm": 671.1376342773438, + "learning_rate": 3.726015693309575e-05, + "loss": 82.1267, + "step": 62930 + }, + { + "epoch": 0.2542855642238715, + "grad_norm": 171.28952026367188, + "learning_rate": 3.7258746006798194e-05, + "loss": 108.437, + "step": 62940 + }, + { + "epoch": 0.25432596548923914, + "grad_norm": 672.1793212890625, + "learning_rate": 3.72573347440325e-05, + "loss": 119.4874, + "step": 62950 + }, + { + "epoch": 0.2543663667546068, + "grad_norm": 720.2647705078125, + "learning_rate": 3.725592314482619e-05, + "loss": 85.0747, + "step": 62960 + }, + { + "epoch": 0.25440676801997436, + "grad_norm": 857.1250610351562, + "learning_rate": 3.725451120920676e-05, + "loss": 100.4638, + "step": 62970 + }, + { + "epoch": 0.254447169285342, + "grad_norm": 710.0978393554688, + "learning_rate": 3.725309893720176e-05, + "loss": 75.6941, + "step": 62980 + }, + { + "epoch": 0.25448757055070964, + "grad_norm": 178.305908203125, + "learning_rate": 3.725168632883871e-05, + "loss": 88.5473, + "step": 62990 + }, + { + "epoch": 0.2545279718160773, + "grad_norm": 1319.0653076171875, + "learning_rate": 3.725027338414515e-05, + "loss": 140.3865, + "step": 63000 + }, + { + "epoch": 0.2545683730814449, + "grad_norm": 860.0739135742188, + "learning_rate": 3.7248860103148636e-05, + "loss": 104.3906, + "step": 63010 + }, + { + "epoch": 0.25460877434681256, + "grad_norm": 489.9392395019531, + "learning_rate": 3.724744648587671e-05, + "loss": 65.3853, + "step": 63020 + }, + { + "epoch": 0.2546491756121802, + "grad_norm": 830.6475830078125, + "learning_rate": 3.724603253235694e-05, + "loss": 74.4073, + "step": 63030 + }, + { + "epoch": 0.2546895768775478, + "grad_norm": 755.3203735351562, + "learning_rate": 3.724461824261688e-05, + "loss": 111.5286, + "step": 63040 + }, + { + "epoch": 0.2547299781429154, + "grad_norm": 832.8167724609375, + "learning_rate": 3.724320361668412e-05, + "loss": 82.5079, + "step": 63050 + }, + { + "epoch": 0.25477037940828307, + "grad_norm": 624.6299438476562, + "learning_rate": 3.7241788654586215e-05, + "loss": 92.5379, + "step": 63060 + }, + { + "epoch": 0.2548107806736507, + "grad_norm": 599.07958984375, + "learning_rate": 3.724037335635076e-05, + "loss": 77.1182, + "step": 63070 + }, + { + "epoch": 0.25485118193901835, + "grad_norm": 580.4988403320312, + "learning_rate": 3.7238957722005364e-05, + "loss": 105.3672, + "step": 63080 + }, + { + "epoch": 0.254891583204386, + "grad_norm": 641.9137573242188, + "learning_rate": 3.723754175157761e-05, + "loss": 61.6163, + "step": 63090 + }, + { + "epoch": 0.25493198446975357, + "grad_norm": 1184.375732421875, + "learning_rate": 3.72361254450951e-05, + "loss": 84.937, + "step": 63100 + }, + 
{ + "epoch": 0.2549723857351212, + "grad_norm": 1070.485595703125, + "learning_rate": 3.7234708802585455e-05, + "loss": 86.6163, + "step": 63110 + }, + { + "epoch": 0.25501278700048885, + "grad_norm": 516.0731201171875, + "learning_rate": 3.723329182407629e-05, + "loss": 117.5316, + "step": 63120 + }, + { + "epoch": 0.2550531882658565, + "grad_norm": 510.67059326171875, + "learning_rate": 3.723187450959523e-05, + "loss": 83.5695, + "step": 63130 + }, + { + "epoch": 0.25509358953122413, + "grad_norm": 983.8540649414062, + "learning_rate": 3.7230456859169894e-05, + "loss": 117.9967, + "step": 63140 + }, + { + "epoch": 0.25513399079659177, + "grad_norm": 759.6858520507812, + "learning_rate": 3.7229038872827936e-05, + "loss": 81.0221, + "step": 63150 + }, + { + "epoch": 0.25517439206195935, + "grad_norm": 656.4317626953125, + "learning_rate": 3.7227620550597e-05, + "loss": 90.8298, + "step": 63160 + }, + { + "epoch": 0.255214793327327, + "grad_norm": 343.1498107910156, + "learning_rate": 3.722620189250473e-05, + "loss": 115.8797, + "step": 63170 + }, + { + "epoch": 0.25525519459269463, + "grad_norm": 677.8213500976562, + "learning_rate": 3.722478289857878e-05, + "loss": 63.3492, + "step": 63180 + }, + { + "epoch": 0.2552955958580623, + "grad_norm": 1002.9613647460938, + "learning_rate": 3.722336356884682e-05, + "loss": 149.5272, + "step": 63190 + }, + { + "epoch": 0.2553359971234299, + "grad_norm": 740.4276733398438, + "learning_rate": 3.722194390333653e-05, + "loss": 100.7448, + "step": 63200 + }, + { + "epoch": 0.25537639838879755, + "grad_norm": 385.5496520996094, + "learning_rate": 3.722052390207557e-05, + "loss": 86.2595, + "step": 63210 + }, + { + "epoch": 0.2554167996541652, + "grad_norm": 581.33154296875, + "learning_rate": 3.721910356509163e-05, + "loss": 123.1271, + "step": 63220 + }, + { + "epoch": 0.2554572009195328, + "grad_norm": 548.7271728515625, + "learning_rate": 3.7217682892412404e-05, + "loss": 88.1982, + "step": 63230 + }, + { + "epoch": 0.2554976021849004, + "grad_norm": 543.096435546875, + "learning_rate": 3.7216261884065585e-05, + "loss": 91.1932, + "step": 63240 + }, + { + "epoch": 0.25553800345026806, + "grad_norm": 988.0489501953125, + "learning_rate": 3.721484054007888e-05, + "loss": 95.6022, + "step": 63250 + }, + { + "epoch": 0.2555784047156357, + "grad_norm": 907.4325561523438, + "learning_rate": 3.721341886047999e-05, + "loss": 97.423, + "step": 63260 + }, + { + "epoch": 0.25561880598100334, + "grad_norm": 1319.098388671875, + "learning_rate": 3.7211996845296636e-05, + "loss": 105.4733, + "step": 63270 + }, + { + "epoch": 0.255659207246371, + "grad_norm": 940.328125, + "learning_rate": 3.7210574494556546e-05, + "loss": 138.6737, + "step": 63280 + }, + { + "epoch": 0.25569960851173856, + "grad_norm": 906.1506958007812, + "learning_rate": 3.7209151808287447e-05, + "loss": 112.9401, + "step": 63290 + }, + { + "epoch": 0.2557400097771062, + "grad_norm": 519.0744018554688, + "learning_rate": 3.720772878651707e-05, + "loss": 126.8765, + "step": 63300 + }, + { + "epoch": 0.25578041104247384, + "grad_norm": 1880.4742431640625, + "learning_rate": 3.7206305429273164e-05, + "loss": 104.762, + "step": 63310 + }, + { + "epoch": 0.2558208123078415, + "grad_norm": 944.4894409179688, + "learning_rate": 3.720488173658347e-05, + "loss": 127.2272, + "step": 63320 + }, + { + "epoch": 0.2558612135732091, + "grad_norm": 849.9081420898438, + "learning_rate": 3.720345770847575e-05, + "loss": 82.4509, + "step": 63330 + }, + { + "epoch": 0.25590161483857676, + "grad_norm": 
1682.496826171875, + "learning_rate": 3.7202033344977774e-05, + "loss": 92.1781, + "step": 63340 + }, + { + "epoch": 0.2559420161039444, + "grad_norm": 947.993408203125, + "learning_rate": 3.72006086461173e-05, + "loss": 58.052, + "step": 63350 + }, + { + "epoch": 0.255982417369312, + "grad_norm": 520.7560424804688, + "learning_rate": 3.7199183611922095e-05, + "loss": 125.2274, + "step": 63360 + }, + { + "epoch": 0.2560228186346796, + "grad_norm": 1292.26513671875, + "learning_rate": 3.719775824241996e-05, + "loss": 86.1128, + "step": 63370 + }, + { + "epoch": 0.25606321990004727, + "grad_norm": 844.4318237304688, + "learning_rate": 3.719633253763867e-05, + "loss": 130.0777, + "step": 63380 + }, + { + "epoch": 0.2561036211654149, + "grad_norm": 631.265869140625, + "learning_rate": 3.719490649760603e-05, + "loss": 107.8685, + "step": 63390 + }, + { + "epoch": 0.25614402243078255, + "grad_norm": 1551.359375, + "learning_rate": 3.7193480122349824e-05, + "loss": 136.067, + "step": 63400 + }, + { + "epoch": 0.2561844236961502, + "grad_norm": 935.7584838867188, + "learning_rate": 3.7192053411897876e-05, + "loss": 97.1138, + "step": 63410 + }, + { + "epoch": 0.25622482496151777, + "grad_norm": 1152.337158203125, + "learning_rate": 3.7190626366278e-05, + "loss": 155.6344, + "step": 63420 + }, + { + "epoch": 0.2562652262268854, + "grad_norm": 1192.861328125, + "learning_rate": 3.718919898551801e-05, + "loss": 92.823, + "step": 63430 + }, + { + "epoch": 0.25630562749225305, + "grad_norm": 661.6820678710938, + "learning_rate": 3.718777126964573e-05, + "loss": 101.7833, + "step": 63440 + }, + { + "epoch": 0.2563460287576207, + "grad_norm": 528.6534423828125, + "learning_rate": 3.718634321868901e-05, + "loss": 108.0845, + "step": 63450 + }, + { + "epoch": 0.25638643002298833, + "grad_norm": 1591.26611328125, + "learning_rate": 3.718491483267568e-05, + "loss": 150.5883, + "step": 63460 + }, + { + "epoch": 0.25642683128835597, + "grad_norm": 382.126220703125, + "learning_rate": 3.7183486111633585e-05, + "loss": 99.6214, + "step": 63470 + }, + { + "epoch": 0.25646723255372356, + "grad_norm": 753.4785766601562, + "learning_rate": 3.7182057055590576e-05, + "loss": 129.9543, + "step": 63480 + }, + { + "epoch": 0.2565076338190912, + "grad_norm": 825.617431640625, + "learning_rate": 3.7180627664574524e-05, + "loss": 135.1169, + "step": 63490 + }, + { + "epoch": 0.25654803508445884, + "grad_norm": 660.3353881835938, + "learning_rate": 3.717919793861329e-05, + "loss": 93.4321, + "step": 63500 + }, + { + "epoch": 0.2565884363498265, + "grad_norm": 751.5831909179688, + "learning_rate": 3.717776787773475e-05, + "loss": 99.8669, + "step": 63510 + }, + { + "epoch": 0.2566288376151941, + "grad_norm": 557.0651245117188, + "learning_rate": 3.717633748196678e-05, + "loss": 94.2512, + "step": 63520 + }, + { + "epoch": 0.25666923888056176, + "grad_norm": 489.67242431640625, + "learning_rate": 3.717490675133727e-05, + "loss": 106.1345, + "step": 63530 + }, + { + "epoch": 0.2567096401459294, + "grad_norm": 693.766357421875, + "learning_rate": 3.7173475685874105e-05, + "loss": 80.031, + "step": 63540 + }, + { + "epoch": 0.256750041411297, + "grad_norm": 574.4652099609375, + "learning_rate": 3.7172044285605195e-05, + "loss": 92.8209, + "step": 63550 + }, + { + "epoch": 0.2567904426766646, + "grad_norm": 762.2802124023438, + "learning_rate": 3.717061255055844e-05, + "loss": 100.6132, + "step": 63560 + }, + { + "epoch": 0.25683084394203226, + "grad_norm": 1370.24462890625, + "learning_rate": 3.7169180480761754e-05, + "loss": 
109.6956, + "step": 63570 + }, + { + "epoch": 0.2568712452073999, + "grad_norm": 787.908447265625, + "learning_rate": 3.7167748076243046e-05, + "loss": 88.1939, + "step": 63580 + }, + { + "epoch": 0.25691164647276754, + "grad_norm": 678.8076171875, + "learning_rate": 3.716631533703026e-05, + "loss": 85.4772, + "step": 63590 + }, + { + "epoch": 0.2569520477381352, + "grad_norm": 1040.426513671875, + "learning_rate": 3.7164882263151315e-05, + "loss": 103.8459, + "step": 63600 + }, + { + "epoch": 0.25699244900350277, + "grad_norm": 461.91253662109375, + "learning_rate": 3.716344885463416e-05, + "loss": 126.6732, + "step": 63610 + }, + { + "epoch": 0.2570328502688704, + "grad_norm": 661.9632568359375, + "learning_rate": 3.716201511150673e-05, + "loss": 80.0156, + "step": 63620 + }, + { + "epoch": 0.25707325153423805, + "grad_norm": 503.06158447265625, + "learning_rate": 3.716058103379697e-05, + "loss": 95.1054, + "step": 63630 + }, + { + "epoch": 0.2571136527996057, + "grad_norm": 471.42230224609375, + "learning_rate": 3.715914662153286e-05, + "loss": 104.5018, + "step": 63640 + }, + { + "epoch": 0.2571540540649733, + "grad_norm": 1518.90478515625, + "learning_rate": 3.715771187474235e-05, + "loss": 84.9718, + "step": 63650 + }, + { + "epoch": 0.25719445533034097, + "grad_norm": 697.58056640625, + "learning_rate": 3.7156276793453414e-05, + "loss": 74.3619, + "step": 63660 + }, + { + "epoch": 0.2572348565957086, + "grad_norm": 420.0550231933594, + "learning_rate": 3.715484137769402e-05, + "loss": 86.1362, + "step": 63670 + }, + { + "epoch": 0.2572752578610762, + "grad_norm": 465.1202392578125, + "learning_rate": 3.7153405627492175e-05, + "loss": 92.4991, + "step": 63680 + }, + { + "epoch": 0.25731565912644383, + "grad_norm": 804.6327514648438, + "learning_rate": 3.715196954287585e-05, + "loss": 67.9978, + "step": 63690 + }, + { + "epoch": 0.25735606039181147, + "grad_norm": 947.8130493164062, + "learning_rate": 3.715053312387305e-05, + "loss": 102.4816, + "step": 63700 + }, + { + "epoch": 0.2573964616571791, + "grad_norm": 1129.798828125, + "learning_rate": 3.7149096370511776e-05, + "loss": 112.9908, + "step": 63710 + }, + { + "epoch": 0.25743686292254675, + "grad_norm": 1644.0733642578125, + "learning_rate": 3.714765928282004e-05, + "loss": 179.6517, + "step": 63720 + }, + { + "epoch": 0.2574772641879144, + "grad_norm": 931.06591796875, + "learning_rate": 3.714622186082585e-05, + "loss": 110.7087, + "step": 63730 + }, + { + "epoch": 0.257517665453282, + "grad_norm": 880.3685302734375, + "learning_rate": 3.7144784104557246e-05, + "loss": 97.5638, + "step": 63740 + }, + { + "epoch": 0.2575580667186496, + "grad_norm": 785.1746826171875, + "learning_rate": 3.714334601404225e-05, + "loss": 75.5446, + "step": 63750 + }, + { + "epoch": 0.25759846798401725, + "grad_norm": 670.9224243164062, + "learning_rate": 3.714190758930889e-05, + "loss": 93.7492, + "step": 63760 + }, + { + "epoch": 0.2576388692493849, + "grad_norm": 753.9791259765625, + "learning_rate": 3.7140468830385227e-05, + "loss": 112.21, + "step": 63770 + }, + { + "epoch": 0.25767927051475253, + "grad_norm": 1231.0936279296875, + "learning_rate": 3.713902973729929e-05, + "loss": 122.6011, + "step": 63780 + }, + { + "epoch": 0.2577196717801202, + "grad_norm": 408.81927490234375, + "learning_rate": 3.713759031007915e-05, + "loss": 147.2747, + "step": 63790 + }, + { + "epoch": 0.25776007304548776, + "grad_norm": 1079.59326171875, + "learning_rate": 3.713615054875286e-05, + "loss": 120.4963, + "step": 63800 + }, + { + "epoch": 
0.2578004743108554, + "grad_norm": 1345.376708984375, + "learning_rate": 3.7134710453348496e-05, + "loss": 112.9191, + "step": 63810 + }, + { + "epoch": 0.25784087557622304, + "grad_norm": 865.9116821289062, + "learning_rate": 3.713327002389413e-05, + "loss": 77.4771, + "step": 63820 + }, + { + "epoch": 0.2578812768415907, + "grad_norm": 1280.2784423828125, + "learning_rate": 3.713182926041785e-05, + "loss": 116.0818, + "step": 63830 + }, + { + "epoch": 0.2579216781069583, + "grad_norm": 610.8704223632812, + "learning_rate": 3.7130388162947726e-05, + "loss": 80.356, + "step": 63840 + }, + { + "epoch": 0.25796207937232596, + "grad_norm": 680.147705078125, + "learning_rate": 3.712894673151187e-05, + "loss": 111.3564, + "step": 63850 + }, + { + "epoch": 0.2580024806376936, + "grad_norm": 815.5071411132812, + "learning_rate": 3.712750496613838e-05, + "loss": 75.9202, + "step": 63860 + }, + { + "epoch": 0.2580428819030612, + "grad_norm": 850.8605346679688, + "learning_rate": 3.712606286685536e-05, + "loss": 130.1348, + "step": 63870 + }, + { + "epoch": 0.2580832831684288, + "grad_norm": 1765.9049072265625, + "learning_rate": 3.712462043369093e-05, + "loss": 89.5328, + "step": 63880 + }, + { + "epoch": 0.25812368443379646, + "grad_norm": 1370.7615966796875, + "learning_rate": 3.712317766667321e-05, + "loss": 85.9847, + "step": 63890 + }, + { + "epoch": 0.2581640856991641, + "grad_norm": 594.0784912109375, + "learning_rate": 3.712173456583033e-05, + "loss": 59.166, + "step": 63900 + }, + { + "epoch": 0.25820448696453174, + "grad_norm": 761.9963989257812, + "learning_rate": 3.712029113119041e-05, + "loss": 58.8487, + "step": 63910 + }, + { + "epoch": 0.2582448882298994, + "grad_norm": 809.0079345703125, + "learning_rate": 3.7118847362781605e-05, + "loss": 150.5685, + "step": 63920 + }, + { + "epoch": 0.25828528949526697, + "grad_norm": 649.3175048828125, + "learning_rate": 3.711740326063206e-05, + "loss": 91.2149, + "step": 63930 + }, + { + "epoch": 0.2583256907606346, + "grad_norm": 352.7129821777344, + "learning_rate": 3.711595882476992e-05, + "loss": 75.4109, + "step": 63940 + }, + { + "epoch": 0.25836609202600225, + "grad_norm": 440.92999267578125, + "learning_rate": 3.711451405522335e-05, + "loss": 67.2835, + "step": 63950 + }, + { + "epoch": 0.2584064932913699, + "grad_norm": 819.1078491210938, + "learning_rate": 3.711306895202052e-05, + "loss": 120.6553, + "step": 63960 + }, + { + "epoch": 0.2584468945567375, + "grad_norm": 782.6903686523438, + "learning_rate": 3.7111623515189605e-05, + "loss": 66.0393, + "step": 63970 + }, + { + "epoch": 0.25848729582210517, + "grad_norm": 1118.6910400390625, + "learning_rate": 3.7110177744758774e-05, + "loss": 102.1884, + "step": 63980 + }, + { + "epoch": 0.25852769708747275, + "grad_norm": 697.8674926757812, + "learning_rate": 3.7108731640756224e-05, + "loss": 78.6153, + "step": 63990 + }, + { + "epoch": 0.2585680983528404, + "grad_norm": 1088.2008056640625, + "learning_rate": 3.710728520321014e-05, + "loss": 80.858, + "step": 64000 + }, + { + "epoch": 0.25860849961820803, + "grad_norm": 717.3138427734375, + "learning_rate": 3.710583843214872e-05, + "loss": 93.5504, + "step": 64010 + }, + { + "epoch": 0.25864890088357567, + "grad_norm": 873.1448974609375, + "learning_rate": 3.7104391327600176e-05, + "loss": 99.0795, + "step": 64020 + }, + { + "epoch": 0.2586893021489433, + "grad_norm": 1556.3338623046875, + "learning_rate": 3.710294388959272e-05, + "loss": 84.2014, + "step": 64030 + }, + { + "epoch": 0.25872970341431095, + "grad_norm": 610.7234497070312, 
+ "learning_rate": 3.7101496118154557e-05, + "loss": 83.9052, + "step": 64040 + }, + { + "epoch": 0.2587701046796786, + "grad_norm": 1536.5025634765625, + "learning_rate": 3.7100048013313933e-05, + "loss": 89.448, + "step": 64050 + }, + { + "epoch": 0.2588105059450462, + "grad_norm": 1107.8944091796875, + "learning_rate": 3.709859957509907e-05, + "loss": 110.5031, + "step": 64060 + }, + { + "epoch": 0.2588509072104138, + "grad_norm": 792.7132568359375, + "learning_rate": 3.70971508035382e-05, + "loss": 126.7712, + "step": 64070 + }, + { + "epoch": 0.25889130847578146, + "grad_norm": 412.228759765625, + "learning_rate": 3.7095701698659576e-05, + "loss": 87.4664, + "step": 64080 + }, + { + "epoch": 0.2589317097411491, + "grad_norm": 1258.835205078125, + "learning_rate": 3.709425226049145e-05, + "loss": 90.9776, + "step": 64090 + }, + { + "epoch": 0.25897211100651674, + "grad_norm": 1426.8939208984375, + "learning_rate": 3.709280248906206e-05, + "loss": 130.9939, + "step": 64100 + }, + { + "epoch": 0.2590125122718844, + "grad_norm": 900.2066650390625, + "learning_rate": 3.70913523843997e-05, + "loss": 133.217, + "step": 64110 + }, + { + "epoch": 0.25905291353725196, + "grad_norm": 553.3869018554688, + "learning_rate": 3.708990194653262e-05, + "loss": 114.05, + "step": 64120 + }, + { + "epoch": 0.2590933148026196, + "grad_norm": 616.3143920898438, + "learning_rate": 3.708845117548911e-05, + "loss": 123.2524, + "step": 64130 + }, + { + "epoch": 0.25913371606798724, + "grad_norm": 832.3405151367188, + "learning_rate": 3.708700007129744e-05, + "loss": 95.8738, + "step": 64140 + }, + { + "epoch": 0.2591741173333549, + "grad_norm": 512.5089111328125, + "learning_rate": 3.7085548633985906e-05, + "loss": 84.4528, + "step": 64150 + }, + { + "epoch": 0.2592145185987225, + "grad_norm": 2523.961669921875, + "learning_rate": 3.708409686358281e-05, + "loss": 85.1741, + "step": 64160 + }, + { + "epoch": 0.25925491986409016, + "grad_norm": 663.655517578125, + "learning_rate": 3.708264476011645e-05, + "loss": 88.9209, + "step": 64170 + }, + { + "epoch": 0.2592953211294578, + "grad_norm": 1033.0018310546875, + "learning_rate": 3.708119232361513e-05, + "loss": 110.9427, + "step": 64180 + }, + { + "epoch": 0.2593357223948254, + "grad_norm": 770.9794311523438, + "learning_rate": 3.707973955410717e-05, + "loss": 132.509, + "step": 64190 + }, + { + "epoch": 0.259376123660193, + "grad_norm": 1053.312255859375, + "learning_rate": 3.707828645162091e-05, + "loss": 76.0707, + "step": 64200 + }, + { + "epoch": 0.25941652492556067, + "grad_norm": 559.0098266601562, + "learning_rate": 3.7076833016184646e-05, + "loss": 87.5009, + "step": 64210 + }, + { + "epoch": 0.2594569261909283, + "grad_norm": 839.114013671875, + "learning_rate": 3.707537924782673e-05, + "loss": 67.3911, + "step": 64220 + }, + { + "epoch": 0.25949732745629595, + "grad_norm": 485.9180908203125, + "learning_rate": 3.7073925146575514e-05, + "loss": 137.0499, + "step": 64230 + }, + { + "epoch": 0.2595377287216636, + "grad_norm": 1929.6771240234375, + "learning_rate": 3.707247071245933e-05, + "loss": 93.1167, + "step": 64240 + }, + { + "epoch": 0.25957812998703117, + "grad_norm": 705.2219848632812, + "learning_rate": 3.707101594550655e-05, + "loss": 101.321, + "step": 64250 + }, + { + "epoch": 0.2596185312523988, + "grad_norm": 662.4102783203125, + "learning_rate": 3.706956084574552e-05, + "loss": 148.3191, + "step": 64260 + }, + { + "epoch": 0.25965893251776645, + "grad_norm": 685.0839233398438, + "learning_rate": 3.706810541320462e-05, + "loss": 164.4754, 
+ "step": 64270 + }, + { + "epoch": 0.2596993337831341, + "grad_norm": 937.897216796875, + "learning_rate": 3.706664964791221e-05, + "loss": 114.6711, + "step": 64280 + }, + { + "epoch": 0.25973973504850173, + "grad_norm": 1208.8258056640625, + "learning_rate": 3.7065193549896676e-05, + "loss": 132.0447, + "step": 64290 + }, + { + "epoch": 0.25978013631386937, + "grad_norm": 407.6659851074219, + "learning_rate": 3.706373711918641e-05, + "loss": 65.5091, + "step": 64300 + }, + { + "epoch": 0.25982053757923695, + "grad_norm": 1298.5391845703125, + "learning_rate": 3.706228035580981e-05, + "loss": 85.253, + "step": 64310 + }, + { + "epoch": 0.2598609388446046, + "grad_norm": 557.4871215820312, + "learning_rate": 3.7060823259795256e-05, + "loss": 107.2541, + "step": 64320 + }, + { + "epoch": 0.25990134010997223, + "grad_norm": 1316.3580322265625, + "learning_rate": 3.705936583117118e-05, + "loss": 127.4741, + "step": 64330 + }, + { + "epoch": 0.2599417413753399, + "grad_norm": 500.9515075683594, + "learning_rate": 3.7057908069965984e-05, + "loss": 103.5375, + "step": 64340 + }, + { + "epoch": 0.2599821426407075, + "grad_norm": 918.0108032226562, + "learning_rate": 3.705644997620809e-05, + "loss": 86.0953, + "step": 64350 + }, + { + "epoch": 0.26002254390607515, + "grad_norm": 481.9149169921875, + "learning_rate": 3.705499154992592e-05, + "loss": 124.008, + "step": 64360 + }, + { + "epoch": 0.2600629451714428, + "grad_norm": 940.8428344726562, + "learning_rate": 3.705353279114791e-05, + "loss": 102.4339, + "step": 64370 + }, + { + "epoch": 0.2601033464368104, + "grad_norm": 755.39306640625, + "learning_rate": 3.7052073699902494e-05, + "loss": 94.2508, + "step": 64380 + }, + { + "epoch": 0.260143747702178, + "grad_norm": 626.9711303710938, + "learning_rate": 3.705061427621813e-05, + "loss": 78.7625, + "step": 64390 + }, + { + "epoch": 0.26018414896754566, + "grad_norm": 823.7897338867188, + "learning_rate": 3.7049154520123256e-05, + "loss": 97.4351, + "step": 64400 + }, + { + "epoch": 0.2602245502329133, + "grad_norm": 563.3991088867188, + "learning_rate": 3.7047694431646334e-05, + "loss": 92.8853, + "step": 64410 + }, + { + "epoch": 0.26026495149828094, + "grad_norm": 540.2824096679688, + "learning_rate": 3.704623401081584e-05, + "loss": 73.6057, + "step": 64420 + }, + { + "epoch": 0.2603053527636486, + "grad_norm": 419.0438232421875, + "learning_rate": 3.7044773257660234e-05, + "loss": 73.1855, + "step": 64430 + }, + { + "epoch": 0.26034575402901616, + "grad_norm": 690.1300659179688, + "learning_rate": 3.7043312172207996e-05, + "loss": 121.8483, + "step": 64440 + }, + { + "epoch": 0.2603861552943838, + "grad_norm": 1426.2569580078125, + "learning_rate": 3.7041850754487623e-05, + "loss": 76.0797, + "step": 64450 + }, + { + "epoch": 0.26042655655975144, + "grad_norm": 605.5452880859375, + "learning_rate": 3.704038900452758e-05, + "loss": 102.276, + "step": 64460 + }, + { + "epoch": 0.2604669578251191, + "grad_norm": 650.0904541015625, + "learning_rate": 3.7038926922356395e-05, + "loss": 98.2879, + "step": 64470 + }, + { + "epoch": 0.2605073590904867, + "grad_norm": 1057.3546142578125, + "learning_rate": 3.703746450800255e-05, + "loss": 72.4349, + "step": 64480 + }, + { + "epoch": 0.26054776035585436, + "grad_norm": 898.99609375, + "learning_rate": 3.703600176149457e-05, + "loss": 118.7012, + "step": 64490 + }, + { + "epoch": 0.260588161621222, + "grad_norm": 738.8807373046875, + "learning_rate": 3.703453868286096e-05, + "loss": 83.6598, + "step": 64500 + }, + { + "epoch": 0.2606285628865896, + 
"grad_norm": 483.17889404296875, + "learning_rate": 3.703307527213024e-05, + "loss": 114.7747, + "step": 64510 + }, + { + "epoch": 0.2606689641519572, + "grad_norm": 4903.748046875, + "learning_rate": 3.7031611529330956e-05, + "loss": 115.6444, + "step": 64520 + }, + { + "epoch": 0.26070936541732487, + "grad_norm": 925.2534790039062, + "learning_rate": 3.703014745449164e-05, + "loss": 122.4535, + "step": 64530 + }, + { + "epoch": 0.2607497666826925, + "grad_norm": 589.3216552734375, + "learning_rate": 3.702868304764083e-05, + "loss": 100.3989, + "step": 64540 + }, + { + "epoch": 0.26079016794806015, + "grad_norm": 1201.8673095703125, + "learning_rate": 3.702721830880707e-05, + "loss": 183.8782, + "step": 64550 + }, + { + "epoch": 0.2608305692134278, + "grad_norm": 1067.19580078125, + "learning_rate": 3.702575323801893e-05, + "loss": 132.1987, + "step": 64560 + }, + { + "epoch": 0.26087097047879537, + "grad_norm": 605.4977416992188, + "learning_rate": 3.702428783530497e-05, + "loss": 93.8968, + "step": 64570 + }, + { + "epoch": 0.260911371744163, + "grad_norm": 634.1218872070312, + "learning_rate": 3.7022822100693746e-05, + "loss": 120.2565, + "step": 64580 + }, + { + "epoch": 0.26095177300953065, + "grad_norm": 358.3839111328125, + "learning_rate": 3.702135603421385e-05, + "loss": 87.87, + "step": 64590 + }, + { + "epoch": 0.2609921742748983, + "grad_norm": 548.27587890625, + "learning_rate": 3.701988963589384e-05, + "loss": 131.644, + "step": 64600 + }, + { + "epoch": 0.26103257554026593, + "grad_norm": 458.6590270996094, + "learning_rate": 3.701842290576233e-05, + "loss": 110.5659, + "step": 64610 + }, + { + "epoch": 0.26107297680563357, + "grad_norm": 800.7355346679688, + "learning_rate": 3.70169558438479e-05, + "loss": 164.3004, + "step": 64620 + }, + { + "epoch": 0.26111337807100116, + "grad_norm": 693.1978149414062, + "learning_rate": 3.701548845017915e-05, + "loss": 101.6118, + "step": 64630 + }, + { + "epoch": 0.2611537793363688, + "grad_norm": 515.5180053710938, + "learning_rate": 3.7014020724784703e-05, + "loss": 117.515, + "step": 64640 + }, + { + "epoch": 0.26119418060173644, + "grad_norm": 763.5925903320312, + "learning_rate": 3.701255266769316e-05, + "loss": 96.4843, + "step": 64650 + }, + { + "epoch": 0.2612345818671041, + "grad_norm": 252.2643280029297, + "learning_rate": 3.701108427893314e-05, + "loss": 88.5075, + "step": 64660 + }, + { + "epoch": 0.2612749831324717, + "grad_norm": 749.4307861328125, + "learning_rate": 3.7009615558533275e-05, + "loss": 61.1947, + "step": 64670 + }, + { + "epoch": 0.26131538439783936, + "grad_norm": 743.8534545898438, + "learning_rate": 3.70081465065222e-05, + "loss": 110.2821, + "step": 64680 + }, + { + "epoch": 0.261355785663207, + "grad_norm": 324.21405029296875, + "learning_rate": 3.7006677122928546e-05, + "loss": 104.5184, + "step": 64690 + }, + { + "epoch": 0.2613961869285746, + "grad_norm": 622.2553100585938, + "learning_rate": 3.7005207407780975e-05, + "loss": 112.2461, + "step": 64700 + }, + { + "epoch": 0.2614365881939422, + "grad_norm": 793.0509033203125, + "learning_rate": 3.7003737361108124e-05, + "loss": 88.7361, + "step": 64710 + }, + { + "epoch": 0.26147698945930986, + "grad_norm": 797.2341918945312, + "learning_rate": 3.700226698293866e-05, + "loss": 78.7673, + "step": 64720 + }, + { + "epoch": 0.2615173907246775, + "grad_norm": 1127.797119140625, + "learning_rate": 3.7000796273301254e-05, + "loss": 93.0857, + "step": 64730 + }, + { + "epoch": 0.26155779199004514, + "grad_norm": 897.6571655273438, + "learning_rate": 
3.6999325232224564e-05, + "loss": 77.6829, + "step": 64740 + }, + { + "epoch": 0.2615981932554128, + "grad_norm": 1549.79052734375, + "learning_rate": 3.699785385973728e-05, + "loss": 98.8927, + "step": 64750 + }, + { + "epoch": 0.26163859452078037, + "grad_norm": 1096.0546875, + "learning_rate": 3.6996382155868084e-05, + "loss": 74.6725, + "step": 64760 + }, + { + "epoch": 0.261678995786148, + "grad_norm": 1315.089111328125, + "learning_rate": 3.699491012064567e-05, + "loss": 105.7346, + "step": 64770 + }, + { + "epoch": 0.26171939705151565, + "grad_norm": 411.2245788574219, + "learning_rate": 3.6993437754098734e-05, + "loss": 100.3017, + "step": 64780 + }, + { + "epoch": 0.2617597983168833, + "grad_norm": 875.404541015625, + "learning_rate": 3.6991965056255976e-05, + "loss": 91.5437, + "step": 64790 + }, + { + "epoch": 0.2618001995822509, + "grad_norm": 432.2652893066406, + "learning_rate": 3.6990492027146114e-05, + "loss": 69.9126, + "step": 64800 + }, + { + "epoch": 0.26184060084761857, + "grad_norm": 1075.6805419921875, + "learning_rate": 3.698901866679786e-05, + "loss": 101.0705, + "step": 64810 + }, + { + "epoch": 0.2618810021129862, + "grad_norm": 1470.537109375, + "learning_rate": 3.698754497523994e-05, + "loss": 111.1882, + "step": 64820 + }, + { + "epoch": 0.2619214033783538, + "grad_norm": 609.381591796875, + "learning_rate": 3.6986070952501085e-05, + "loss": 105.03, + "step": 64830 + }, + { + "epoch": 0.26196180464372143, + "grad_norm": 608.0729370117188, + "learning_rate": 3.698459659861003e-05, + "loss": 119.1692, + "step": 64840 + }, + { + "epoch": 0.26200220590908907, + "grad_norm": 910.6409912109375, + "learning_rate": 3.698312191359553e-05, + "loss": 112.0729, + "step": 64850 + }, + { + "epoch": 0.2620426071744567, + "grad_norm": 678.8384399414062, + "learning_rate": 3.698164689748631e-05, + "loss": 76.3912, + "step": 64860 + }, + { + "epoch": 0.26208300843982435, + "grad_norm": 569.0090942382812, + "learning_rate": 3.6980171550311156e-05, + "loss": 72.4753, + "step": 64870 + }, + { + "epoch": 0.262123409705192, + "grad_norm": 746.2606201171875, + "learning_rate": 3.6978695872098806e-05, + "loss": 152.4471, + "step": 64880 + }, + { + "epoch": 0.2621638109705596, + "grad_norm": 936.8212890625, + "learning_rate": 3.697721986287804e-05, + "loss": 74.6426, + "step": 64890 + }, + { + "epoch": 0.2622042122359272, + "grad_norm": 1019.0665283203125, + "learning_rate": 3.697574352267764e-05, + "loss": 101.2239, + "step": 64900 + }, + { + "epoch": 0.26224461350129485, + "grad_norm": 537.8054809570312, + "learning_rate": 3.697426685152637e-05, + "loss": 80.0296, + "step": 64910 + }, + { + "epoch": 0.2622850147666625, + "grad_norm": 889.2251586914062, + "learning_rate": 3.697278984945304e-05, + "loss": 93.0229, + "step": 64920 + }, + { + "epoch": 0.26232541603203013, + "grad_norm": 633.8497314453125, + "learning_rate": 3.6971312516486426e-05, + "loss": 115.4767, + "step": 64930 + }, + { + "epoch": 0.2623658172973978, + "grad_norm": 890.2572631835938, + "learning_rate": 3.6969834852655334e-05, + "loss": 86.3858, + "step": 64940 + }, + { + "epoch": 0.26240621856276536, + "grad_norm": 2041.6094970703125, + "learning_rate": 3.696835685798858e-05, + "loss": 117.0331, + "step": 64950 + }, + { + "epoch": 0.262446619828133, + "grad_norm": 1232.2847900390625, + "learning_rate": 3.696687853251497e-05, + "loss": 189.0766, + "step": 64960 + }, + { + "epoch": 0.26248702109350064, + "grad_norm": 615.3238525390625, + "learning_rate": 3.696539987626334e-05, + "loss": 82.3887, + "step": 64970 + }, + 
{ + "epoch": 0.2625274223588683, + "grad_norm": 780.9232788085938, + "learning_rate": 3.696392088926248e-05, + "loss": 116.4401, + "step": 64980 + }, + { + "epoch": 0.2625678236242359, + "grad_norm": 360.5764465332031, + "learning_rate": 3.696244157154128e-05, + "loss": 66.0333, + "step": 64990 + }, + { + "epoch": 0.26260822488960356, + "grad_norm": 1353.8798828125, + "learning_rate": 3.696096192312852e-05, + "loss": 133.2398, + "step": 65000 + }, + { + "epoch": 0.2626486261549712, + "grad_norm": 602.8245849609375, + "learning_rate": 3.695948194405309e-05, + "loss": 78.4112, + "step": 65010 + }, + { + "epoch": 0.2626890274203388, + "grad_norm": 406.8870849609375, + "learning_rate": 3.6958001634343825e-05, + "loss": 108.8085, + "step": 65020 + }, + { + "epoch": 0.2627294286857064, + "grad_norm": 588.2860107421875, + "learning_rate": 3.695652099402959e-05, + "loss": 57.6515, + "step": 65030 + }, + { + "epoch": 0.26276982995107406, + "grad_norm": 1437.43701171875, + "learning_rate": 3.695504002313924e-05, + "loss": 114.828, + "step": 65040 + }, + { + "epoch": 0.2628102312164417, + "grad_norm": 662.9895629882812, + "learning_rate": 3.6953558721701666e-05, + "loss": 85.8193, + "step": 65050 + }, + { + "epoch": 0.26285063248180934, + "grad_norm": 304.2913513183594, + "learning_rate": 3.6952077089745735e-05, + "loss": 122.9981, + "step": 65060 + }, + { + "epoch": 0.262891033747177, + "grad_norm": 958.9667358398438, + "learning_rate": 3.695059512730033e-05, + "loss": 97.6834, + "step": 65070 + }, + { + "epoch": 0.26293143501254457, + "grad_norm": 597.2545166015625, + "learning_rate": 3.694911283439435e-05, + "loss": 117.681, + "step": 65080 + }, + { + "epoch": 0.2629718362779122, + "grad_norm": 650.4228515625, + "learning_rate": 3.6947630211056684e-05, + "loss": 78.3831, + "step": 65090 + }, + { + "epoch": 0.26301223754327985, + "grad_norm": 734.40771484375, + "learning_rate": 3.6946147257316247e-05, + "loss": 76.1051, + "step": 65100 + }, + { + "epoch": 0.2630526388086475, + "grad_norm": 625.302001953125, + "learning_rate": 3.6944663973201945e-05, + "loss": 67.578, + "step": 65110 + }, + { + "epoch": 0.2630930400740151, + "grad_norm": 548.2142944335938, + "learning_rate": 3.694318035874269e-05, + "loss": 75.1477, + "step": 65120 + }, + { + "epoch": 0.26313344133938277, + "grad_norm": 744.4384765625, + "learning_rate": 3.694169641396741e-05, + "loss": 84.0746, + "step": 65130 + }, + { + "epoch": 0.2631738426047504, + "grad_norm": 515.2950439453125, + "learning_rate": 3.6940212138905043e-05, + "loss": 81.1483, + "step": 65140 + }, + { + "epoch": 0.263214243870118, + "grad_norm": 882.6480102539062, + "learning_rate": 3.6938727533584515e-05, + "loss": 73.116, + "step": 65150 + }, + { + "epoch": 0.26325464513548563, + "grad_norm": 387.6449890136719, + "learning_rate": 3.6937242598034776e-05, + "loss": 67.9663, + "step": 65160 + }, + { + "epoch": 0.26329504640085327, + "grad_norm": 1767.402099609375, + "learning_rate": 3.693575733228477e-05, + "loss": 109.6401, + "step": 65170 + }, + { + "epoch": 0.2633354476662209, + "grad_norm": 695.4577026367188, + "learning_rate": 3.6934271736363455e-05, + "loss": 65.6038, + "step": 65180 + }, + { + "epoch": 0.26337584893158855, + "grad_norm": 630.9485473632812, + "learning_rate": 3.69327858102998e-05, + "loss": 127.0452, + "step": 65190 + }, + { + "epoch": 0.2634162501969562, + "grad_norm": 1002.1976928710938, + "learning_rate": 3.6931299554122754e-05, + "loss": 84.8406, + "step": 65200 + }, + { + "epoch": 0.2634566514623238, + "grad_norm": 1292.970947265625, + 
"learning_rate": 3.692981296786132e-05, + "loss": 113.9335, + "step": 65210 + }, + { + "epoch": 0.2634970527276914, + "grad_norm": 687.6002807617188, + "learning_rate": 3.692832605154446e-05, + "loss": 87.5518, + "step": 65220 + }, + { + "epoch": 0.26353745399305906, + "grad_norm": 1506.4801025390625, + "learning_rate": 3.692683880520117e-05, + "loss": 112.9319, + "step": 65230 + }, + { + "epoch": 0.2635778552584267, + "grad_norm": 1268.7906494140625, + "learning_rate": 3.6925351228860445e-05, + "loss": 116.8934, + "step": 65240 + }, + { + "epoch": 0.26361825652379434, + "grad_norm": 623.935791015625, + "learning_rate": 3.6923863322551284e-05, + "loss": 87.7411, + "step": 65250 + }, + { + "epoch": 0.263658657789162, + "grad_norm": 479.07635498046875, + "learning_rate": 3.692237508630269e-05, + "loss": 101.1454, + "step": 65260 + }, + { + "epoch": 0.26369905905452956, + "grad_norm": 1086.2506103515625, + "learning_rate": 3.6920886520143684e-05, + "loss": 68.4225, + "step": 65270 + }, + { + "epoch": 0.2637394603198972, + "grad_norm": 491.6980285644531, + "learning_rate": 3.6919397624103284e-05, + "loss": 71.1107, + "step": 65280 + }, + { + "epoch": 0.26377986158526484, + "grad_norm": 706.2943115234375, + "learning_rate": 3.691790839821052e-05, + "loss": 61.1636, + "step": 65290 + }, + { + "epoch": 0.2638202628506325, + "grad_norm": 799.2622680664062, + "learning_rate": 3.6916418842494416e-05, + "loss": 75.8203, + "step": 65300 + }, + { + "epoch": 0.2638606641160001, + "grad_norm": 644.90234375, + "learning_rate": 3.691492895698402e-05, + "loss": 83.3496, + "step": 65310 + }, + { + "epoch": 0.26390106538136776, + "grad_norm": 766.0650024414062, + "learning_rate": 3.691343874170838e-05, + "loss": 119.5681, + "step": 65320 + }, + { + "epoch": 0.2639414666467354, + "grad_norm": 913.3250122070312, + "learning_rate": 3.6911948196696533e-05, + "loss": 72.4505, + "step": 65330 + }, + { + "epoch": 0.263981867912103, + "grad_norm": 844.3764038085938, + "learning_rate": 3.691045732197756e-05, + "loss": 104.1052, + "step": 65340 + }, + { + "epoch": 0.2640222691774706, + "grad_norm": 642.6758422851562, + "learning_rate": 3.690896611758051e-05, + "loss": 86.4235, + "step": 65350 + }, + { + "epoch": 0.26406267044283827, + "grad_norm": 963.2833862304688, + "learning_rate": 3.690747458353446e-05, + "loss": 119.986, + "step": 65360 + }, + { + "epoch": 0.2641030717082059, + "grad_norm": 495.5404968261719, + "learning_rate": 3.6905982719868493e-05, + "loss": 114.2894, + "step": 65370 + }, + { + "epoch": 0.26414347297357355, + "grad_norm": 696.38232421875, + "learning_rate": 3.6904490526611684e-05, + "loss": 114.5754, + "step": 65380 + }, + { + "epoch": 0.2641838742389412, + "grad_norm": 526.3698120117188, + "learning_rate": 3.690299800379313e-05, + "loss": 61.4895, + "step": 65390 + }, + { + "epoch": 0.26422427550430877, + "grad_norm": 840.7091674804688, + "learning_rate": 3.6901505151441935e-05, + "loss": 132.3211, + "step": 65400 + }, + { + "epoch": 0.2642646767696764, + "grad_norm": 990.7478637695312, + "learning_rate": 3.690001196958719e-05, + "loss": 124.6337, + "step": 65410 + }, + { + "epoch": 0.26430507803504405, + "grad_norm": 577.9857788085938, + "learning_rate": 3.6898518458258006e-05, + "loss": 68.3108, + "step": 65420 + }, + { + "epoch": 0.2643454793004117, + "grad_norm": 169.7301483154297, + "learning_rate": 3.689702461748351e-05, + "loss": 103.6921, + "step": 65430 + }, + { + "epoch": 0.26438588056577933, + "grad_norm": 1321.1123046875, + "learning_rate": 3.689553044729282e-05, + "loss": 
133.7232, + "step": 65440 + }, + { + "epoch": 0.26442628183114697, + "grad_norm": 544.4238891601562, + "learning_rate": 3.689403594771506e-05, + "loss": 84.2998, + "step": 65450 + }, + { + "epoch": 0.2644666830965146, + "grad_norm": 1150.3575439453125, + "learning_rate": 3.689254111877938e-05, + "loss": 92.0875, + "step": 65460 + }, + { + "epoch": 0.2645070843618822, + "grad_norm": 1430.2828369140625, + "learning_rate": 3.6891045960514904e-05, + "loss": 85.4246, + "step": 65470 + }, + { + "epoch": 0.26454748562724983, + "grad_norm": 665.8350830078125, + "learning_rate": 3.6889550472950804e-05, + "loss": 106.1281, + "step": 65480 + }, + { + "epoch": 0.2645878868926175, + "grad_norm": 419.1693115234375, + "learning_rate": 3.688805465611621e-05, + "loss": 76.4147, + "step": 65490 + }, + { + "epoch": 0.2646282881579851, + "grad_norm": 815.631591796875, + "learning_rate": 3.6886558510040305e-05, + "loss": 94.7802, + "step": 65500 + }, + { + "epoch": 0.26466868942335275, + "grad_norm": 737.8182983398438, + "learning_rate": 3.6885062034752244e-05, + "loss": 85.144, + "step": 65510 + }, + { + "epoch": 0.2647090906887204, + "grad_norm": 539.2249755859375, + "learning_rate": 3.6883565230281205e-05, + "loss": 69.7382, + "step": 65520 + }, + { + "epoch": 0.264749491954088, + "grad_norm": 409.9531555175781, + "learning_rate": 3.688206809665637e-05, + "loss": 60.988, + "step": 65530 + }, + { + "epoch": 0.2647898932194556, + "grad_norm": 636.5735473632812, + "learning_rate": 3.688057063390693e-05, + "loss": 85.4699, + "step": 65540 + }, + { + "epoch": 0.26483029448482326, + "grad_norm": 840.2516479492188, + "learning_rate": 3.687907284206207e-05, + "loss": 73.3038, + "step": 65550 + }, + { + "epoch": 0.2648706957501909, + "grad_norm": 472.2933654785156, + "learning_rate": 3.6877574721151e-05, + "loss": 122.7684, + "step": 65560 + }, + { + "epoch": 0.26491109701555854, + "grad_norm": 1137.9334716796875, + "learning_rate": 3.687607627120291e-05, + "loss": 98.9171, + "step": 65570 + }, + { + "epoch": 0.2649514982809262, + "grad_norm": 582.2466430664062, + "learning_rate": 3.6874577492247036e-05, + "loss": 163.8191, + "step": 65580 + }, + { + "epoch": 0.26499189954629376, + "grad_norm": 724.069091796875, + "learning_rate": 3.687307838431258e-05, + "loss": 83.8654, + "step": 65590 + }, + { + "epoch": 0.2650323008116614, + "grad_norm": 1071.441650390625, + "learning_rate": 3.687157894742878e-05, + "loss": 131.6161, + "step": 65600 + }, + { + "epoch": 0.26507270207702904, + "grad_norm": 541.9859619140625, + "learning_rate": 3.687007918162486e-05, + "loss": 78.6537, + "step": 65610 + }, + { + "epoch": 0.2651131033423967, + "grad_norm": 626.562744140625, + "learning_rate": 3.686857908693006e-05, + "loss": 79.8823, + "step": 65620 + }, + { + "epoch": 0.2651535046077643, + "grad_norm": 702.9822387695312, + "learning_rate": 3.6867078663373624e-05, + "loss": 104.0357, + "step": 65630 + }, + { + "epoch": 0.26519390587313196, + "grad_norm": 360.96484375, + "learning_rate": 3.686557791098481e-05, + "loss": 84.9814, + "step": 65640 + }, + { + "epoch": 0.2652343071384996, + "grad_norm": 1214.092041015625, + "learning_rate": 3.6864076829792865e-05, + "loss": 106.7884, + "step": 65650 + }, + { + "epoch": 0.2652747084038672, + "grad_norm": 919.9710083007812, + "learning_rate": 3.686257541982706e-05, + "loss": 113.7406, + "step": 65660 + }, + { + "epoch": 0.2653151096692348, + "grad_norm": 794.472900390625, + "learning_rate": 3.6861073681116674e-05, + "loss": 111.5102, + "step": 65670 + }, + { + "epoch": 0.26535551093460247, 
+ "grad_norm": 483.4223937988281, + "learning_rate": 3.685957161369098e-05, + "loss": 96.5579, + "step": 65680 + }, + { + "epoch": 0.2653959121999701, + "grad_norm": 2160.982421875, + "learning_rate": 3.685806921757925e-05, + "loss": 130.947, + "step": 65690 + }, + { + "epoch": 0.26543631346533775, + "grad_norm": 1383.9820556640625, + "learning_rate": 3.685656649281078e-05, + "loss": 93.1118, + "step": 65700 + }, + { + "epoch": 0.2654767147307054, + "grad_norm": 1074.34765625, + "learning_rate": 3.6855063439414866e-05, + "loss": 63.4234, + "step": 65710 + }, + { + "epoch": 0.26551711599607297, + "grad_norm": 741.8479614257812, + "learning_rate": 3.685356005742082e-05, + "loss": 116.2139, + "step": 65720 + }, + { + "epoch": 0.2655575172614406, + "grad_norm": 1192.588134765625, + "learning_rate": 3.685205634685794e-05, + "loss": 150.7196, + "step": 65730 + }, + { + "epoch": 0.26559791852680825, + "grad_norm": 559.5575561523438, + "learning_rate": 3.685055230775554e-05, + "loss": 84.0855, + "step": 65740 + }, + { + "epoch": 0.2656383197921759, + "grad_norm": 618.826904296875, + "learning_rate": 3.684904794014296e-05, + "loss": 89.1764, + "step": 65750 + }, + { + "epoch": 0.26567872105754353, + "grad_norm": 1019.466552734375, + "learning_rate": 3.6847543244049505e-05, + "loss": 85.7676, + "step": 65760 + }, + { + "epoch": 0.26571912232291117, + "grad_norm": 620.0348510742188, + "learning_rate": 3.684603821950452e-05, + "loss": 100.0721, + "step": 65770 + }, + { + "epoch": 0.2657595235882788, + "grad_norm": 1102.3499755859375, + "learning_rate": 3.6844532866537355e-05, + "loss": 130.704, + "step": 65780 + }, + { + "epoch": 0.2657999248536464, + "grad_norm": 1337.453125, + "learning_rate": 3.684302718517734e-05, + "loss": 103.3017, + "step": 65790 + }, + { + "epoch": 0.26584032611901404, + "grad_norm": 608.0574340820312, + "learning_rate": 3.684152117545385e-05, + "loss": 114.09, + "step": 65800 + }, + { + "epoch": 0.2658807273843817, + "grad_norm": 1020.663330078125, + "learning_rate": 3.684001483739623e-05, + "loss": 109.9553, + "step": 65810 + }, + { + "epoch": 0.2659211286497493, + "grad_norm": 548.213134765625, + "learning_rate": 3.6838508171033846e-05, + "loss": 92.5788, + "step": 65820 + }, + { + "epoch": 0.26596152991511696, + "grad_norm": 463.6018371582031, + "learning_rate": 3.6837001176396084e-05, + "loss": 91.0072, + "step": 65830 + }, + { + "epoch": 0.2660019311804846, + "grad_norm": 738.9492797851562, + "learning_rate": 3.683549385351231e-05, + "loss": 71.5438, + "step": 65840 + }, + { + "epoch": 0.2660423324458522, + "grad_norm": 688.5137329101562, + "learning_rate": 3.6833986202411914e-05, + "loss": 93.5996, + "step": 65850 + }, + { + "epoch": 0.2660827337112198, + "grad_norm": 735.5326538085938, + "learning_rate": 3.68324782231243e-05, + "loss": 90.1322, + "step": 65860 + }, + { + "epoch": 0.26612313497658746, + "grad_norm": 521.7410888671875, + "learning_rate": 3.683096991567885e-05, + "loss": 83.4711, + "step": 65870 + }, + { + "epoch": 0.2661635362419551, + "grad_norm": 1079.812744140625, + "learning_rate": 3.682946128010498e-05, + "loss": 87.5418, + "step": 65880 + }, + { + "epoch": 0.26620393750732274, + "grad_norm": 1032.6300048828125, + "learning_rate": 3.682795231643209e-05, + "loss": 105.5094, + "step": 65890 + }, + { + "epoch": 0.2662443387726904, + "grad_norm": 1335.040283203125, + "learning_rate": 3.682644302468961e-05, + "loss": 112.5529, + "step": 65900 + }, + { + "epoch": 0.26628474003805797, + "grad_norm": 532.9078979492188, + "learning_rate": 
3.682493340490697e-05, + "loss": 114.5399, + "step": 65910 + }, + { + "epoch": 0.2663251413034256, + "grad_norm": 1538.698486328125, + "learning_rate": 3.6823423457113575e-05, + "loss": 92.9194, + "step": 65920 + }, + { + "epoch": 0.26636554256879325, + "grad_norm": 819.9867553710938, + "learning_rate": 3.682191318133889e-05, + "loss": 109.1442, + "step": 65930 + }, + { + "epoch": 0.2664059438341609, + "grad_norm": 290.35150146484375, + "learning_rate": 3.6820402577612336e-05, + "loss": 105.953, + "step": 65940 + }, + { + "epoch": 0.2664463450995285, + "grad_norm": 564.0250854492188, + "learning_rate": 3.681889164596339e-05, + "loss": 52.5289, + "step": 65950 + }, + { + "epoch": 0.26648674636489617, + "grad_norm": 959.5496826171875, + "learning_rate": 3.6817380386421475e-05, + "loss": 65.1463, + "step": 65960 + }, + { + "epoch": 0.2665271476302638, + "grad_norm": 722.5169677734375, + "learning_rate": 3.681586879901608e-05, + "loss": 94.4726, + "step": 65970 + }, + { + "epoch": 0.2665675488956314, + "grad_norm": 585.9683227539062, + "learning_rate": 3.6814356883776665e-05, + "loss": 113.2666, + "step": 65980 + }, + { + "epoch": 0.26660795016099903, + "grad_norm": 924.5708618164062, + "learning_rate": 3.681284464073271e-05, + "loss": 100.2781, + "step": 65990 + }, + { + "epoch": 0.26664835142636667, + "grad_norm": 517.633056640625, + "learning_rate": 3.6811332069913685e-05, + "loss": 101.1605, + "step": 66000 + }, + { + "epoch": 0.2666887526917343, + "grad_norm": 1197.106201171875, + "learning_rate": 3.680981917134909e-05, + "loss": 93.6491, + "step": 66010 + }, + { + "epoch": 0.26672915395710195, + "grad_norm": 774.6829833984375, + "learning_rate": 3.6808305945068424e-05, + "loss": 134.2547, + "step": 66020 + }, + { + "epoch": 0.2667695552224696, + "grad_norm": 649.3412475585938, + "learning_rate": 3.6806792391101166e-05, + "loss": 92.2034, + "step": 66030 + }, + { + "epoch": 0.2668099564878372, + "grad_norm": 1417.0733642578125, + "learning_rate": 3.6805278509476844e-05, + "loss": 82.9415, + "step": 66040 + }, + { + "epoch": 0.2668503577532048, + "grad_norm": 906.3484497070312, + "learning_rate": 3.680376430022497e-05, + "loss": 62.0336, + "step": 66050 + }, + { + "epoch": 0.26689075901857245, + "grad_norm": 713.027587890625, + "learning_rate": 3.680224976337505e-05, + "loss": 119.626, + "step": 66060 + }, + { + "epoch": 0.2669311602839401, + "grad_norm": 569.616455078125, + "learning_rate": 3.680073489895663e-05, + "loss": 102.0743, + "step": 66070 + }, + { + "epoch": 0.26697156154930773, + "grad_norm": 708.4622192382812, + "learning_rate": 3.679921970699923e-05, + "loss": 78.8058, + "step": 66080 + }, + { + "epoch": 0.2670119628146754, + "grad_norm": 1081.883056640625, + "learning_rate": 3.679770418753239e-05, + "loss": 88.8019, + "step": 66090 + }, + { + "epoch": 0.267052364080043, + "grad_norm": 559.322998046875, + "learning_rate": 3.679618834058566e-05, + "loss": 78.6094, + "step": 66100 + }, + { + "epoch": 0.2670927653454106, + "grad_norm": 314.1164245605469, + "learning_rate": 3.6794672166188595e-05, + "loss": 99.6277, + "step": 66110 + }, + { + "epoch": 0.26713316661077824, + "grad_norm": 344.7218017578125, + "learning_rate": 3.679315566437074e-05, + "loss": 80.3364, + "step": 66120 + }, + { + "epoch": 0.2671735678761459, + "grad_norm": 272.1508483886719, + "learning_rate": 3.679163883516168e-05, + "loss": 98.9339, + "step": 66130 + }, + { + "epoch": 0.2672139691415135, + "grad_norm": 1115.919921875, + "learning_rate": 3.6790121678590975e-05, + "loss": 99.5988, + "step": 66140 + }, 
+ { + "epoch": 0.26725437040688116, + "grad_norm": 364.0482482910156, + "learning_rate": 3.6788604194688205e-05, + "loss": 74.2136, + "step": 66150 + }, + { + "epoch": 0.2672947716722488, + "grad_norm": 865.9616088867188, + "learning_rate": 3.6787086383482946e-05, + "loss": 76.1034, + "step": 66160 + }, + { + "epoch": 0.2673351729376164, + "grad_norm": 790.9737548828125, + "learning_rate": 3.6785568245004796e-05, + "loss": 100.9523, + "step": 66170 + }, + { + "epoch": 0.267375574202984, + "grad_norm": 1064.888916015625, + "learning_rate": 3.6784049779283355e-05, + "loss": 68.268, + "step": 66180 + }, + { + "epoch": 0.26741597546835166, + "grad_norm": 1061.612060546875, + "learning_rate": 3.678253098634822e-05, + "loss": 104.9067, + "step": 66190 + }, + { + "epoch": 0.2674563767337193, + "grad_norm": 351.0912780761719, + "learning_rate": 3.678101186622901e-05, + "loss": 91.9083, + "step": 66200 + }, + { + "epoch": 0.26749677799908694, + "grad_norm": 1003.7578735351562, + "learning_rate": 3.6779492418955324e-05, + "loss": 84.5789, + "step": 66210 + }, + { + "epoch": 0.2675371792644546, + "grad_norm": 870.955810546875, + "learning_rate": 3.67779726445568e-05, + "loss": 132.6802, + "step": 66220 + }, + { + "epoch": 0.26757758052982217, + "grad_norm": 637.2534790039062, + "learning_rate": 3.677645254306306e-05, + "loss": 97.3954, + "step": 66230 + }, + { + "epoch": 0.2676179817951898, + "grad_norm": 656.2486572265625, + "learning_rate": 3.6774932114503743e-05, + "loss": 83.658, + "step": 66240 + }, + { + "epoch": 0.26765838306055745, + "grad_norm": 984.7315063476562, + "learning_rate": 3.6773411358908486e-05, + "loss": 118.4066, + "step": 66250 + }, + { + "epoch": 0.2676987843259251, + "grad_norm": 832.0888061523438, + "learning_rate": 3.677189027630694e-05, + "loss": 80.6267, + "step": 66260 + }, + { + "epoch": 0.2677391855912927, + "grad_norm": 501.2219543457031, + "learning_rate": 3.6770368866728756e-05, + "loss": 87.1882, + "step": 66270 + }, + { + "epoch": 0.26777958685666037, + "grad_norm": 1422.3106689453125, + "learning_rate": 3.6768847130203595e-05, + "loss": 120.3903, + "step": 66280 + }, + { + "epoch": 0.267819988122028, + "grad_norm": 1041.5869140625, + "learning_rate": 3.6767325066761136e-05, + "loss": 120.8286, + "step": 66290 + }, + { + "epoch": 0.2678603893873956, + "grad_norm": 679.9183959960938, + "learning_rate": 3.676580267643103e-05, + "loss": 160.7206, + "step": 66300 + }, + { + "epoch": 0.26790079065276323, + "grad_norm": 721.8289184570312, + "learning_rate": 3.676427995924297e-05, + "loss": 74.6857, + "step": 66310 + }, + { + "epoch": 0.26794119191813087, + "grad_norm": 1104.445556640625, + "learning_rate": 3.6762756915226654e-05, + "loss": 89.6367, + "step": 66320 + }, + { + "epoch": 0.2679815931834985, + "grad_norm": 303.7904052734375, + "learning_rate": 3.676123354441175e-05, + "loss": 78.8096, + "step": 66330 + }, + { + "epoch": 0.26802199444886615, + "grad_norm": 441.08819580078125, + "learning_rate": 3.6759709846827977e-05, + "loss": 110.9255, + "step": 66340 + }, + { + "epoch": 0.2680623957142338, + "grad_norm": 891.7954711914062, + "learning_rate": 3.675818582250503e-05, + "loss": 100.9259, + "step": 66350 + }, + { + "epoch": 0.2681027969796014, + "grad_norm": 846.0662841796875, + "learning_rate": 3.6756661471472626e-05, + "loss": 121.6967, + "step": 66360 + }, + { + "epoch": 0.268143198244969, + "grad_norm": 551.7979125976562, + "learning_rate": 3.675513679376047e-05, + "loss": 82.2298, + "step": 66370 + }, + { + "epoch": 0.26818359951033666, + "grad_norm": 
1117.17041015625, + "learning_rate": 3.67536117893983e-05, + "loss": 93.3608, + "step": 66380 + }, + { + "epoch": 0.2682240007757043, + "grad_norm": 888.7140502929688, + "learning_rate": 3.675208645841584e-05, + "loss": 89.7162, + "step": 66390 + }, + { + "epoch": 0.26826440204107194, + "grad_norm": 2872.262939453125, + "learning_rate": 3.675056080084284e-05, + "loss": 126.1933, + "step": 66400 + }, + { + "epoch": 0.2683048033064396, + "grad_norm": 777.6696166992188, + "learning_rate": 3.674903481670902e-05, + "loss": 105.9529, + "step": 66410 + }, + { + "epoch": 0.2683452045718072, + "grad_norm": 909.9625244140625, + "learning_rate": 3.6747508506044156e-05, + "loss": 77.0414, + "step": 66420 + }, + { + "epoch": 0.2683856058371748, + "grad_norm": 922.3405151367188, + "learning_rate": 3.6745981868877986e-05, + "loss": 136.5746, + "step": 66430 + }, + { + "epoch": 0.26842600710254244, + "grad_norm": 814.6968994140625, + "learning_rate": 3.674445490524027e-05, + "loss": 128.5733, + "step": 66440 + }, + { + "epoch": 0.2684664083679101, + "grad_norm": 359.998046875, + "learning_rate": 3.67429276151608e-05, + "loss": 96.8418, + "step": 66450 + }, + { + "epoch": 0.2685068096332777, + "grad_norm": 1176.6510009765625, + "learning_rate": 3.674139999866933e-05, + "loss": 96.117, + "step": 66460 + }, + { + "epoch": 0.26854721089864536, + "grad_norm": 1241.8218994140625, + "learning_rate": 3.6739872055795646e-05, + "loss": 63.4372, + "step": 66470 + }, + { + "epoch": 0.268587612164013, + "grad_norm": 1242.6336669921875, + "learning_rate": 3.673834378656954e-05, + "loss": 69.8636, + "step": 66480 + }, + { + "epoch": 0.2686280134293806, + "grad_norm": 635.782958984375, + "learning_rate": 3.67368151910208e-05, + "loss": 121.7058, + "step": 66490 + }, + { + "epoch": 0.2686684146947482, + "grad_norm": 1062.27587890625, + "learning_rate": 3.673528626917924e-05, + "loss": 129.0958, + "step": 66500 + }, + { + "epoch": 0.26870881596011587, + "grad_norm": 633.8303833007812, + "learning_rate": 3.673375702107465e-05, + "loss": 89.3084, + "step": 66510 + }, + { + "epoch": 0.2687492172254835, + "grad_norm": 519.8699340820312, + "learning_rate": 3.673222744673686e-05, + "loss": 89.4574, + "step": 66520 + }, + { + "epoch": 0.26878961849085115, + "grad_norm": 1086.346435546875, + "learning_rate": 3.673069754619567e-05, + "loss": 115.6588, + "step": 66530 + }, + { + "epoch": 0.2688300197562188, + "grad_norm": 585.8494262695312, + "learning_rate": 3.672916731948092e-05, + "loss": 97.3519, + "step": 66540 + }, + { + "epoch": 0.26887042102158637, + "grad_norm": 1002.6953735351562, + "learning_rate": 3.672763676662245e-05, + "loss": 106.9858, + "step": 66550 + }, + { + "epoch": 0.268910822286954, + "grad_norm": 1160.5443115234375, + "learning_rate": 3.672610588765008e-05, + "loss": 115.1764, + "step": 66560 + }, + { + "epoch": 0.26895122355232165, + "grad_norm": 551.3721313476562, + "learning_rate": 3.672457468259367e-05, + "loss": 90.8336, + "step": 66570 + }, + { + "epoch": 0.2689916248176893, + "grad_norm": 1199.9073486328125, + "learning_rate": 3.6723043151483066e-05, + "loss": 94.4928, + "step": 66580 + }, + { + "epoch": 0.26903202608305693, + "grad_norm": 1108.66357421875, + "learning_rate": 3.6721511294348124e-05, + "loss": 125.8415, + "step": 66590 + }, + { + "epoch": 0.26907242734842457, + "grad_norm": 1623.9993896484375, + "learning_rate": 3.671997911121871e-05, + "loss": 111.5956, + "step": 66600 + }, + { + "epoch": 0.2691128286137922, + "grad_norm": 1156.067138671875, + "learning_rate": 3.67184466021247e-05, + 
"loss": 111.3455, + "step": 66610 + }, + { + "epoch": 0.2691532298791598, + "grad_norm": 628.4947509765625, + "learning_rate": 3.6716913767095964e-05, + "loss": 84.3345, + "step": 66620 + }, + { + "epoch": 0.26919363114452743, + "grad_norm": 654.1062622070312, + "learning_rate": 3.671538060616239e-05, + "loss": 66.1989, + "step": 66630 + }, + { + "epoch": 0.2692340324098951, + "grad_norm": 1026.301513671875, + "learning_rate": 3.671384711935386e-05, + "loss": 96.9606, + "step": 66640 + }, + { + "epoch": 0.2692744336752627, + "grad_norm": 845.8565063476562, + "learning_rate": 3.6712313306700276e-05, + "loss": 136.3838, + "step": 66650 + }, + { + "epoch": 0.26931483494063035, + "grad_norm": 815.4412841796875, + "learning_rate": 3.6710779168231535e-05, + "loss": 111.6956, + "step": 66660 + }, + { + "epoch": 0.269355236205998, + "grad_norm": 592.6746826171875, + "learning_rate": 3.670924470397756e-05, + "loss": 82.0636, + "step": 66670 + }, + { + "epoch": 0.2693956374713656, + "grad_norm": 652.9903564453125, + "learning_rate": 3.6707709913968254e-05, + "loss": 88.7635, + "step": 66680 + }, + { + "epoch": 0.2694360387367332, + "grad_norm": 725.7816772460938, + "learning_rate": 3.6706174798233536e-05, + "loss": 86.003, + "step": 66690 + }, + { + "epoch": 0.26947644000210086, + "grad_norm": 584.0993041992188, + "learning_rate": 3.670463935680335e-05, + "loss": 112.7981, + "step": 66700 + }, + { + "epoch": 0.2695168412674685, + "grad_norm": 481.1322937011719, + "learning_rate": 3.670310358970762e-05, + "loss": 62.3094, + "step": 66710 + }, + { + "epoch": 0.26955724253283614, + "grad_norm": 483.2883605957031, + "learning_rate": 3.670156749697627e-05, + "loss": 79.6554, + "step": 66720 + }, + { + "epoch": 0.2695976437982038, + "grad_norm": 519.1484985351562, + "learning_rate": 3.670003107863928e-05, + "loss": 133.2278, + "step": 66730 + }, + { + "epoch": 0.2696380450635714, + "grad_norm": 1549.7398681640625, + "learning_rate": 3.6698494334726575e-05, + "loss": 142.2457, + "step": 66740 + }, + { + "epoch": 0.269678446328939, + "grad_norm": 357.855224609375, + "learning_rate": 3.6696957265268134e-05, + "loss": 59.0513, + "step": 66750 + }, + { + "epoch": 0.26971884759430664, + "grad_norm": 907.1052856445312, + "learning_rate": 3.6695419870293915e-05, + "loss": 129.0778, + "step": 66760 + }, + { + "epoch": 0.2697592488596743, + "grad_norm": 1043.9722900390625, + "learning_rate": 3.669388214983388e-05, + "loss": 78.7023, + "step": 66770 + }, + { + "epoch": 0.2697996501250419, + "grad_norm": 593.51416015625, + "learning_rate": 3.669234410391803e-05, + "loss": 61.8036, + "step": 66780 + }, + { + "epoch": 0.26984005139040956, + "grad_norm": 1273.9757080078125, + "learning_rate": 3.669080573257633e-05, + "loss": 87.1484, + "step": 66790 + }, + { + "epoch": 0.2698804526557772, + "grad_norm": 363.3462219238281, + "learning_rate": 3.668926703583878e-05, + "loss": 75.2819, + "step": 66800 + }, + { + "epoch": 0.2699208539211448, + "grad_norm": 577.8388671875, + "learning_rate": 3.668772801373538e-05, + "loss": 99.2821, + "step": 66810 + }, + { + "epoch": 0.2699612551865124, + "grad_norm": 631.447509765625, + "learning_rate": 3.6686188666296135e-05, + "loss": 102.0825, + "step": 66820 + }, + { + "epoch": 0.27000165645188007, + "grad_norm": 464.68389892578125, + "learning_rate": 3.668464899355105e-05, + "loss": 117.8338, + "step": 66830 + }, + { + "epoch": 0.2700420577172477, + "grad_norm": 812.9592895507812, + "learning_rate": 3.668310899553014e-05, + "loss": 77.4676, + "step": 66840 + }, + { + "epoch": 
0.27008245898261535, + "grad_norm": 926.87353515625, + "learning_rate": 3.668156867226343e-05, + "loss": 73.4413, + "step": 66850 + }, + { + "epoch": 0.270122860247983, + "grad_norm": 911.6707763671875, + "learning_rate": 3.668002802378094e-05, + "loss": 102.9655, + "step": 66860 + }, + { + "epoch": 0.27016326151335057, + "grad_norm": 679.295166015625, + "learning_rate": 3.6678487050112735e-05, + "loss": 102.9573, + "step": 66870 + }, + { + "epoch": 0.2702036627787182, + "grad_norm": 577.8781127929688, + "learning_rate": 3.667694575128883e-05, + "loss": 121.1115, + "step": 66880 + }, + { + "epoch": 0.27024406404408585, + "grad_norm": 578.3147583007812, + "learning_rate": 3.667540412733928e-05, + "loss": 69.0156, + "step": 66890 + }, + { + "epoch": 0.2702844653094535, + "grad_norm": 424.0657653808594, + "learning_rate": 3.667386217829415e-05, + "loss": 60.8166, + "step": 66900 + }, + { + "epoch": 0.27032486657482113, + "grad_norm": 641.4751586914062, + "learning_rate": 3.667231990418348e-05, + "loss": 157.8541, + "step": 66910 + }, + { + "epoch": 0.27036526784018877, + "grad_norm": 659.1722412109375, + "learning_rate": 3.667077730503736e-05, + "loss": 79.0967, + "step": 66920 + }, + { + "epoch": 0.2704056691055564, + "grad_norm": 466.86468505859375, + "learning_rate": 3.666923438088585e-05, + "loss": 76.3576, + "step": 66930 + }, + { + "epoch": 0.270446070370924, + "grad_norm": 642.9347534179688, + "learning_rate": 3.6667691131759034e-05, + "loss": 115.5088, + "step": 66940 + }, + { + "epoch": 0.27048647163629164, + "grad_norm": 1203.8519287109375, + "learning_rate": 3.6666147557687e-05, + "loss": 139.4259, + "step": 66950 + }, + { + "epoch": 0.2705268729016593, + "grad_norm": 985.3958129882812, + "learning_rate": 3.6664603658699836e-05, + "loss": 119.0861, + "step": 66960 + }, + { + "epoch": 0.2705672741670269, + "grad_norm": 886.81103515625, + "learning_rate": 3.666305943482765e-05, + "loss": 86.5638, + "step": 66970 + }, + { + "epoch": 0.27060767543239456, + "grad_norm": 1418.016357421875, + "learning_rate": 3.666151488610053e-05, + "loss": 114.4121, + "step": 66980 + }, + { + "epoch": 0.2706480766977622, + "grad_norm": 778.2364501953125, + "learning_rate": 3.6659970012548606e-05, + "loss": 92.9922, + "step": 66990 + }, + { + "epoch": 0.2706884779631298, + "grad_norm": 1526.047607421875, + "learning_rate": 3.665842481420199e-05, + "loss": 110.0278, + "step": 67000 + }, + { + "epoch": 0.2707288792284974, + "grad_norm": 605.3003540039062, + "learning_rate": 3.665687929109081e-05, + "loss": 122.7618, + "step": 67010 + }, + { + "epoch": 0.27076928049386506, + "grad_norm": 1663.4456787109375, + "learning_rate": 3.6655333443245184e-05, + "loss": 73.8812, + "step": 67020 + }, + { + "epoch": 0.2708096817592327, + "grad_norm": 2892.053466796875, + "learning_rate": 3.6653787270695266e-05, + "loss": 108.0526, + "step": 67030 + }, + { + "epoch": 0.27085008302460034, + "grad_norm": 1097.5616455078125, + "learning_rate": 3.665224077347119e-05, + "loss": 99.895, + "step": 67040 + }, + { + "epoch": 0.270890484289968, + "grad_norm": 983.0557250976562, + "learning_rate": 3.66506939516031e-05, + "loss": 71.9526, + "step": 67050 + }, + { + "epoch": 0.27093088555533557, + "grad_norm": 1033.124755859375, + "learning_rate": 3.664914680512117e-05, + "loss": 81.7465, + "step": 67060 + }, + { + "epoch": 0.2709712868207032, + "grad_norm": 559.4971923828125, + "learning_rate": 3.664759933405554e-05, + "loss": 75.0299, + "step": 67070 + }, + { + "epoch": 0.27101168808607085, + "grad_norm": 565.8617553710938, + 
"learning_rate": 3.6646051538436395e-05, + "loss": 59.9573, + "step": 67080 + }, + { + "epoch": 0.2710520893514385, + "grad_norm": 381.05352783203125, + "learning_rate": 3.6644503418293905e-05, + "loss": 99.7174, + "step": 67090 + }, + { + "epoch": 0.2710924906168061, + "grad_norm": 1140.415771484375, + "learning_rate": 3.6642954973658256e-05, + "loss": 132.5177, + "step": 67100 + }, + { + "epoch": 0.27113289188217377, + "grad_norm": 879.6929931640625, + "learning_rate": 3.664140620455962e-05, + "loss": 102.6096, + "step": 67110 + }, + { + "epoch": 0.2711732931475414, + "grad_norm": 473.53607177734375, + "learning_rate": 3.6639857111028215e-05, + "loss": 81.2611, + "step": 67120 + }, + { + "epoch": 0.271213694412909, + "grad_norm": 618.7317504882812, + "learning_rate": 3.663830769309423e-05, + "loss": 110.5988, + "step": 67130 + }, + { + "epoch": 0.27125409567827663, + "grad_norm": 803.4512939453125, + "learning_rate": 3.663675795078786e-05, + "loss": 120.2185, + "step": 67140 + }, + { + "epoch": 0.27129449694364427, + "grad_norm": 1341.7626953125, + "learning_rate": 3.663520788413933e-05, + "loss": 124.8719, + "step": 67150 + }, + { + "epoch": 0.2713348982090119, + "grad_norm": 539.691650390625, + "learning_rate": 3.663365749317886e-05, + "loss": 105.1012, + "step": 67160 + }, + { + "epoch": 0.27137529947437955, + "grad_norm": 1525.5050048828125, + "learning_rate": 3.663210677793668e-05, + "loss": 92.0104, + "step": 67170 + }, + { + "epoch": 0.2714157007397472, + "grad_norm": 631.9847412109375, + "learning_rate": 3.6630555738443e-05, + "loss": 155.906, + "step": 67180 + }, + { + "epoch": 0.2714561020051148, + "grad_norm": 819.8184204101562, + "learning_rate": 3.662900437472809e-05, + "loss": 77.3894, + "step": 67190 + }, + { + "epoch": 0.2714965032704824, + "grad_norm": 555.4190063476562, + "learning_rate": 3.662745268682217e-05, + "loss": 84.3662, + "step": 67200 + }, + { + "epoch": 0.27153690453585005, + "grad_norm": 547.0635986328125, + "learning_rate": 3.6625900674755503e-05, + "loss": 88.9308, + "step": 67210 + }, + { + "epoch": 0.2715773058012177, + "grad_norm": 626.9363403320312, + "learning_rate": 3.662434833855835e-05, + "loss": 82.3585, + "step": 67220 + }, + { + "epoch": 0.27161770706658533, + "grad_norm": 687.794921875, + "learning_rate": 3.662279567826096e-05, + "loss": 76.4374, + "step": 67230 + }, + { + "epoch": 0.271658108331953, + "grad_norm": 564.1090698242188, + "learning_rate": 3.66212426938936e-05, + "loss": 106.4722, + "step": 67240 + }, + { + "epoch": 0.2716985095973206, + "grad_norm": 1369.865966796875, + "learning_rate": 3.6619689385486566e-05, + "loss": 95.6472, + "step": 67250 + }, + { + "epoch": 0.2717389108626882, + "grad_norm": 709.9832763671875, + "learning_rate": 3.6618135753070136e-05, + "loss": 85.4511, + "step": 67260 + }, + { + "epoch": 0.27177931212805584, + "grad_norm": 765.769775390625, + "learning_rate": 3.661658179667459e-05, + "loss": 87.9901, + "step": 67270 + }, + { + "epoch": 0.2718197133934235, + "grad_norm": 768.8171997070312, + "learning_rate": 3.6615027516330226e-05, + "loss": 117.706, + "step": 67280 + }, + { + "epoch": 0.2718601146587911, + "grad_norm": 957.690185546875, + "learning_rate": 3.6613472912067345e-05, + "loss": 58.7262, + "step": 67290 + }, + { + "epoch": 0.27190051592415876, + "grad_norm": 507.2591857910156, + "learning_rate": 3.661191798391626e-05, + "loss": 99.1809, + "step": 67300 + }, + { + "epoch": 0.2719409171895264, + "grad_norm": 1278.200439453125, + "learning_rate": 3.661036273190727e-05, + "loss": 94.2571, + "step": 
67310 + }, + { + "epoch": 0.271981318454894, + "grad_norm": 639.7318115234375, + "learning_rate": 3.660880715607072e-05, + "loss": 122.6524, + "step": 67320 + }, + { + "epoch": 0.2720217197202616, + "grad_norm": 558.89501953125, + "learning_rate": 3.6607251256436925e-05, + "loss": 82.6479, + "step": 67330 + }, + { + "epoch": 0.27206212098562926, + "grad_norm": 767.5194091796875, + "learning_rate": 3.660569503303621e-05, + "loss": 68.8054, + "step": 67340 + }, + { + "epoch": 0.2721025222509969, + "grad_norm": 327.6011047363281, + "learning_rate": 3.660413848589892e-05, + "loss": 90.8429, + "step": 67350 + }, + { + "epoch": 0.27214292351636454, + "grad_norm": 1048.76123046875, + "learning_rate": 3.6602581615055406e-05, + "loss": 87.1205, + "step": 67360 + }, + { + "epoch": 0.2721833247817322, + "grad_norm": 424.4896240234375, + "learning_rate": 3.660102442053601e-05, + "loss": 106.3329, + "step": 67370 + }, + { + "epoch": 0.27222372604709977, + "grad_norm": 881.1359252929688, + "learning_rate": 3.659946690237111e-05, + "loss": 97.5027, + "step": 67380 + }, + { + "epoch": 0.2722641273124674, + "grad_norm": 1051.02783203125, + "learning_rate": 3.659790906059105e-05, + "loss": 116.5932, + "step": 67390 + }, + { + "epoch": 0.27230452857783505, + "grad_norm": 762.4089965820312, + "learning_rate": 3.65963508952262e-05, + "loss": 113.3293, + "step": 67400 + }, + { + "epoch": 0.2723449298432027, + "grad_norm": 619.806640625, + "learning_rate": 3.659479240630695e-05, + "loss": 171.6082, + "step": 67410 + }, + { + "epoch": 0.2723853311085703, + "grad_norm": 748.5914306640625, + "learning_rate": 3.659323359386368e-05, + "loss": 85.7499, + "step": 67420 + }, + { + "epoch": 0.27242573237393797, + "grad_norm": 752.0733642578125, + "learning_rate": 3.659167445792677e-05, + "loss": 98.9257, + "step": 67430 + }, + { + "epoch": 0.2724661336393056, + "grad_norm": 492.3656005859375, + "learning_rate": 3.659011499852664e-05, + "loss": 94.4063, + "step": 67440 + }, + { + "epoch": 0.2725065349046732, + "grad_norm": 1411.8902587890625, + "learning_rate": 3.658855521569367e-05, + "loss": 80.0441, + "step": 67450 + }, + { + "epoch": 0.27254693617004083, + "grad_norm": 669.166015625, + "learning_rate": 3.6586995109458266e-05, + "loss": 92.9388, + "step": 67460 + }, + { + "epoch": 0.27258733743540847, + "grad_norm": 763.5968627929688, + "learning_rate": 3.6585434679850866e-05, + "loss": 56.3931, + "step": 67470 + }, + { + "epoch": 0.2726277387007761, + "grad_norm": 857.7355346679688, + "learning_rate": 3.6583873926901866e-05, + "loss": 80.9318, + "step": 67480 + }, + { + "epoch": 0.27266813996614375, + "grad_norm": 928.1865234375, + "learning_rate": 3.658231285064172e-05, + "loss": 86.8716, + "step": 67490 + }, + { + "epoch": 0.2727085412315114, + "grad_norm": 780.06640625, + "learning_rate": 3.658075145110083e-05, + "loss": 82.3669, + "step": 67500 + }, + { + "epoch": 0.272748942496879, + "grad_norm": 648.033447265625, + "learning_rate": 3.657918972830967e-05, + "loss": 97.4703, + "step": 67510 + }, + { + "epoch": 0.2727893437622466, + "grad_norm": 957.6702270507812, + "learning_rate": 3.657762768229867e-05, + "loss": 65.9688, + "step": 67520 + }, + { + "epoch": 0.27282974502761426, + "grad_norm": 518.7492065429688, + "learning_rate": 3.6576065313098276e-05, + "loss": 105.7534, + "step": 67530 + }, + { + "epoch": 0.2728701462929819, + "grad_norm": 410.5702819824219, + "learning_rate": 3.657450262073896e-05, + "loss": 69.4034, + "step": 67540 + }, + { + "epoch": 0.27291054755834954, + "grad_norm": 1124.7354736328125, + 
"learning_rate": 3.657293960525118e-05, + "loss": 88.6595, + "step": 67550 + }, + { + "epoch": 0.2729509488237172, + "grad_norm": 761.4927368164062, + "learning_rate": 3.657137626666541e-05, + "loss": 66.9091, + "step": 67560 + }, + { + "epoch": 0.2729913500890848, + "grad_norm": 826.975341796875, + "learning_rate": 3.656981260501213e-05, + "loss": 89.1747, + "step": 67570 + }, + { + "epoch": 0.2730317513544524, + "grad_norm": 833.301513671875, + "learning_rate": 3.656824862032182e-05, + "loss": 105.9145, + "step": 67580 + }, + { + "epoch": 0.27307215261982004, + "grad_norm": 547.9591064453125, + "learning_rate": 3.6566684312624966e-05, + "loss": 86.8532, + "step": 67590 + }, + { + "epoch": 0.2731125538851877, + "grad_norm": 1575.085205078125, + "learning_rate": 3.6565119681952086e-05, + "loss": 114.453, + "step": 67600 + }, + { + "epoch": 0.2731529551505553, + "grad_norm": 866.9111938476562, + "learning_rate": 3.6563554728333664e-05, + "loss": 78.5457, + "step": 67610 + }, + { + "epoch": 0.27319335641592296, + "grad_norm": 436.52813720703125, + "learning_rate": 3.6561989451800215e-05, + "loss": 85.6261, + "step": 67620 + }, + { + "epoch": 0.2732337576812906, + "grad_norm": 790.2314453125, + "learning_rate": 3.656042385238225e-05, + "loss": 80.8644, + "step": 67630 + }, + { + "epoch": 0.2732741589466582, + "grad_norm": 535.7910766601562, + "learning_rate": 3.655885793011031e-05, + "loss": 136.049, + "step": 67640 + }, + { + "epoch": 0.2733145602120258, + "grad_norm": 636.2142333984375, + "learning_rate": 3.6557291685014896e-05, + "loss": 121.8613, + "step": 67650 + }, + { + "epoch": 0.27335496147739347, + "grad_norm": 1940.0870361328125, + "learning_rate": 3.655572511712656e-05, + "loss": 128.0218, + "step": 67660 + }, + { + "epoch": 0.2733953627427611, + "grad_norm": 1264.65576171875, + "learning_rate": 3.655415822647584e-05, + "loss": 114.0267, + "step": 67670 + }, + { + "epoch": 0.27343576400812875, + "grad_norm": 925.796630859375, + "learning_rate": 3.655259101309328e-05, + "loss": 88.4862, + "step": 67680 + }, + { + "epoch": 0.2734761652734964, + "grad_norm": 722.7463989257812, + "learning_rate": 3.655102347700944e-05, + "loss": 109.0084, + "step": 67690 + }, + { + "epoch": 0.27351656653886397, + "grad_norm": 764.4606323242188, + "learning_rate": 3.654945561825488e-05, + "loss": 98.5051, + "step": 67700 + }, + { + "epoch": 0.2735569678042316, + "grad_norm": 1221.8919677734375, + "learning_rate": 3.6547887436860164e-05, + "loss": 117.89, + "step": 67710 + }, + { + "epoch": 0.27359736906959925, + "grad_norm": 1112.1854248046875, + "learning_rate": 3.654631893285585e-05, + "loss": 94.4862, + "step": 67720 + }, + { + "epoch": 0.2736377703349669, + "grad_norm": 630.9153442382812, + "learning_rate": 3.6544750106272534e-05, + "loss": 54.3689, + "step": 67730 + }, + { + "epoch": 0.27367817160033453, + "grad_norm": 535.27099609375, + "learning_rate": 3.6543180957140804e-05, + "loss": 107.3487, + "step": 67740 + }, + { + "epoch": 0.27371857286570217, + "grad_norm": 766.9866333007812, + "learning_rate": 3.654161148549124e-05, + "loss": 117.028, + "step": 67750 + }, + { + "epoch": 0.2737589741310698, + "grad_norm": 589.0127563476562, + "learning_rate": 3.654004169135444e-05, + "loss": 88.6585, + "step": 67760 + }, + { + "epoch": 0.2737993753964374, + "grad_norm": 422.9021301269531, + "learning_rate": 3.653847157476101e-05, + "loss": 64.9241, + "step": 67770 + }, + { + "epoch": 0.27383977666180503, + "grad_norm": 639.4475708007812, + "learning_rate": 3.653690113574158e-05, + "loss": 115.0782, + 
"step": 67780 + }, + { + "epoch": 0.2738801779271727, + "grad_norm": 548.0899658203125, + "learning_rate": 3.6535330374326734e-05, + "loss": 79.4729, + "step": 67790 + }, + { + "epoch": 0.2739205791925403, + "grad_norm": 758.9486694335938, + "learning_rate": 3.6533759290547114e-05, + "loss": 139.4322, + "step": 67800 + }, + { + "epoch": 0.27396098045790795, + "grad_norm": 479.7082824707031, + "learning_rate": 3.653218788443334e-05, + "loss": 53.27, + "step": 67810 + }, + { + "epoch": 0.2740013817232756, + "grad_norm": 412.579833984375, + "learning_rate": 3.653061615601606e-05, + "loss": 64.0726, + "step": 67820 + }, + { + "epoch": 0.2740417829886432, + "grad_norm": 939.8152465820312, + "learning_rate": 3.652904410532589e-05, + "loss": 78.4776, + "step": 67830 + }, + { + "epoch": 0.2740821842540108, + "grad_norm": 764.1455078125, + "learning_rate": 3.6527471732393515e-05, + "loss": 87.4707, + "step": 67840 + }, + { + "epoch": 0.27412258551937846, + "grad_norm": 677.4103393554688, + "learning_rate": 3.652589903724956e-05, + "loss": 83.1382, + "step": 67850 + }, + { + "epoch": 0.2741629867847461, + "grad_norm": 1063.554443359375, + "learning_rate": 3.65243260199247e-05, + "loss": 70.8683, + "step": 67860 + }, + { + "epoch": 0.27420338805011374, + "grad_norm": 824.3779296875, + "learning_rate": 3.652275268044959e-05, + "loss": 122.3711, + "step": 67870 + }, + { + "epoch": 0.2742437893154814, + "grad_norm": 1185.2037353515625, + "learning_rate": 3.6521179018854914e-05, + "loss": 99.4386, + "step": 67880 + }, + { + "epoch": 0.274284190580849, + "grad_norm": 1935.2410888671875, + "learning_rate": 3.651960503517135e-05, + "loss": 130.8121, + "step": 67890 + }, + { + "epoch": 0.2743245918462166, + "grad_norm": 430.25177001953125, + "learning_rate": 3.651803072942957e-05, + "loss": 65.29, + "step": 67900 + }, + { + "epoch": 0.27436499311158424, + "grad_norm": 4109.46240234375, + "learning_rate": 3.6516456101660286e-05, + "loss": 161.4546, + "step": 67910 + }, + { + "epoch": 0.2744053943769519, + "grad_norm": 1227.218505859375, + "learning_rate": 3.651488115189419e-05, + "loss": 120.5978, + "step": 67920 + }, + { + "epoch": 0.2744457956423195, + "grad_norm": 1899.3009033203125, + "learning_rate": 3.651330588016198e-05, + "loss": 91.9995, + "step": 67930 + }, + { + "epoch": 0.27448619690768716, + "grad_norm": 738.5293579101562, + "learning_rate": 3.6511730286494366e-05, + "loss": 69.5723, + "step": 67940 + }, + { + "epoch": 0.2745265981730548, + "grad_norm": 901.5470581054688, + "learning_rate": 3.651015437092208e-05, + "loss": 79.5962, + "step": 67950 + }, + { + "epoch": 0.2745669994384224, + "grad_norm": 672.0913696289062, + "learning_rate": 3.650857813347582e-05, + "loss": 86.7857, + "step": 67960 + }, + { + "epoch": 0.27460740070379, + "grad_norm": 802.9805297851562, + "learning_rate": 3.6507001574186346e-05, + "loss": 144.3319, + "step": 67970 + }, + { + "epoch": 0.27464780196915767, + "grad_norm": 676.3224487304688, + "learning_rate": 3.650542469308437e-05, + "loss": 104.4965, + "step": 67980 + }, + { + "epoch": 0.2746882032345253, + "grad_norm": 3342.94091796875, + "learning_rate": 3.6503847490200636e-05, + "loss": 154.7336, + "step": 67990 + }, + { + "epoch": 0.27472860449989295, + "grad_norm": 603.455078125, + "learning_rate": 3.6502269965565904e-05, + "loss": 69.5229, + "step": 68000 + }, + { + "epoch": 0.2747690057652606, + "grad_norm": 579.2432250976562, + "learning_rate": 3.650069211921093e-05, + "loss": 70.3289, + "step": 68010 + }, + { + "epoch": 0.27480940703062817, + "grad_norm": 
585.8040771484375, + "learning_rate": 3.649911395116646e-05, + "loss": 65.5207, + "step": 68020 + }, + { + "epoch": 0.2748498082959958, + "grad_norm": 727.9530029296875, + "learning_rate": 3.649753546146327e-05, + "loss": 66.6137, + "step": 68030 + }, + { + "epoch": 0.27489020956136345, + "grad_norm": 778.4830322265625, + "learning_rate": 3.6495956650132126e-05, + "loss": 122.7406, + "step": 68040 + }, + { + "epoch": 0.2749306108267311, + "grad_norm": 476.44964599609375, + "learning_rate": 3.6494377517203825e-05, + "loss": 101.1476, + "step": 68050 + }, + { + "epoch": 0.27497101209209873, + "grad_norm": 629.7472534179688, + "learning_rate": 3.649279806270914e-05, + "loss": 84.705, + "step": 68060 + }, + { + "epoch": 0.27501141335746637, + "grad_norm": 574.1264038085938, + "learning_rate": 3.6491218286678867e-05, + "loss": 93.1382, + "step": 68070 + }, + { + "epoch": 0.275051814622834, + "grad_norm": 1126.776611328125, + "learning_rate": 3.648963818914379e-05, + "loss": 78.809, + "step": 68080 + }, + { + "epoch": 0.2750922158882016, + "grad_norm": 843.8395385742188, + "learning_rate": 3.6488057770134746e-05, + "loss": 73.437, + "step": 68090 + }, + { + "epoch": 0.27513261715356924, + "grad_norm": 780.7047119140625, + "learning_rate": 3.648647702968252e-05, + "loss": 96.8455, + "step": 68100 + }, + { + "epoch": 0.2751730184189369, + "grad_norm": 802.1358032226562, + "learning_rate": 3.6484895967817935e-05, + "loss": 75.0577, + "step": 68110 + }, + { + "epoch": 0.2752134196843045, + "grad_norm": 720.5551147460938, + "learning_rate": 3.6483314584571815e-05, + "loss": 57.3072, + "step": 68120 + }, + { + "epoch": 0.27525382094967216, + "grad_norm": 348.1614990234375, + "learning_rate": 3.6481732879974995e-05, + "loss": 89.3268, + "step": 68130 + }, + { + "epoch": 0.2752942222150398, + "grad_norm": 487.68707275390625, + "learning_rate": 3.64801508540583e-05, + "loss": 84.348, + "step": 68140 + }, + { + "epoch": 0.2753346234804074, + "grad_norm": 927.5310668945312, + "learning_rate": 3.647856850685259e-05, + "loss": 109.3317, + "step": 68150 + }, + { + "epoch": 0.275375024745775, + "grad_norm": 1217.30517578125, + "learning_rate": 3.6476985838388694e-05, + "loss": 92.527, + "step": 68160 + }, + { + "epoch": 0.27541542601114266, + "grad_norm": 650.2152709960938, + "learning_rate": 3.647540284869748e-05, + "loss": 84.9797, + "step": 68170 + }, + { + "epoch": 0.2754558272765103, + "grad_norm": 844.3194580078125, + "learning_rate": 3.647381953780981e-05, + "loss": 124.1536, + "step": 68180 + }, + { + "epoch": 0.27549622854187794, + "grad_norm": 544.3070678710938, + "learning_rate": 3.647223590575654e-05, + "loss": 84.4533, + "step": 68190 + }, + { + "epoch": 0.2755366298072456, + "grad_norm": 486.33978271484375, + "learning_rate": 3.647065195256855e-05, + "loss": 109.8164, + "step": 68200 + }, + { + "epoch": 0.2755770310726132, + "grad_norm": 799.48828125, + "learning_rate": 3.6469067678276726e-05, + "loss": 105.7089, + "step": 68210 + }, + { + "epoch": 0.2756174323379808, + "grad_norm": 323.52191162109375, + "learning_rate": 3.6467483082911945e-05, + "loss": 75.8217, + "step": 68220 + }, + { + "epoch": 0.27565783360334845, + "grad_norm": 466.0504150390625, + "learning_rate": 3.646589816650511e-05, + "loss": 108.8149, + "step": 68230 + }, + { + "epoch": 0.2756982348687161, + "grad_norm": 719.2071533203125, + "learning_rate": 3.64643129290871e-05, + "loss": 82.6375, + "step": 68240 + }, + { + "epoch": 0.2757386361340837, + "grad_norm": 911.8373413085938, + "learning_rate": 3.6462727370688844e-05, + 
"loss": 73.9994, + "step": 68250 + }, + { + "epoch": 0.27577903739945137, + "grad_norm": 408.0462646484375, + "learning_rate": 3.6461141491341235e-05, + "loss": 114.88, + "step": 68260 + }, + { + "epoch": 0.275819438664819, + "grad_norm": 434.5877380371094, + "learning_rate": 3.6459555291075204e-05, + "loss": 105.225, + "step": 68270 + }, + { + "epoch": 0.2758598399301866, + "grad_norm": 445.41802978515625, + "learning_rate": 3.6457968769921664e-05, + "loss": 88.2386, + "step": 68280 + }, + { + "epoch": 0.27590024119555423, + "grad_norm": 1021.8375854492188, + "learning_rate": 3.645638192791155e-05, + "loss": 103.4225, + "step": 68290 + }, + { + "epoch": 0.27594064246092187, + "grad_norm": 707.6278076171875, + "learning_rate": 3.64547947650758e-05, + "loss": 99.5091, + "step": 68300 + }, + { + "epoch": 0.2759810437262895, + "grad_norm": 1050.2548828125, + "learning_rate": 3.6453207281445356e-05, + "loss": 118.9096, + "step": 68310 + }, + { + "epoch": 0.27602144499165715, + "grad_norm": 902.828369140625, + "learning_rate": 3.6451619477051165e-05, + "loss": 109.0417, + "step": 68320 + }, + { + "epoch": 0.2760618462570248, + "grad_norm": 1697.173095703125, + "learning_rate": 3.645003135192418e-05, + "loss": 126.2822, + "step": 68330 + }, + { + "epoch": 0.2761022475223924, + "grad_norm": 1301.940673828125, + "learning_rate": 3.6448442906095365e-05, + "loss": 124.525, + "step": 68340 + }, + { + "epoch": 0.27614264878776, + "grad_norm": 505.5497131347656, + "learning_rate": 3.644685413959569e-05, + "loss": 83.2613, + "step": 68350 + }, + { + "epoch": 0.27618305005312765, + "grad_norm": 847.8020629882812, + "learning_rate": 3.644526505245612e-05, + "loss": 132.779, + "step": 68360 + }, + { + "epoch": 0.2762234513184953, + "grad_norm": 550.3712768554688, + "learning_rate": 3.6443675644707645e-05, + "loss": 91.7599, + "step": 68370 + }, + { + "epoch": 0.27626385258386293, + "grad_norm": 678.0668334960938, + "learning_rate": 3.6442085916381256e-05, + "loss": 117.8119, + "step": 68380 + }, + { + "epoch": 0.2763042538492306, + "grad_norm": 614.4730834960938, + "learning_rate": 3.644049586750792e-05, + "loss": 97.4599, + "step": 68390 + }, + { + "epoch": 0.2763446551145982, + "grad_norm": 488.6295471191406, + "learning_rate": 3.6438905498118665e-05, + "loss": 103.6707, + "step": 68400 + }, + { + "epoch": 0.2763850563799658, + "grad_norm": 513.9255981445312, + "learning_rate": 3.643731480824448e-05, + "loss": 81.8313, + "step": 68410 + }, + { + "epoch": 0.27642545764533344, + "grad_norm": 747.1922607421875, + "learning_rate": 3.6435723797916384e-05, + "loss": 106.2936, + "step": 68420 + }, + { + "epoch": 0.2764658589107011, + "grad_norm": 299.3599548339844, + "learning_rate": 3.643413246716539e-05, + "loss": 51.4801, + "step": 68430 + }, + { + "epoch": 0.2765062601760687, + "grad_norm": 390.653564453125, + "learning_rate": 3.643254081602252e-05, + "loss": 97.7996, + "step": 68440 + }, + { + "epoch": 0.27654666144143636, + "grad_norm": 1718.391357421875, + "learning_rate": 3.6430948844518806e-05, + "loss": 130.4032, + "step": 68450 + }, + { + "epoch": 0.276587062706804, + "grad_norm": 849.1654052734375, + "learning_rate": 3.6429356552685285e-05, + "loss": 74.4217, + "step": 68460 + }, + { + "epoch": 0.2766274639721716, + "grad_norm": 682.6337280273438, + "learning_rate": 3.6427763940553004e-05, + "loss": 76.5122, + "step": 68470 + }, + { + "epoch": 0.2766678652375392, + "grad_norm": 544.392333984375, + "learning_rate": 3.6426171008153e-05, + "loss": 85.6044, + "step": 68480 + }, + { + "epoch": 
0.27670826650290686, + "grad_norm": 511.590576171875, + "learning_rate": 3.6424577755516344e-05, + "loss": 116.662, + "step": 68490 + }, + { + "epoch": 0.2767486677682745, + "grad_norm": 656.3893432617188, + "learning_rate": 3.642298418267408e-05, + "loss": 106.6331, + "step": 68500 + }, + { + "epoch": 0.27678906903364214, + "grad_norm": 615.6737060546875, + "learning_rate": 3.642139028965729e-05, + "loss": 62.4421, + "step": 68510 + }, + { + "epoch": 0.2768294702990098, + "grad_norm": 1557.6842041015625, + "learning_rate": 3.6419796076497045e-05, + "loss": 122.6403, + "step": 68520 + }, + { + "epoch": 0.2768698715643774, + "grad_norm": 11329.0419921875, + "learning_rate": 3.6418201543224414e-05, + "loss": 114.4745, + "step": 68530 + }, + { + "epoch": 0.276910272829745, + "grad_norm": 1043.625732421875, + "learning_rate": 3.64166066898705e-05, + "loss": 125.834, + "step": 68540 + }, + { + "epoch": 0.27695067409511265, + "grad_norm": 595.1864624023438, + "learning_rate": 3.641501151646638e-05, + "loss": 108.8715, + "step": 68550 + }, + { + "epoch": 0.2769910753604803, + "grad_norm": 778.237060546875, + "learning_rate": 3.641341602304316e-05, + "loss": 84.8831, + "step": 68560 + }, + { + "epoch": 0.27703147662584793, + "grad_norm": 957.9576416015625, + "learning_rate": 3.6411820209631944e-05, + "loss": 137.406, + "step": 68570 + }, + { + "epoch": 0.27707187789121557, + "grad_norm": 3196.039794921875, + "learning_rate": 3.641022407626385e-05, + "loss": 104.2446, + "step": 68580 + }, + { + "epoch": 0.2771122791565832, + "grad_norm": 1015.665283203125, + "learning_rate": 3.640862762296999e-05, + "loss": 68.0923, + "step": 68590 + }, + { + "epoch": 0.2771526804219508, + "grad_norm": 1055.8487548828125, + "learning_rate": 3.6407030849781475e-05, + "loss": 116.5657, + "step": 68600 + }, + { + "epoch": 0.27719308168731843, + "grad_norm": 661.7937622070312, + "learning_rate": 3.6405433756729456e-05, + "loss": 62.4905, + "step": 68610 + }, + { + "epoch": 0.27723348295268607, + "grad_norm": 534.2022705078125, + "learning_rate": 3.640383634384505e-05, + "loss": 96.2677, + "step": 68620 + }, + { + "epoch": 0.2772738842180537, + "grad_norm": 364.8976135253906, + "learning_rate": 3.6402238611159424e-05, + "loss": 118.5725, + "step": 68630 + }, + { + "epoch": 0.27731428548342135, + "grad_norm": 605.224365234375, + "learning_rate": 3.64006405587037e-05, + "loss": 93.4407, + "step": 68640 + }, + { + "epoch": 0.277354686748789, + "grad_norm": 845.5484619140625, + "learning_rate": 3.639904218650905e-05, + "loss": 104.17, + "step": 68650 + }, + { + "epoch": 0.2773950880141566, + "grad_norm": 770.7295532226562, + "learning_rate": 3.639744349460663e-05, + "loss": 77.7761, + "step": 68660 + }, + { + "epoch": 0.2774354892795242, + "grad_norm": 588.3968505859375, + "learning_rate": 3.639584448302761e-05, + "loss": 71.3449, + "step": 68670 + }, + { + "epoch": 0.27747589054489186, + "grad_norm": 859.4909057617188, + "learning_rate": 3.639424515180315e-05, + "loss": 76.3689, + "step": 68680 + }, + { + "epoch": 0.2775162918102595, + "grad_norm": 604.9257202148438, + "learning_rate": 3.639264550096445e-05, + "loss": 115.6792, + "step": 68690 + }, + { + "epoch": 0.27755669307562714, + "grad_norm": 351.2408447265625, + "learning_rate": 3.639104553054268e-05, + "loss": 118.301, + "step": 68700 + }, + { + "epoch": 0.2775970943409948, + "grad_norm": 1255.8525390625, + "learning_rate": 3.6389445240569043e-05, + "loss": 95.3754, + "step": 68710 + }, + { + "epoch": 0.2776374956063624, + "grad_norm": 1229.2650146484375, + 
"learning_rate": 3.6387844631074726e-05, + "loss": 78.6726, + "step": 68720 + }, + { + "epoch": 0.27767789687173, + "grad_norm": 492.1968078613281, + "learning_rate": 3.638624370209095e-05, + "loss": 82.0869, + "step": 68730 + }, + { + "epoch": 0.27771829813709764, + "grad_norm": 635.1704711914062, + "learning_rate": 3.638464245364891e-05, + "loss": 95.991, + "step": 68740 + }, + { + "epoch": 0.2777586994024653, + "grad_norm": 1472.442138671875, + "learning_rate": 3.638304088577984e-05, + "loss": 88.7849, + "step": 68750 + }, + { + "epoch": 0.2777991006678329, + "grad_norm": 1274.680908203125, + "learning_rate": 3.6381438998514945e-05, + "loss": 79.458, + "step": 68760 + }, + { + "epoch": 0.27783950193320056, + "grad_norm": 895.7799682617188, + "learning_rate": 3.637983679188547e-05, + "loss": 127.0817, + "step": 68770 + }, + { + "epoch": 0.2778799031985682, + "grad_norm": 451.1614990234375, + "learning_rate": 3.6378234265922636e-05, + "loss": 77.4037, + "step": 68780 + }, + { + "epoch": 0.2779203044639358, + "grad_norm": 751.6312255859375, + "learning_rate": 3.6376631420657695e-05, + "loss": 106.7418, + "step": 68790 + }, + { + "epoch": 0.2779607057293034, + "grad_norm": 942.9095458984375, + "learning_rate": 3.637502825612189e-05, + "loss": 96.0415, + "step": 68800 + }, + { + "epoch": 0.27800110699467107, + "grad_norm": 526.9249877929688, + "learning_rate": 3.637342477234649e-05, + "loss": 77.4476, + "step": 68810 + }, + { + "epoch": 0.2780415082600387, + "grad_norm": 1092.3048095703125, + "learning_rate": 3.637182096936274e-05, + "loss": 96.707, + "step": 68820 + }, + { + "epoch": 0.27808190952540635, + "grad_norm": 917.3517456054688, + "learning_rate": 3.63702168472019e-05, + "loss": 104.7531, + "step": 68830 + }, + { + "epoch": 0.278122310790774, + "grad_norm": 969.0011596679688, + "learning_rate": 3.636861240589527e-05, + "loss": 92.7792, + "step": 68840 + }, + { + "epoch": 0.2781627120561416, + "grad_norm": 1465.5838623046875, + "learning_rate": 3.63670076454741e-05, + "loss": 90.2071, + "step": 68850 + }, + { + "epoch": 0.2782031133215092, + "grad_norm": 1092.1461181640625, + "learning_rate": 3.63654025659697e-05, + "loss": 117.6914, + "step": 68860 + }, + { + "epoch": 0.27824351458687685, + "grad_norm": 1796.9549560546875, + "learning_rate": 3.636379716741335e-05, + "loss": 74.5576, + "step": 68870 + }, + { + "epoch": 0.2782839158522445, + "grad_norm": 531.8665771484375, + "learning_rate": 3.6362191449836346e-05, + "loss": 93.0529, + "step": 68880 + }, + { + "epoch": 0.27832431711761213, + "grad_norm": 737.0626831054688, + "learning_rate": 3.6360585413269995e-05, + "loss": 73.4925, + "step": 68890 + }, + { + "epoch": 0.27836471838297977, + "grad_norm": 961.3511352539062, + "learning_rate": 3.6358979057745615e-05, + "loss": 70.1652, + "step": 68900 + }, + { + "epoch": 0.2784051196483474, + "grad_norm": 599.8577270507812, + "learning_rate": 3.635737238329451e-05, + "loss": 114.5515, + "step": 68910 + }, + { + "epoch": 0.278445520913715, + "grad_norm": 741.0637817382812, + "learning_rate": 3.6355765389948015e-05, + "loss": 106.3986, + "step": 68920 + }, + { + "epoch": 0.27848592217908263, + "grad_norm": 1377.2880859375, + "learning_rate": 3.635415807773745e-05, + "loss": 121.5098, + "step": 68930 + }, + { + "epoch": 0.2785263234444503, + "grad_norm": 776.2672729492188, + "learning_rate": 3.635255044669415e-05, + "loss": 86.9908, + "step": 68940 + }, + { + "epoch": 0.2785667247098179, + "grad_norm": 1268.8033447265625, + "learning_rate": 3.635094249684946e-05, + "loss": 91.512, + 
"step": 68950 + }, + { + "epoch": 0.27860712597518555, + "grad_norm": 1089.370361328125, + "learning_rate": 3.6349334228234736e-05, + "loss": 87.2802, + "step": 68960 + }, + { + "epoch": 0.2786475272405532, + "grad_norm": 523.503173828125, + "learning_rate": 3.634772564088132e-05, + "loss": 75.9084, + "step": 68970 + }, + { + "epoch": 0.2786879285059208, + "grad_norm": 243.4495391845703, + "learning_rate": 3.6346116734820575e-05, + "loss": 76.2844, + "step": 68980 + }, + { + "epoch": 0.2787283297712884, + "grad_norm": 888.7525024414062, + "learning_rate": 3.634450751008387e-05, + "loss": 69.7166, + "step": 68990 + }, + { + "epoch": 0.27876873103665606, + "grad_norm": 770.096435546875, + "learning_rate": 3.634289796670257e-05, + "loss": 83.3546, + "step": 69000 + }, + { + "epoch": 0.2788091323020237, + "grad_norm": 1272.0819091796875, + "learning_rate": 3.634128810470807e-05, + "loss": 107.9621, + "step": 69010 + }, + { + "epoch": 0.27884953356739134, + "grad_norm": 1457.251220703125, + "learning_rate": 3.633967792413174e-05, + "loss": 98.9324, + "step": 69020 + }, + { + "epoch": 0.278889934832759, + "grad_norm": 544.1817016601562, + "learning_rate": 3.6338067425004975e-05, + "loss": 116.4078, + "step": 69030 + }, + { + "epoch": 0.2789303360981266, + "grad_norm": 560.96533203125, + "learning_rate": 3.633645660735918e-05, + "loss": 109.199, + "step": 69040 + }, + { + "epoch": 0.2789707373634942, + "grad_norm": 684.9548950195312, + "learning_rate": 3.633484547122575e-05, + "loss": 95.4643, + "step": 69050 + }, + { + "epoch": 0.27901113862886184, + "grad_norm": 297.54400634765625, + "learning_rate": 3.63332340166361e-05, + "loss": 80.901, + "step": 69060 + }, + { + "epoch": 0.2790515398942295, + "grad_norm": 490.6906433105469, + "learning_rate": 3.633162224362164e-05, + "loss": 50.0815, + "step": 69070 + }, + { + "epoch": 0.2790919411595971, + "grad_norm": 1397.1944580078125, + "learning_rate": 3.63300101522138e-05, + "loss": 157.9215, + "step": 69080 + }, + { + "epoch": 0.27913234242496476, + "grad_norm": 1231.57177734375, + "learning_rate": 3.6328397742444003e-05, + "loss": 76.6411, + "step": 69090 + }, + { + "epoch": 0.2791727436903324, + "grad_norm": 532.4107666015625, + "learning_rate": 3.632678501434368e-05, + "loss": 102.7966, + "step": 69100 + }, + { + "epoch": 0.2792131449557, + "grad_norm": 1495.0543212890625, + "learning_rate": 3.632517196794429e-05, + "loss": 208.6504, + "step": 69110 + }, + { + "epoch": 0.27925354622106763, + "grad_norm": 620.1536254882812, + "learning_rate": 3.632355860327725e-05, + "loss": 93.6962, + "step": 69120 + }, + { + "epoch": 0.27929394748643527, + "grad_norm": 246.7469482421875, + "learning_rate": 3.632194492037404e-05, + "loss": 92.9545, + "step": 69130 + }, + { + "epoch": 0.2793343487518029, + "grad_norm": 509.92431640625, + "learning_rate": 3.632033091926612e-05, + "loss": 81.3285, + "step": 69140 + }, + { + "epoch": 0.27937475001717055, + "grad_norm": 1113.5439453125, + "learning_rate": 3.631871659998494e-05, + "loss": 114.5649, + "step": 69150 + }, + { + "epoch": 0.2794151512825382, + "grad_norm": 1190.018310546875, + "learning_rate": 3.6317101962561974e-05, + "loss": 136.6734, + "step": 69160 + }, + { + "epoch": 0.27945555254790583, + "grad_norm": 705.5866088867188, + "learning_rate": 3.6315487007028706e-05, + "loss": 137.6185, + "step": 69170 + }, + { + "epoch": 0.2794959538132734, + "grad_norm": 714.1895141601562, + "learning_rate": 3.631387173341662e-05, + "loss": 124.9225, + "step": 69180 + }, + { + "epoch": 0.27953635507864105, + "grad_norm": 
356.1354675292969, + "learning_rate": 3.631225614175721e-05, + "loss": 110.5913, + "step": 69190 + }, + { + "epoch": 0.2795767563440087, + "grad_norm": 775.5797119140625, + "learning_rate": 3.6310640232081966e-05, + "loss": 137.2104, + "step": 69200 + }, + { + "epoch": 0.27961715760937633, + "grad_norm": 617.612060546875, + "learning_rate": 3.630902400442239e-05, + "loss": 114.0922, + "step": 69210 + }, + { + "epoch": 0.27965755887474397, + "grad_norm": 966.131591796875, + "learning_rate": 3.630740745881e-05, + "loss": 100.8437, + "step": 69220 + }, + { + "epoch": 0.2796979601401116, + "grad_norm": 717.68017578125, + "learning_rate": 3.6305790595276303e-05, + "loss": 84.4165, + "step": 69230 + }, + { + "epoch": 0.2797383614054792, + "grad_norm": 3498.301513671875, + "learning_rate": 3.630417341385282e-05, + "loss": 97.8201, + "step": 69240 + }, + { + "epoch": 0.27977876267084684, + "grad_norm": 1601.896728515625, + "learning_rate": 3.630255591457108e-05, + "loss": 126.8111, + "step": 69250 + }, + { + "epoch": 0.2798191639362145, + "grad_norm": 1163.635986328125, + "learning_rate": 3.6300938097462634e-05, + "loss": 78.6188, + "step": 69260 + }, + { + "epoch": 0.2798595652015821, + "grad_norm": 826.2050170898438, + "learning_rate": 3.6299319962559e-05, + "loss": 109.3372, + "step": 69270 + }, + { + "epoch": 0.27989996646694976, + "grad_norm": 841.0287475585938, + "learning_rate": 3.629770150989173e-05, + "loss": 134.2689, + "step": 69280 + }, + { + "epoch": 0.2799403677323174, + "grad_norm": 709.0121459960938, + "learning_rate": 3.629608273949238e-05, + "loss": 98.9692, + "step": 69290 + }, + { + "epoch": 0.279980768997685, + "grad_norm": 801.518310546875, + "learning_rate": 3.62944636513925e-05, + "loss": 72.7152, + "step": 69300 + }, + { + "epoch": 0.2800211702630526, + "grad_norm": 765.3171997070312, + "learning_rate": 3.629284424562367e-05, + "loss": 113.7803, + "step": 69310 + }, + { + "epoch": 0.28006157152842026, + "grad_norm": 519.9540405273438, + "learning_rate": 3.629122452221746e-05, + "loss": 109.2167, + "step": 69320 + }, + { + "epoch": 0.2801019727937879, + "grad_norm": 387.75384521484375, + "learning_rate": 3.6289604481205424e-05, + "loss": 93.6366, + "step": 69330 + }, + { + "epoch": 0.28014237405915554, + "grad_norm": 692.5172119140625, + "learning_rate": 3.628798412261917e-05, + "loss": 79.3517, + "step": 69340 + }, + { + "epoch": 0.2801827753245232, + "grad_norm": 665.2496948242188, + "learning_rate": 3.628636344649028e-05, + "loss": 141.6948, + "step": 69350 + }, + { + "epoch": 0.2802231765898908, + "grad_norm": 868.7316284179688, + "learning_rate": 3.628474245285035e-05, + "loss": 80.3007, + "step": 69360 + }, + { + "epoch": 0.2802635778552584, + "grad_norm": 760.5103149414062, + "learning_rate": 3.628312114173098e-05, + "loss": 74.4238, + "step": 69370 + }, + { + "epoch": 0.28030397912062605, + "grad_norm": 645.2118530273438, + "learning_rate": 3.628149951316378e-05, + "loss": 75.6009, + "step": 69380 + }, + { + "epoch": 0.2803443803859937, + "grad_norm": 571.076171875, + "learning_rate": 3.627987756718037e-05, + "loss": 74.0658, + "step": 69390 + }, + { + "epoch": 0.2803847816513613, + "grad_norm": 860.3325805664062, + "learning_rate": 3.6278255303812366e-05, + "loss": 113.1183, + "step": 69400 + }, + { + "epoch": 0.28042518291672897, + "grad_norm": 581.766357421875, + "learning_rate": 3.627663272309139e-05, + "loss": 95.2437, + "step": 69410 + }, + { + "epoch": 0.2804655841820966, + "grad_norm": 1360.1229248046875, + "learning_rate": 3.627500982504908e-05, + "loss": 
104.8845, + "step": 69420 + }, + { + "epoch": 0.2805059854474642, + "grad_norm": 817.2112426757812, + "learning_rate": 3.6273386609717076e-05, + "loss": 119.4876, + "step": 69430 + }, + { + "epoch": 0.28054638671283183, + "grad_norm": 930.3947143554688, + "learning_rate": 3.627176307712702e-05, + "loss": 74.0993, + "step": 69440 + }, + { + "epoch": 0.28058678797819947, + "grad_norm": 1179.5645751953125, + "learning_rate": 3.627013922731056e-05, + "loss": 101.3285, + "step": 69450 + }, + { + "epoch": 0.2806271892435671, + "grad_norm": 1648.2120361328125, + "learning_rate": 3.626851506029937e-05, + "loss": 74.7676, + "step": 69460 + }, + { + "epoch": 0.28066759050893475, + "grad_norm": 850.4752197265625, + "learning_rate": 3.6266890576125095e-05, + "loss": 92.7098, + "step": 69470 + }, + { + "epoch": 0.2807079917743024, + "grad_norm": 1422.18310546875, + "learning_rate": 3.6265265774819415e-05, + "loss": 107.7697, + "step": 69480 + }, + { + "epoch": 0.28074839303967003, + "grad_norm": 528.3123779296875, + "learning_rate": 3.626364065641401e-05, + "loss": 62.648, + "step": 69490 + }, + { + "epoch": 0.2807887943050376, + "grad_norm": 717.7213134765625, + "learning_rate": 3.6262015220940556e-05, + "loss": 150.9524, + "step": 69500 + }, + { + "epoch": 0.28082919557040525, + "grad_norm": 646.385986328125, + "learning_rate": 3.626038946843074e-05, + "loss": 84.6306, + "step": 69510 + }, + { + "epoch": 0.2808695968357729, + "grad_norm": 965.535400390625, + "learning_rate": 3.625876339891626e-05, + "loss": 126.8577, + "step": 69520 + }, + { + "epoch": 0.28090999810114053, + "grad_norm": 712.9901733398438, + "learning_rate": 3.625713701242882e-05, + "loss": 94.5043, + "step": 69530 + }, + { + "epoch": 0.2809503993665082, + "grad_norm": 972.0432739257812, + "learning_rate": 3.6255510309000126e-05, + "loss": 103.6998, + "step": 69540 + }, + { + "epoch": 0.2809908006318758, + "grad_norm": 631.82763671875, + "learning_rate": 3.625388328866188e-05, + "loss": 96.1898, + "step": 69550 + }, + { + "epoch": 0.2810312018972434, + "grad_norm": 268.8305358886719, + "learning_rate": 3.625225595144582e-05, + "loss": 102.8197, + "step": 69560 + }, + { + "epoch": 0.28107160316261104, + "grad_norm": 1720.981201171875, + "learning_rate": 3.6250628297383664e-05, + "loss": 92.016, + "step": 69570 + }, + { + "epoch": 0.2811120044279787, + "grad_norm": 736.8766479492188, + "learning_rate": 3.624900032650714e-05, + "loss": 104.1043, + "step": 69580 + }, + { + "epoch": 0.2811524056933463, + "grad_norm": 670.6270141601562, + "learning_rate": 3.624737203884798e-05, + "loss": 73.123, + "step": 69590 + }, + { + "epoch": 0.28119280695871396, + "grad_norm": 2091.0, + "learning_rate": 3.624574343443794e-05, + "loss": 153.4513, + "step": 69600 + }, + { + "epoch": 0.2812332082240816, + "grad_norm": 1131.5577392578125, + "learning_rate": 3.624411451330878e-05, + "loss": 88.3719, + "step": 69610 + }, + { + "epoch": 0.2812736094894492, + "grad_norm": 689.6427612304688, + "learning_rate": 3.624248527549224e-05, + "loss": 73.8437, + "step": 69620 + }, + { + "epoch": 0.2813140107548168, + "grad_norm": 767.3834838867188, + "learning_rate": 3.624085572102009e-05, + "loss": 88.0444, + "step": 69630 + }, + { + "epoch": 0.28135441202018446, + "grad_norm": 1033.8070068359375, + "learning_rate": 3.6239225849924086e-05, + "loss": 93.902, + "step": 69640 + }, + { + "epoch": 0.2813948132855521, + "grad_norm": 625.5725708007812, + "learning_rate": 3.623759566223602e-05, + "loss": 102.3185, + "step": 69650 + }, + { + "epoch": 0.28143521455091974, + 
"grad_norm": 965.9061889648438, + "learning_rate": 3.6235965157987665e-05, + "loss": 94.2732, + "step": 69660 + }, + { + "epoch": 0.2814756158162874, + "grad_norm": 527.814453125, + "learning_rate": 3.623433433721081e-05, + "loss": 171.9797, + "step": 69670 + }, + { + "epoch": 0.281516017081655, + "grad_norm": 422.3968505859375, + "learning_rate": 3.6232703199937246e-05, + "loss": 45.7727, + "step": 69680 + }, + { + "epoch": 0.2815564183470226, + "grad_norm": 608.8853759765625, + "learning_rate": 3.6231071746198784e-05, + "loss": 121.9524, + "step": 69690 + }, + { + "epoch": 0.28159681961239025, + "grad_norm": 1178.0777587890625, + "learning_rate": 3.6229439976027225e-05, + "loss": 84.3587, + "step": 69700 + }, + { + "epoch": 0.2816372208777579, + "grad_norm": 619.7548217773438, + "learning_rate": 3.622780788945437e-05, + "loss": 98.0369, + "step": 69710 + }, + { + "epoch": 0.28167762214312553, + "grad_norm": 416.82666015625, + "learning_rate": 3.622617548651205e-05, + "loss": 112.3105, + "step": 69720 + }, + { + "epoch": 0.28171802340849317, + "grad_norm": 661.3047485351562, + "learning_rate": 3.6224542767232076e-05, + "loss": 98.0698, + "step": 69730 + }, + { + "epoch": 0.2817584246738608, + "grad_norm": 661.6165771484375, + "learning_rate": 3.62229097316463e-05, + "loss": 131.1663, + "step": 69740 + }, + { + "epoch": 0.2817988259392284, + "grad_norm": 611.2937622070312, + "learning_rate": 3.6221276379786534e-05, + "loss": 115.536, + "step": 69750 + }, + { + "epoch": 0.28183922720459603, + "grad_norm": 525.2077026367188, + "learning_rate": 3.621964271168464e-05, + "loss": 117.4507, + "step": 69760 + }, + { + "epoch": 0.2818796284699637, + "grad_norm": 629.9892578125, + "learning_rate": 3.621800872737247e-05, + "loss": 80.003, + "step": 69770 + }, + { + "epoch": 0.2819200297353313, + "grad_norm": 1072.6875, + "learning_rate": 3.6216374426881854e-05, + "loss": 92.1317, + "step": 69780 + }, + { + "epoch": 0.28196043100069895, + "grad_norm": 1558.95703125, + "learning_rate": 3.6214739810244684e-05, + "loss": 82.9404, + "step": 69790 + }, + { + "epoch": 0.2820008322660666, + "grad_norm": 593.906005859375, + "learning_rate": 3.62131048774928e-05, + "loss": 75.2771, + "step": 69800 + }, + { + "epoch": 0.28204123353143423, + "grad_norm": 1023.5885009765625, + "learning_rate": 3.62114696286581e-05, + "loss": 139.1355, + "step": 69810 + }, + { + "epoch": 0.2820816347968018, + "grad_norm": 502.4533996582031, + "learning_rate": 3.6209834063772446e-05, + "loss": 52.1599, + "step": 69820 + }, + { + "epoch": 0.28212203606216946, + "grad_norm": 715.0071411132812, + "learning_rate": 3.6208198182867737e-05, + "loss": 83.2321, + "step": 69830 + }, + { + "epoch": 0.2821624373275371, + "grad_norm": 1168.307861328125, + "learning_rate": 3.620656198597586e-05, + "loss": 141.5054, + "step": 69840 + }, + { + "epoch": 0.28220283859290474, + "grad_norm": 725.0950927734375, + "learning_rate": 3.620492547312871e-05, + "loss": 91.4042, + "step": 69850 + }, + { + "epoch": 0.2822432398582724, + "grad_norm": 1058.2650146484375, + "learning_rate": 3.62032886443582e-05, + "loss": 82.0203, + "step": 69860 + }, + { + "epoch": 0.28228364112364, + "grad_norm": 1012.8541870117188, + "learning_rate": 3.620165149969623e-05, + "loss": 127.6806, + "step": 69870 + }, + { + "epoch": 0.2823240423890076, + "grad_norm": 526.880615234375, + "learning_rate": 3.620001403917472e-05, + "loss": 128.5968, + "step": 69880 + }, + { + "epoch": 0.28236444365437524, + "grad_norm": 781.0885009765625, + "learning_rate": 3.6198376262825606e-05, + 
"loss": 108.9989, + "step": 69890 + }, + { + "epoch": 0.2824048449197429, + "grad_norm": 811.70166015625, + "learning_rate": 3.61967381706808e-05, + "loss": 94.4925, + "step": 69900 + }, + { + "epoch": 0.2824452461851105, + "grad_norm": 671.2783813476562, + "learning_rate": 3.619509976277225e-05, + "loss": 128.3875, + "step": 69910 + }, + { + "epoch": 0.28248564745047816, + "grad_norm": 892.7888793945312, + "learning_rate": 3.619346103913189e-05, + "loss": 73.343, + "step": 69920 + }, + { + "epoch": 0.2825260487158458, + "grad_norm": 749.9616088867188, + "learning_rate": 3.6191821999791665e-05, + "loss": 67.7699, + "step": 69930 + }, + { + "epoch": 0.2825664499812134, + "grad_norm": 964.9902954101562, + "learning_rate": 3.619018264478354e-05, + "loss": 88.9362, + "step": 69940 + }, + { + "epoch": 0.282606851246581, + "grad_norm": 996.6857299804688, + "learning_rate": 3.6188542974139466e-05, + "loss": 83.9178, + "step": 69950 + }, + { + "epoch": 0.28264725251194867, + "grad_norm": 1171.570068359375, + "learning_rate": 3.618690298789142e-05, + "loss": 77.3296, + "step": 69960 + }, + { + "epoch": 0.2826876537773163, + "grad_norm": 866.6726684570312, + "learning_rate": 3.618526268607136e-05, + "loss": 134.7804, + "step": 69970 + }, + { + "epoch": 0.28272805504268395, + "grad_norm": 917.5099487304688, + "learning_rate": 3.618362206871127e-05, + "loss": 104.7554, + "step": 69980 + }, + { + "epoch": 0.2827684563080516, + "grad_norm": 628.8348999023438, + "learning_rate": 3.6181981135843134e-05, + "loss": 58.5779, + "step": 69990 + }, + { + "epoch": 0.2828088575734192, + "grad_norm": 1059.137939453125, + "learning_rate": 3.6180339887498953e-05, + "loss": 84.9169, + "step": 70000 + }, + { + "epoch": 0.2828492588387868, + "grad_norm": 467.4002380371094, + "learning_rate": 3.617869832371071e-05, + "loss": 110.4209, + "step": 70010 + }, + { + "epoch": 0.28288966010415445, + "grad_norm": 1080.4908447265625, + "learning_rate": 3.617705644451041e-05, + "loss": 72.2029, + "step": 70020 + }, + { + "epoch": 0.2829300613695221, + "grad_norm": 1294.772705078125, + "learning_rate": 3.617541424993007e-05, + "loss": 80.3091, + "step": 70030 + }, + { + "epoch": 0.28297046263488973, + "grad_norm": 513.294677734375, + "learning_rate": 3.617377174000171e-05, + "loss": 102.3664, + "step": 70040 + }, + { + "epoch": 0.28301086390025737, + "grad_norm": 734.8472900390625, + "learning_rate": 3.617212891475733e-05, + "loss": 124.9945, + "step": 70050 + }, + { + "epoch": 0.283051265165625, + "grad_norm": 757.7701416015625, + "learning_rate": 3.6170485774228976e-05, + "loss": 89.9253, + "step": 70060 + }, + { + "epoch": 0.2830916664309926, + "grad_norm": 803.4826049804688, + "learning_rate": 3.6168842318448684e-05, + "loss": 162.9363, + "step": 70070 + }, + { + "epoch": 0.28313206769636023, + "grad_norm": 654.7969360351562, + "learning_rate": 3.6167198547448476e-05, + "loss": 83.4097, + "step": 70080 + }, + { + "epoch": 0.2831724689617279, + "grad_norm": 904.8287963867188, + "learning_rate": 3.616555446126041e-05, + "loss": 100.1149, + "step": 70090 + }, + { + "epoch": 0.2832128702270955, + "grad_norm": 676.6531982421875, + "learning_rate": 3.6163910059916544e-05, + "loss": 98.6859, + "step": 70100 + }, + { + "epoch": 0.28325327149246315, + "grad_norm": 931.3750610351562, + "learning_rate": 3.6162265343448924e-05, + "loss": 96.9981, + "step": 70110 + }, + { + "epoch": 0.2832936727578308, + "grad_norm": 722.9249267578125, + "learning_rate": 3.616062031188962e-05, + "loss": 76.5997, + "step": 70120 + }, + { + "epoch": 
0.2833340740231984, + "grad_norm": 1128.12158203125, + "learning_rate": 3.615897496527071e-05, + "loss": 107.047, + "step": 70130 + }, + { + "epoch": 0.283374475288566, + "grad_norm": 1091.1148681640625, + "learning_rate": 3.615732930362426e-05, + "loss": 104.396, + "step": 70140 + }, + { + "epoch": 0.28341487655393366, + "grad_norm": 826.0271606445312, + "learning_rate": 3.615568332698235e-05, + "loss": 111.4977, + "step": 70150 + }, + { + "epoch": 0.2834552778193013, + "grad_norm": 833.5360107421875, + "learning_rate": 3.6154037035377084e-05, + "loss": 91.9186, + "step": 70160 + }, + { + "epoch": 0.28349567908466894, + "grad_norm": 988.9189453125, + "learning_rate": 3.6152390428840546e-05, + "loss": 74.2944, + "step": 70170 + }, + { + "epoch": 0.2835360803500366, + "grad_norm": 1157.9456787109375, + "learning_rate": 3.6150743507404845e-05, + "loss": 94.0837, + "step": 70180 + }, + { + "epoch": 0.2835764816154042, + "grad_norm": 792.9242553710938, + "learning_rate": 3.614909627110208e-05, + "loss": 101.4369, + "step": 70190 + }, + { + "epoch": 0.2836168828807718, + "grad_norm": 330.88775634765625, + "learning_rate": 3.614744871996437e-05, + "loss": 58.6672, + "step": 70200 + }, + { + "epoch": 0.28365728414613944, + "grad_norm": 1010.1126708984375, + "learning_rate": 3.614580085402383e-05, + "loss": 83.507, + "step": 70210 + }, + { + "epoch": 0.2836976854115071, + "grad_norm": 1005.658203125, + "learning_rate": 3.6144152673312596e-05, + "loss": 79.9923, + "step": 70220 + }, + { + "epoch": 0.2837380866768747, + "grad_norm": 820.4414672851562, + "learning_rate": 3.6142504177862796e-05, + "loss": 144.488, + "step": 70230 + }, + { + "epoch": 0.28377848794224236, + "grad_norm": 558.9141845703125, + "learning_rate": 3.614085536770656e-05, + "loss": 73.1564, + "step": 70240 + }, + { + "epoch": 0.28381888920761, + "grad_norm": 842.5510864257812, + "learning_rate": 3.613920624287604e-05, + "loss": 81.4339, + "step": 70250 + }, + { + "epoch": 0.2838592904729776, + "grad_norm": 356.71087646484375, + "learning_rate": 3.613755680340339e-05, + "loss": 96.5085, + "step": 70260 + }, + { + "epoch": 0.28389969173834523, + "grad_norm": 801.2742919921875, + "learning_rate": 3.6135907049320757e-05, + "loss": 83.6973, + "step": 70270 + }, + { + "epoch": 0.28394009300371287, + "grad_norm": 220.39837646484375, + "learning_rate": 3.6134256980660306e-05, + "loss": 75.7027, + "step": 70280 + }, + { + "epoch": 0.2839804942690805, + "grad_norm": 560.4425659179688, + "learning_rate": 3.613260659745422e-05, + "loss": 114.1608, + "step": 70290 + }, + { + "epoch": 0.28402089553444815, + "grad_norm": 2176.75048828125, + "learning_rate": 3.613095589973465e-05, + "loss": 93.5828, + "step": 70300 + }, + { + "epoch": 0.2840612967998158, + "grad_norm": 842.7932739257812, + "learning_rate": 3.61293048875338e-05, + "loss": 93.4694, + "step": 70310 + }, + { + "epoch": 0.28410169806518343, + "grad_norm": 691.0603637695312, + "learning_rate": 3.612765356088384e-05, + "loss": 97.4522, + "step": 70320 + }, + { + "epoch": 0.284142099330551, + "grad_norm": 616.7498168945312, + "learning_rate": 3.612600191981697e-05, + "loss": 78.2529, + "step": 70330 + }, + { + "epoch": 0.28418250059591865, + "grad_norm": 416.3482666015625, + "learning_rate": 3.61243499643654e-05, + "loss": 127.9566, + "step": 70340 + }, + { + "epoch": 0.2842229018612863, + "grad_norm": 1326.6612548828125, + "learning_rate": 3.612269769456132e-05, + "loss": 101.7842, + "step": 70350 + }, + { + "epoch": 0.28426330312665393, + "grad_norm": 1144.6046142578125, + 
"learning_rate": 3.612104511043694e-05, + "loss": 72.9083, + "step": 70360 + }, + { + "epoch": 0.2843037043920216, + "grad_norm": 1168.9571533203125, + "learning_rate": 3.61193922120245e-05, + "loss": 153.6341, + "step": 70370 + }, + { + "epoch": 0.2843441056573892, + "grad_norm": 635.528076171875, + "learning_rate": 3.61177389993562e-05, + "loss": 67.4963, + "step": 70380 + }, + { + "epoch": 0.2843845069227568, + "grad_norm": 575.98486328125, + "learning_rate": 3.611608547246429e-05, + "loss": 117.8362, + "step": 70390 + }, + { + "epoch": 0.28442490818812444, + "grad_norm": 944.4436645507812, + "learning_rate": 3.611443163138099e-05, + "loss": 86.4013, + "step": 70400 + }, + { + "epoch": 0.2844653094534921, + "grad_norm": 423.26251220703125, + "learning_rate": 3.611277747613855e-05, + "loss": 70.9923, + "step": 70410 + }, + { + "epoch": 0.2845057107188597, + "grad_norm": 1588.222412109375, + "learning_rate": 3.611112300676921e-05, + "loss": 65.1676, + "step": 70420 + }, + { + "epoch": 0.28454611198422736, + "grad_norm": 332.4988098144531, + "learning_rate": 3.6109468223305244e-05, + "loss": 88.9684, + "step": 70430 + }, + { + "epoch": 0.284586513249595, + "grad_norm": 387.6188659667969, + "learning_rate": 3.61078131257789e-05, + "loss": 82.5718, + "step": 70440 + }, + { + "epoch": 0.2846269145149626, + "grad_norm": 981.771728515625, + "learning_rate": 3.6106157714222436e-05, + "loss": 81.3803, + "step": 70450 + }, + { + "epoch": 0.2846673157803302, + "grad_norm": 632.2242431640625, + "learning_rate": 3.610450198866815e-05, + "loss": 134.6074, + "step": 70460 + }, + { + "epoch": 0.28470771704569786, + "grad_norm": 2679.478271484375, + "learning_rate": 3.61028459491483e-05, + "loss": 126.9963, + "step": 70470 + }, + { + "epoch": 0.2847481183110655, + "grad_norm": 837.6699829101562, + "learning_rate": 3.6101189595695174e-05, + "loss": 70.1699, + "step": 70480 + }, + { + "epoch": 0.28478851957643314, + "grad_norm": 527.2325439453125, + "learning_rate": 3.609953292834107e-05, + "loss": 116.548, + "step": 70490 + }, + { + "epoch": 0.2848289208418008, + "grad_norm": 435.2842712402344, + "learning_rate": 3.609787594711828e-05, + "loss": 85.2434, + "step": 70500 + }, + { + "epoch": 0.2848693221071684, + "grad_norm": 466.9181213378906, + "learning_rate": 3.609621865205912e-05, + "loss": 91.738, + "step": 70510 + }, + { + "epoch": 0.284909723372536, + "grad_norm": 823.5928955078125, + "learning_rate": 3.609456104319589e-05, + "loss": 108.8546, + "step": 70520 + }, + { + "epoch": 0.28495012463790365, + "grad_norm": 417.4728698730469, + "learning_rate": 3.609290312056089e-05, + "loss": 104.4555, + "step": 70530 + }, + { + "epoch": 0.2849905259032713, + "grad_norm": 1021.3464965820312, + "learning_rate": 3.609124488418647e-05, + "loss": 124.5877, + "step": 70540 + }, + { + "epoch": 0.2850309271686389, + "grad_norm": 1041.1820068359375, + "learning_rate": 3.608958633410495e-05, + "loss": 133.8493, + "step": 70550 + }, + { + "epoch": 0.28507132843400657, + "grad_norm": 968.9937133789062, + "learning_rate": 3.608792747034866e-05, + "loss": 98.2077, + "step": 70560 + }, + { + "epoch": 0.2851117296993742, + "grad_norm": 952.7925415039062, + "learning_rate": 3.608626829294994e-05, + "loss": 80.4434, + "step": 70570 + }, + { + "epoch": 0.2851521309647418, + "grad_norm": 791.5453491210938, + "learning_rate": 3.608460880194113e-05, + "loss": 78.5657, + "step": 70580 + }, + { + "epoch": 0.28519253223010943, + "grad_norm": 589.7490234375, + "learning_rate": 3.60829489973546e-05, + "loss": 79.2221, + "step": 70590 
+ }, + { + "epoch": 0.28523293349547707, + "grad_norm": 1679.1435546875, + "learning_rate": 3.6081288879222696e-05, + "loss": 86.2412, + "step": 70600 + }, + { + "epoch": 0.2852733347608447, + "grad_norm": 728.5447998046875, + "learning_rate": 3.607962844757779e-05, + "loss": 106.4841, + "step": 70610 + }, + { + "epoch": 0.28531373602621235, + "grad_norm": 609.9203491210938, + "learning_rate": 3.607796770245224e-05, + "loss": 116.7143, + "step": 70620 + }, + { + "epoch": 0.28535413729158, + "grad_norm": 2138.461181640625, + "learning_rate": 3.607630664387844e-05, + "loss": 89.447, + "step": 70630 + }, + { + "epoch": 0.28539453855694763, + "grad_norm": 839.8328247070312, + "learning_rate": 3.6074645271888765e-05, + "loss": 87.513, + "step": 70640 + }, + { + "epoch": 0.2854349398223152, + "grad_norm": 708.2579956054688, + "learning_rate": 3.60729835865156e-05, + "loss": 121.2733, + "step": 70650 + }, + { + "epoch": 0.28547534108768285, + "grad_norm": 706.3178100585938, + "learning_rate": 3.607132158779135e-05, + "loss": 74.4141, + "step": 70660 + }, + { + "epoch": 0.2855157423530505, + "grad_norm": 443.3250732421875, + "learning_rate": 3.6069659275748404e-05, + "loss": 119.7384, + "step": 70670 + }, + { + "epoch": 0.28555614361841813, + "grad_norm": 792.5812377929688, + "learning_rate": 3.606799665041918e-05, + "loss": 77.2126, + "step": 70680 + }, + { + "epoch": 0.2855965448837858, + "grad_norm": 746.7576293945312, + "learning_rate": 3.60663337118361e-05, + "loss": 114.6337, + "step": 70690 + }, + { + "epoch": 0.2856369461491534, + "grad_norm": 722.7032470703125, + "learning_rate": 3.606467046003156e-05, + "loss": 72.5086, + "step": 70700 + }, + { + "epoch": 0.285677347414521, + "grad_norm": 1377.533203125, + "learning_rate": 3.6063006895038004e-05, + "loss": 93.3579, + "step": 70710 + }, + { + "epoch": 0.28571774867988864, + "grad_norm": 493.3031921386719, + "learning_rate": 3.6061343016887856e-05, + "loss": 92.6154, + "step": 70720 + }, + { + "epoch": 0.2857581499452563, + "grad_norm": 411.08941650390625, + "learning_rate": 3.605967882561356e-05, + "loss": 69.1374, + "step": 70730 + }, + { + "epoch": 0.2857985512106239, + "grad_norm": 572.9785766601562, + "learning_rate": 3.6058014321247556e-05, + "loss": 83.447, + "step": 70740 + }, + { + "epoch": 0.28583895247599156, + "grad_norm": 532.602294921875, + "learning_rate": 3.6056349503822295e-05, + "loss": 87.3144, + "step": 70750 + }, + { + "epoch": 0.2858793537413592, + "grad_norm": 2543.312744140625, + "learning_rate": 3.605468437337023e-05, + "loss": 122.8214, + "step": 70760 + }, + { + "epoch": 0.2859197550067268, + "grad_norm": 1000.5532836914062, + "learning_rate": 3.605301892992383e-05, + "loss": 103.5003, + "step": 70770 + }, + { + "epoch": 0.2859601562720944, + "grad_norm": 545.7823486328125, + "learning_rate": 3.605135317351556e-05, + "loss": 185.1342, + "step": 70780 + }, + { + "epoch": 0.28600055753746206, + "grad_norm": 658.3118286132812, + "learning_rate": 3.604968710417791e-05, + "loss": 137.316, + "step": 70790 + }, + { + "epoch": 0.2860409588028297, + "grad_norm": 518.5614624023438, + "learning_rate": 3.604802072194334e-05, + "loss": 101.5955, + "step": 70800 + }, + { + "epoch": 0.28608136006819734, + "grad_norm": 580.1831665039062, + "learning_rate": 3.604635402684434e-05, + "loss": 111.1494, + "step": 70810 + }, + { + "epoch": 0.286121761333565, + "grad_norm": 1517.73681640625, + "learning_rate": 3.60446870189134e-05, + "loss": 104.7506, + "step": 70820 + }, + { + "epoch": 0.2861621625989326, + "grad_norm": 
670.51513671875, + "learning_rate": 3.604301969818304e-05, + "loss": 84.8733, + "step": 70830 + }, + { + "epoch": 0.2862025638643002, + "grad_norm": 712.9550170898438, + "learning_rate": 3.604135206468574e-05, + "loss": 67.738, + "step": 70840 + }, + { + "epoch": 0.28624296512966785, + "grad_norm": 912.24658203125, + "learning_rate": 3.603968411845402e-05, + "loss": 83.4703, + "step": 70850 + }, + { + "epoch": 0.2862833663950355, + "grad_norm": 1047.503173828125, + "learning_rate": 3.603801585952042e-05, + "loss": 114.4944, + "step": 70860 + }, + { + "epoch": 0.28632376766040313, + "grad_norm": 657.581298828125, + "learning_rate": 3.603634728791743e-05, + "loss": 96.2493, + "step": 70870 + }, + { + "epoch": 0.28636416892577077, + "grad_norm": 399.7559509277344, + "learning_rate": 3.6034678403677595e-05, + "loss": 62.6405, + "step": 70880 + }, + { + "epoch": 0.2864045701911384, + "grad_norm": 588.5263061523438, + "learning_rate": 3.603300920683345e-05, + "loss": 73.9569, + "step": 70890 + }, + { + "epoch": 0.286444971456506, + "grad_norm": 867.572998046875, + "learning_rate": 3.6031339697417535e-05, + "loss": 115.8036, + "step": 70900 + }, + { + "epoch": 0.28648537272187363, + "grad_norm": 1761.2406005859375, + "learning_rate": 3.60296698754624e-05, + "loss": 74.6702, + "step": 70910 + }, + { + "epoch": 0.2865257739872413, + "grad_norm": 914.2118530273438, + "learning_rate": 3.6027999741000596e-05, + "loss": 84.3046, + "step": 70920 + }, + { + "epoch": 0.2865661752526089, + "grad_norm": 1042.0968017578125, + "learning_rate": 3.602632929406469e-05, + "loss": 86.3715, + "step": 70930 + }, + { + "epoch": 0.28660657651797655, + "grad_norm": 488.1643981933594, + "learning_rate": 3.602465853468724e-05, + "loss": 112.5397, + "step": 70940 + }, + { + "epoch": 0.2866469777833442, + "grad_norm": 1167.1517333984375, + "learning_rate": 3.6022987462900824e-05, + "loss": 149.0563, + "step": 70950 + }, + { + "epoch": 0.28668737904871183, + "grad_norm": 530.4674072265625, + "learning_rate": 3.602131607873801e-05, + "loss": 126.5089, + "step": 70960 + }, + { + "epoch": 0.2867277803140794, + "grad_norm": 1380.521240234375, + "learning_rate": 3.601964438223141e-05, + "loss": 97.3757, + "step": 70970 + }, + { + "epoch": 0.28676818157944706, + "grad_norm": 738.44873046875, + "learning_rate": 3.601797237341358e-05, + "loss": 134.7578, + "step": 70980 + }, + { + "epoch": 0.2868085828448147, + "grad_norm": 853.2123413085938, + "learning_rate": 3.6016300052317135e-05, + "loss": 91.1347, + "step": 70990 + }, + { + "epoch": 0.28684898411018234, + "grad_norm": 1180.442138671875, + "learning_rate": 3.601462741897467e-05, + "loss": 77.8991, + "step": 71000 + }, + { + "epoch": 0.28688938537555, + "grad_norm": 582.2689819335938, + "learning_rate": 3.6012954473418806e-05, + "loss": 98.8282, + "step": 71010 + }, + { + "epoch": 0.2869297866409176, + "grad_norm": 636.5897216796875, + "learning_rate": 3.601128121568215e-05, + "loss": 90.8609, + "step": 71020 + }, + { + "epoch": 0.2869701879062852, + "grad_norm": 587.1292114257812, + "learning_rate": 3.600960764579732e-05, + "loss": 76.9735, + "step": 71030 + }, + { + "epoch": 0.28701058917165284, + "grad_norm": 661.5059814453125, + "learning_rate": 3.6007933763796946e-05, + "loss": 90.5676, + "step": 71040 + }, + { + "epoch": 0.2870509904370205, + "grad_norm": 823.51806640625, + "learning_rate": 3.6006259569713665e-05, + "loss": 88.8703, + "step": 71050 + }, + { + "epoch": 0.2870913917023881, + "grad_norm": 1008.6951293945312, + "learning_rate": 3.600458506358011e-05, + 
"loss": 82.2968, + "step": 71060 + }, + { + "epoch": 0.28713179296775576, + "grad_norm": 733.68212890625, + "learning_rate": 3.6002910245428936e-05, + "loss": 64.8268, + "step": 71070 + }, + { + "epoch": 0.2871721942331234, + "grad_norm": 1319.3992919921875, + "learning_rate": 3.6001235115292784e-05, + "loss": 81.923, + "step": 71080 + }, + { + "epoch": 0.287212595498491, + "grad_norm": 586.9310302734375, + "learning_rate": 3.599955967320432e-05, + "loss": 106.8895, + "step": 71090 + }, + { + "epoch": 0.2872529967638586, + "grad_norm": 1251.1234130859375, + "learning_rate": 3.5997883919196193e-05, + "loss": 97.0781, + "step": 71100 + }, + { + "epoch": 0.28729339802922627, + "grad_norm": 478.7159118652344, + "learning_rate": 3.5996207853301084e-05, + "loss": 78.27, + "step": 71110 + }, + { + "epoch": 0.2873337992945939, + "grad_norm": 511.67279052734375, + "learning_rate": 3.599453147555167e-05, + "loss": 59.1904, + "step": 71120 + }, + { + "epoch": 0.28737420055996155, + "grad_norm": 683.7440185546875, + "learning_rate": 3.5992854785980635e-05, + "loss": 136.8193, + "step": 71130 + }, + { + "epoch": 0.2874146018253292, + "grad_norm": 1316.229736328125, + "learning_rate": 3.599117778462066e-05, + "loss": 99.0468, + "step": 71140 + }, + { + "epoch": 0.2874550030906968, + "grad_norm": 1193.0850830078125, + "learning_rate": 3.598950047150444e-05, + "loss": 147.5542, + "step": 71150 + }, + { + "epoch": 0.2874954043560644, + "grad_norm": 683.37158203125, + "learning_rate": 3.598782284666467e-05, + "loss": 130.6934, + "step": 71160 + }, + { + "epoch": 0.28753580562143205, + "grad_norm": 576.25048828125, + "learning_rate": 3.5986144910134074e-05, + "loss": 57.211, + "step": 71170 + }, + { + "epoch": 0.2875762068867997, + "grad_norm": 594.0399169921875, + "learning_rate": 3.5984466661945346e-05, + "loss": 124.3705, + "step": 71180 + }, + { + "epoch": 0.28761660815216733, + "grad_norm": 1409.768798828125, + "learning_rate": 3.598278810213121e-05, + "loss": 131.5035, + "step": 71190 + }, + { + "epoch": 0.28765700941753497, + "grad_norm": 785.3926391601562, + "learning_rate": 3.59811092307244e-05, + "loss": 87.5524, + "step": 71200 + }, + { + "epoch": 0.2876974106829026, + "grad_norm": 955.5843505859375, + "learning_rate": 3.5979430047757634e-05, + "loss": 78.6508, + "step": 71210 + }, + { + "epoch": 0.2877378119482702, + "grad_norm": 915.6661987304688, + "learning_rate": 3.597775055326365e-05, + "loss": 77.3629, + "step": 71220 + }, + { + "epoch": 0.28777821321363783, + "grad_norm": 724.9585571289062, + "learning_rate": 3.5976070747275194e-05, + "loss": 63.3482, + "step": 71230 + }, + { + "epoch": 0.2878186144790055, + "grad_norm": 382.6462097167969, + "learning_rate": 3.597439062982501e-05, + "loss": 99.4082, + "step": 71240 + }, + { + "epoch": 0.2878590157443731, + "grad_norm": 3038.90673828125, + "learning_rate": 3.597271020094586e-05, + "loss": 131.9073, + "step": 71250 + }, + { + "epoch": 0.28789941700974075, + "grad_norm": 236.25941467285156, + "learning_rate": 3.59710294606705e-05, + "loss": 101.0058, + "step": 71260 + }, + { + "epoch": 0.2879398182751084, + "grad_norm": 528.7738037109375, + "learning_rate": 3.59693484090317e-05, + "loss": 124.5235, + "step": 71270 + }, + { + "epoch": 0.28798021954047603, + "grad_norm": 716.123291015625, + "learning_rate": 3.596766704606223e-05, + "loss": 130.8756, + "step": 71280 + }, + { + "epoch": 0.2880206208058436, + "grad_norm": 915.3007202148438, + "learning_rate": 3.596598537179487e-05, + "loss": 99.2651, + "step": 71290 + }, + { + "epoch": 
0.28806102207121126, + "grad_norm": 718.2686767578125, + "learning_rate": 3.596430338626241e-05, + "loss": 76.803, + "step": 71300 + }, + { + "epoch": 0.2881014233365789, + "grad_norm": 863.21484375, + "learning_rate": 3.5962621089497634e-05, + "loss": 80.6553, + "step": 71310 + }, + { + "epoch": 0.28814182460194654, + "grad_norm": 449.41900634765625, + "learning_rate": 3.596093848153334e-05, + "loss": 83.7661, + "step": 71320 + }, + { + "epoch": 0.2881822258673142, + "grad_norm": 820.5440063476562, + "learning_rate": 3.595925556240233e-05, + "loss": 99.024, + "step": 71330 + }, + { + "epoch": 0.2882226271326818, + "grad_norm": 487.0654602050781, + "learning_rate": 3.595757233213742e-05, + "loss": 77.9306, + "step": 71340 + }, + { + "epoch": 0.2882630283980494, + "grad_norm": 706.29833984375, + "learning_rate": 3.5955888790771426e-05, + "loss": 90.7535, + "step": 71350 + }, + { + "epoch": 0.28830342966341704, + "grad_norm": 670.2536010742188, + "learning_rate": 3.5954204938337156e-05, + "loss": 96.3765, + "step": 71360 + }, + { + "epoch": 0.2883438309287847, + "grad_norm": 543.1063232421875, + "learning_rate": 3.5952520774867454e-05, + "loss": 87.9723, + "step": 71370 + }, + { + "epoch": 0.2883842321941523, + "grad_norm": 769.443603515625, + "learning_rate": 3.5950836300395146e-05, + "loss": 74.5191, + "step": 71380 + }, + { + "epoch": 0.28842463345951996, + "grad_norm": 865.3870239257812, + "learning_rate": 3.5949151514953074e-05, + "loss": 80.2997, + "step": 71390 + }, + { + "epoch": 0.2884650347248876, + "grad_norm": 743.4219970703125, + "learning_rate": 3.594746641857407e-05, + "loss": 136.3789, + "step": 71400 + }, + { + "epoch": 0.2885054359902552, + "grad_norm": 934.5713500976562, + "learning_rate": 3.594578101129101e-05, + "loss": 113.3718, + "step": 71410 + }, + { + "epoch": 0.28854583725562283, + "grad_norm": 572.929443359375, + "learning_rate": 3.594409529313674e-05, + "loss": 84.5333, + "step": 71420 + }, + { + "epoch": 0.28858623852099047, + "grad_norm": 683.1107788085938, + "learning_rate": 3.594240926414412e-05, + "loss": 95.4069, + "step": 71430 + }, + { + "epoch": 0.2886266397863581, + "grad_norm": 971.50634765625, + "learning_rate": 3.594072292434602e-05, + "loss": 85.4527, + "step": 71440 + }, + { + "epoch": 0.28866704105172575, + "grad_norm": 1039.610107421875, + "learning_rate": 3.593903627377533e-05, + "loss": 105.982, + "step": 71450 + }, + { + "epoch": 0.2887074423170934, + "grad_norm": 845.2203369140625, + "learning_rate": 3.593734931246491e-05, + "loss": 66.9271, + "step": 71460 + }, + { + "epoch": 0.28874784358246103, + "grad_norm": 896.7979736328125, + "learning_rate": 3.593566204044767e-05, + "loss": 78.2973, + "step": 71470 + }, + { + "epoch": 0.2887882448478286, + "grad_norm": 858.2577514648438, + "learning_rate": 3.5933974457756494e-05, + "loss": 91.6034, + "step": 71480 + }, + { + "epoch": 0.28882864611319625, + "grad_norm": 602.2761840820312, + "learning_rate": 3.593228656442427e-05, + "loss": 96.3232, + "step": 71490 + }, + { + "epoch": 0.2888690473785639, + "grad_norm": 1461.43896484375, + "learning_rate": 3.5930598360483926e-05, + "loss": 79.5257, + "step": 71500 + }, + { + "epoch": 0.28890944864393153, + "grad_norm": 1086.226318359375, + "learning_rate": 3.592890984596837e-05, + "loss": 107.3945, + "step": 71510 + }, + { + "epoch": 0.2889498499092992, + "grad_norm": 900.5025634765625, + "learning_rate": 3.592722102091051e-05, + "loss": 97.1163, + "step": 71520 + }, + { + "epoch": 0.2889902511746668, + "grad_norm": 1118.611083984375, + "learning_rate": 
3.592553188534328e-05, + "loss": 102.8479, + "step": 71530 + }, + { + "epoch": 0.2890306524400344, + "grad_norm": 694.652587890625, + "learning_rate": 3.59238424392996e-05, + "loss": 64.6989, + "step": 71540 + }, + { + "epoch": 0.28907105370540204, + "grad_norm": 726.5424194335938, + "learning_rate": 3.592215268281242e-05, + "loss": 72.9872, + "step": 71550 + }, + { + "epoch": 0.2891114549707697, + "grad_norm": 389.5179443359375, + "learning_rate": 3.592046261591467e-05, + "loss": 111.665, + "step": 71560 + }, + { + "epoch": 0.2891518562361373, + "grad_norm": 950.0123901367188, + "learning_rate": 3.5918772238639304e-05, + "loss": 97.2428, + "step": 71570 + }, + { + "epoch": 0.28919225750150496, + "grad_norm": 1688.843017578125, + "learning_rate": 3.591708155101928e-05, + "loss": 86.6062, + "step": 71580 + }, + { + "epoch": 0.2892326587668726, + "grad_norm": 905.09130859375, + "learning_rate": 3.591539055308756e-05, + "loss": 97.9923, + "step": 71590 + }, + { + "epoch": 0.28927306003224024, + "grad_norm": 678.3927001953125, + "learning_rate": 3.591369924487711e-05, + "loss": 108.918, + "step": 71600 + }, + { + "epoch": 0.2893134612976078, + "grad_norm": 655.4866943359375, + "learning_rate": 3.59120076264209e-05, + "loss": 49.8212, + "step": 71610 + }, + { + "epoch": 0.28935386256297546, + "grad_norm": 561.523193359375, + "learning_rate": 3.59103156977519e-05, + "loss": 108.0888, + "step": 71620 + }, + { + "epoch": 0.2893942638283431, + "grad_norm": 687.6011962890625, + "learning_rate": 3.590862345890311e-05, + "loss": 101.2732, + "step": 71630 + }, + { + "epoch": 0.28943466509371074, + "grad_norm": 1609.2037353515625, + "learning_rate": 3.590693090990752e-05, + "loss": 119.8612, + "step": 71640 + }, + { + "epoch": 0.2894750663590784, + "grad_norm": 255.64794921875, + "learning_rate": 3.590523805079812e-05, + "loss": 133.946, + "step": 71650 + }, + { + "epoch": 0.289515467624446, + "grad_norm": 952.5621337890625, + "learning_rate": 3.590354488160791e-05, + "loss": 101.8427, + "step": 71660 + }, + { + "epoch": 0.2895558688898136, + "grad_norm": 602.4816284179688, + "learning_rate": 3.5901851402369905e-05, + "loss": 96.3364, + "step": 71670 + }, + { + "epoch": 0.28959627015518125, + "grad_norm": 930.1558837890625, + "learning_rate": 3.590015761311713e-05, + "loss": 98.377, + "step": 71680 + }, + { + "epoch": 0.2896366714205489, + "grad_norm": 1040.9761962890625, + "learning_rate": 3.5898463513882584e-05, + "loss": 134.3193, + "step": 71690 + }, + { + "epoch": 0.2896770726859165, + "grad_norm": 433.5143737792969, + "learning_rate": 3.589676910469932e-05, + "loss": 84.4779, + "step": 71700 + }, + { + "epoch": 0.28971747395128417, + "grad_norm": 709.3893432617188, + "learning_rate": 3.589507438560034e-05, + "loss": 122.3503, + "step": 71710 + }, + { + "epoch": 0.2897578752166518, + "grad_norm": 1142.872802734375, + "learning_rate": 3.589337935661871e-05, + "loss": 138.8856, + "step": 71720 + }, + { + "epoch": 0.2897982764820194, + "grad_norm": 754.624267578125, + "learning_rate": 3.589168401778747e-05, + "loss": 115.4002, + "step": 71730 + }, + { + "epoch": 0.28983867774738703, + "grad_norm": 773.010009765625, + "learning_rate": 3.588998836913967e-05, + "loss": 113.2278, + "step": 71740 + }, + { + "epoch": 0.28987907901275467, + "grad_norm": 665.0406494140625, + "learning_rate": 3.5888292410708364e-05, + "loss": 89.3755, + "step": 71750 + }, + { + "epoch": 0.2899194802781223, + "grad_norm": 554.4705810546875, + "learning_rate": 3.588659614252662e-05, + "loss": 72.4111, + "step": 71760 + }, + { + 
"epoch": 0.28995988154348995, + "grad_norm": 720.2310180664062, + "learning_rate": 3.5884899564627504e-05, + "loss": 82.4981, + "step": 71770 + }, + { + "epoch": 0.2900002828088576, + "grad_norm": 891.1979370117188, + "learning_rate": 3.588320267704409e-05, + "loss": 111.6974, + "step": 71780 + }, + { + "epoch": 0.29004068407422523, + "grad_norm": 695.9783325195312, + "learning_rate": 3.588150547980946e-05, + "loss": 87.6014, + "step": 71790 + }, + { + "epoch": 0.2900810853395928, + "grad_norm": 656.4078369140625, + "learning_rate": 3.587980797295671e-05, + "loss": 82.386, + "step": 71800 + }, + { + "epoch": 0.29012148660496045, + "grad_norm": 1019.5216674804688, + "learning_rate": 3.587811015651893e-05, + "loss": 90.0584, + "step": 71810 + }, + { + "epoch": 0.2901618878703281, + "grad_norm": 777.8086547851562, + "learning_rate": 3.587641203052921e-05, + "loss": 94.4279, + "step": 71820 + }, + { + "epoch": 0.29020228913569573, + "grad_norm": 509.34429931640625, + "learning_rate": 3.587471359502066e-05, + "loss": 74.3182, + "step": 71830 + }, + { + "epoch": 0.2902426904010634, + "grad_norm": 881.0162353515625, + "learning_rate": 3.587301485002641e-05, + "loss": 81.9621, + "step": 71840 + }, + { + "epoch": 0.290283091666431, + "grad_norm": 1683.5653076171875, + "learning_rate": 3.587131579557956e-05, + "loss": 112.632, + "step": 71850 + }, + { + "epoch": 0.2903234929317986, + "grad_norm": 676.3965454101562, + "learning_rate": 3.5869616431713235e-05, + "loss": 78.6638, + "step": 71860 + }, + { + "epoch": 0.29036389419716624, + "grad_norm": 621.2343139648438, + "learning_rate": 3.586791675846057e-05, + "loss": 107.0322, + "step": 71870 + }, + { + "epoch": 0.2904042954625339, + "grad_norm": 334.3507995605469, + "learning_rate": 3.586621677585469e-05, + "loss": 85.8448, + "step": 71880 + }, + { + "epoch": 0.2904446967279015, + "grad_norm": 427.0581359863281, + "learning_rate": 3.586451648392875e-05, + "loss": 76.2641, + "step": 71890 + }, + { + "epoch": 0.29048509799326916, + "grad_norm": 551.7465209960938, + "learning_rate": 3.58628158827159e-05, + "loss": 92.0299, + "step": 71900 + }, + { + "epoch": 0.2905254992586368, + "grad_norm": 352.3697509765625, + "learning_rate": 3.586111497224928e-05, + "loss": 80.4535, + "step": 71910 + }, + { + "epoch": 0.29056590052400444, + "grad_norm": 760.9170532226562, + "learning_rate": 3.585941375256206e-05, + "loss": 102.3532, + "step": 71920 + }, + { + "epoch": 0.290606301789372, + "grad_norm": 936.9695434570312, + "learning_rate": 3.58577122236874e-05, + "loss": 86.1859, + "step": 71930 + }, + { + "epoch": 0.29064670305473966, + "grad_norm": 781.8998413085938, + "learning_rate": 3.585601038565848e-05, + "loss": 81.686, + "step": 71940 + }, + { + "epoch": 0.2906871043201073, + "grad_norm": 410.9126892089844, + "learning_rate": 3.585430823850847e-05, + "loss": 103.4618, + "step": 71950 + }, + { + "epoch": 0.29072750558547494, + "grad_norm": 783.36083984375, + "learning_rate": 3.5852605782270566e-05, + "loss": 93.8821, + "step": 71960 + }, + { + "epoch": 0.2907679068508426, + "grad_norm": 1107.1121826171875, + "learning_rate": 3.585090301697795e-05, + "loss": 122.7634, + "step": 71970 + }, + { + "epoch": 0.2908083081162102, + "grad_norm": 521.3311767578125, + "learning_rate": 3.584919994266382e-05, + "loss": 64.3297, + "step": 71980 + }, + { + "epoch": 0.2908487093815778, + "grad_norm": 1842.224365234375, + "learning_rate": 3.584749655936137e-05, + "loss": 134.6189, + "step": 71990 + }, + { + "epoch": 0.29088911064694545, + "grad_norm": 1089.5184326171875, + 
"learning_rate": 3.584579286710382e-05, + "loss": 122.7782, + "step": 72000 + }, + { + "epoch": 0.2909295119123131, + "grad_norm": 503.7202453613281, + "learning_rate": 3.5844088865924376e-05, + "loss": 100.2483, + "step": 72010 + }, + { + "epoch": 0.29096991317768073, + "grad_norm": 638.1285400390625, + "learning_rate": 3.584238455585626e-05, + "loss": 88.415, + "step": 72020 + }, + { + "epoch": 0.29101031444304837, + "grad_norm": 539.9647216796875, + "learning_rate": 3.5840679936932714e-05, + "loss": 65.0639, + "step": 72030 + }, + { + "epoch": 0.291050715708416, + "grad_norm": 664.2471923828125, + "learning_rate": 3.5838975009186945e-05, + "loss": 134.7047, + "step": 72040 + }, + { + "epoch": 0.2910911169737836, + "grad_norm": 913.1116333007812, + "learning_rate": 3.583726977265222e-05, + "loss": 112.7141, + "step": 72050 + }, + { + "epoch": 0.29113151823915123, + "grad_norm": 1499.9100341796875, + "learning_rate": 3.583556422736175e-05, + "loss": 153.8179, + "step": 72060 + }, + { + "epoch": 0.2911719195045189, + "grad_norm": 718.5802612304688, + "learning_rate": 3.583385837334881e-05, + "loss": 90.5473, + "step": 72070 + }, + { + "epoch": 0.2912123207698865, + "grad_norm": 576.2139892578125, + "learning_rate": 3.5832152210646646e-05, + "loss": 81.4915, + "step": 72080 + }, + { + "epoch": 0.29125272203525415, + "grad_norm": 1747.0372314453125, + "learning_rate": 3.583044573928853e-05, + "loss": 99.9698, + "step": 72090 + }, + { + "epoch": 0.2912931233006218, + "grad_norm": 1257.2374267578125, + "learning_rate": 3.5828738959307715e-05, + "loss": 86.8282, + "step": 72100 + }, + { + "epoch": 0.29133352456598943, + "grad_norm": 1134.6214599609375, + "learning_rate": 3.582703187073749e-05, + "loss": 86.8558, + "step": 72110 + }, + { + "epoch": 0.291373925831357, + "grad_norm": 446.9798278808594, + "learning_rate": 3.582532447361114e-05, + "loss": 87.0388, + "step": 72120 + }, + { + "epoch": 0.29141432709672466, + "grad_norm": 706.3759765625, + "learning_rate": 3.5823616767961924e-05, + "loss": 87.0564, + "step": 72130 + }, + { + "epoch": 0.2914547283620923, + "grad_norm": 481.1129455566406, + "learning_rate": 3.5821908753823163e-05, + "loss": 83.68, + "step": 72140 + }, + { + "epoch": 0.29149512962745994, + "grad_norm": 1094.1630859375, + "learning_rate": 3.582020043122814e-05, + "loss": 129.7535, + "step": 72150 + }, + { + "epoch": 0.2915355308928276, + "grad_norm": 1239.111328125, + "learning_rate": 3.581849180021017e-05, + "loss": 91.437, + "step": 72160 + }, + { + "epoch": 0.2915759321581952, + "grad_norm": 1191.783447265625, + "learning_rate": 3.581678286080256e-05, + "loss": 139.8153, + "step": 72170 + }, + { + "epoch": 0.2916163334235628, + "grad_norm": 348.2909851074219, + "learning_rate": 3.581507361303862e-05, + "loss": 67.6737, + "step": 72180 + }, + { + "epoch": 0.29165673468893044, + "grad_norm": 618.5779418945312, + "learning_rate": 3.5813364056951676e-05, + "loss": 121.2152, + "step": 72190 + }, + { + "epoch": 0.2916971359542981, + "grad_norm": 954.4894409179688, + "learning_rate": 3.5811654192575064e-05, + "loss": 73.6696, + "step": 72200 + }, + { + "epoch": 0.2917375372196657, + "grad_norm": 920.869873046875, + "learning_rate": 3.580994401994211e-05, + "loss": 113.7984, + "step": 72210 + }, + { + "epoch": 0.29177793848503336, + "grad_norm": 492.7052307128906, + "learning_rate": 3.5808233539086155e-05, + "loss": 75.1067, + "step": 72220 + }, + { + "epoch": 0.291818339750401, + "grad_norm": 864.5853881835938, + "learning_rate": 3.580652275004055e-05, + "loss": 78.7054, + 
"step": 72230 + }, + { + "epoch": 0.29185874101576864, + "grad_norm": 1099.725341796875, + "learning_rate": 3.580481165283865e-05, + "loss": 131.3889, + "step": 72240 + }, + { + "epoch": 0.2918991422811362, + "grad_norm": 2941.907470703125, + "learning_rate": 3.580310024751381e-05, + "loss": 113.5114, + "step": 72250 + }, + { + "epoch": 0.29193954354650387, + "grad_norm": 638.6181640625, + "learning_rate": 3.5801388534099396e-05, + "loss": 137.047, + "step": 72260 + }, + { + "epoch": 0.2919799448118715, + "grad_norm": 461.42535400390625, + "learning_rate": 3.5799676512628775e-05, + "loss": 125.0499, + "step": 72270 + }, + { + "epoch": 0.29202034607723915, + "grad_norm": 685.3917846679688, + "learning_rate": 3.579796418313532e-05, + "loss": 97.547, + "step": 72280 + }, + { + "epoch": 0.2920607473426068, + "grad_norm": 312.480712890625, + "learning_rate": 3.5796251545652425e-05, + "loss": 117.5048, + "step": 72290 + }, + { + "epoch": 0.2921011486079744, + "grad_norm": 349.53546142578125, + "learning_rate": 3.579453860021348e-05, + "loss": 80.8323, + "step": 72300 + }, + { + "epoch": 0.292141549873342, + "grad_norm": 1444.580322265625, + "learning_rate": 3.579282534685186e-05, + "loss": 129.3323, + "step": 72310 + }, + { + "epoch": 0.29218195113870965, + "grad_norm": 1259.7830810546875, + "learning_rate": 3.579111178560099e-05, + "loss": 86.843, + "step": 72320 + }, + { + "epoch": 0.2922223524040773, + "grad_norm": 786.1168823242188, + "learning_rate": 3.578939791649426e-05, + "loss": 122.4909, + "step": 72330 + }, + { + "epoch": 0.29226275366944493, + "grad_norm": 714.9317016601562, + "learning_rate": 3.5787683739565096e-05, + "loss": 84.9132, + "step": 72340 + }, + { + "epoch": 0.29230315493481257, + "grad_norm": 872.474365234375, + "learning_rate": 3.57859692548469e-05, + "loss": 132.4389, + "step": 72350 + }, + { + "epoch": 0.2923435562001802, + "grad_norm": 950.5380859375, + "learning_rate": 3.578425446237312e-05, + "loss": 118.9912, + "step": 72360 + }, + { + "epoch": 0.2923839574655478, + "grad_norm": 1084.451416015625, + "learning_rate": 3.578253936217716e-05, + "loss": 111.001, + "step": 72370 + }, + { + "epoch": 0.29242435873091543, + "grad_norm": 504.3788146972656, + "learning_rate": 3.578082395429247e-05, + "loss": 90.5439, + "step": 72380 + }, + { + "epoch": 0.2924647599962831, + "grad_norm": 947.7161865234375, + "learning_rate": 3.57791082387525e-05, + "loss": 79.2676, + "step": 72390 + }, + { + "epoch": 0.2925051612616507, + "grad_norm": 844.5031127929688, + "learning_rate": 3.577739221559069e-05, + "loss": 80.0211, + "step": 72400 + }, + { + "epoch": 0.29254556252701835, + "grad_norm": 1031.9097900390625, + "learning_rate": 3.577567588484049e-05, + "loss": 113.7916, + "step": 72410 + }, + { + "epoch": 0.292585963792386, + "grad_norm": 462.06390380859375, + "learning_rate": 3.577395924653537e-05, + "loss": 77.0004, + "step": 72420 + }, + { + "epoch": 0.29262636505775363, + "grad_norm": 620.7058715820312, + "learning_rate": 3.577224230070879e-05, + "loss": 98.8806, + "step": 72430 + }, + { + "epoch": 0.2926667663231212, + "grad_norm": 1253.0765380859375, + "learning_rate": 3.577052504739423e-05, + "loss": 101.3286, + "step": 72440 + }, + { + "epoch": 0.29270716758848886, + "grad_norm": 949.2723388671875, + "learning_rate": 3.5768807486625166e-05, + "loss": 106.9771, + "step": 72450 + }, + { + "epoch": 0.2927475688538565, + "grad_norm": 628.865966796875, + "learning_rate": 3.576708961843508e-05, + "loss": 89.801, + "step": 72460 + }, + { + "epoch": 0.29278797011922414, + 
"grad_norm": 835.4890747070312, + "learning_rate": 3.576537144285747e-05, + "loss": 81.5792, + "step": 72470 + }, + { + "epoch": 0.2928283713845918, + "grad_norm": 989.212158203125, + "learning_rate": 3.576365295992582e-05, + "loss": 99.7632, + "step": 72480 + }, + { + "epoch": 0.2928687726499594, + "grad_norm": 721.414794921875, + "learning_rate": 3.576193416967364e-05, + "loss": 119.3642, + "step": 72490 + }, + { + "epoch": 0.292909173915327, + "grad_norm": 520.2438354492188, + "learning_rate": 3.576021507213444e-05, + "loss": 77.7748, + "step": 72500 + }, + { + "epoch": 0.29294957518069464, + "grad_norm": 702.87158203125, + "learning_rate": 3.575849566734174e-05, + "loss": 73.8816, + "step": 72510 + }, + { + "epoch": 0.2929899764460623, + "grad_norm": 500.411865234375, + "learning_rate": 3.5756775955329045e-05, + "loss": 75.6594, + "step": 72520 + }, + { + "epoch": 0.2930303777114299, + "grad_norm": 733.4732666015625, + "learning_rate": 3.575505593612989e-05, + "loss": 65.1086, + "step": 72530 + }, + { + "epoch": 0.29307077897679756, + "grad_norm": 448.20526123046875, + "learning_rate": 3.575333560977782e-05, + "loss": 79.7185, + "step": 72540 + }, + { + "epoch": 0.2931111802421652, + "grad_norm": 1040.3751220703125, + "learning_rate": 3.5751614976306347e-05, + "loss": 92.6277, + "step": 72550 + }, + { + "epoch": 0.29315158150753284, + "grad_norm": 852.045654296875, + "learning_rate": 3.574989403574904e-05, + "loss": 122.8803, + "step": 72560 + }, + { + "epoch": 0.29319198277290043, + "grad_norm": 667.4397583007812, + "learning_rate": 3.574817278813943e-05, + "loss": 108.0853, + "step": 72570 + }, + { + "epoch": 0.29323238403826807, + "grad_norm": 602.3939819335938, + "learning_rate": 3.574645123351109e-05, + "loss": 111.2836, + "step": 72580 + }, + { + "epoch": 0.2932727853036357, + "grad_norm": 1957.2642822265625, + "learning_rate": 3.5744729371897584e-05, + "loss": 125.581, + "step": 72590 + }, + { + "epoch": 0.29331318656900335, + "grad_norm": 1082.717529296875, + "learning_rate": 3.574300720333247e-05, + "loss": 74.4039, + "step": 72600 + }, + { + "epoch": 0.293353587834371, + "grad_norm": 862.1207885742188, + "learning_rate": 3.574128472784932e-05, + "loss": 57.7977, + "step": 72610 + }, + { + "epoch": 0.29339398909973863, + "grad_norm": 452.7544860839844, + "learning_rate": 3.573956194548172e-05, + "loss": 58.3715, + "step": 72620 + }, + { + "epoch": 0.2934343903651062, + "grad_norm": 869.1669921875, + "learning_rate": 3.573783885626326e-05, + "loss": 100.8732, + "step": 72630 + }, + { + "epoch": 0.29347479163047385, + "grad_norm": 763.2515258789062, + "learning_rate": 3.573611546022753e-05, + "loss": 121.7777, + "step": 72640 + }, + { + "epoch": 0.2935151928958415, + "grad_norm": 623.1080322265625, + "learning_rate": 3.5734391757408123e-05, + "loss": 132.8998, + "step": 72650 + }, + { + "epoch": 0.29355559416120913, + "grad_norm": 532.2351684570312, + "learning_rate": 3.5732667747838654e-05, + "loss": 75.619, + "step": 72660 + }, + { + "epoch": 0.2935959954265768, + "grad_norm": 471.5350341796875, + "learning_rate": 3.573094343155272e-05, + "loss": 101.0413, + "step": 72670 + }, + { + "epoch": 0.2936363966919444, + "grad_norm": 377.2166442871094, + "learning_rate": 3.572921880858395e-05, + "loss": 73.1568, + "step": 72680 + }, + { + "epoch": 0.293676797957312, + "grad_norm": 788.689208984375, + "learning_rate": 3.572749387896596e-05, + "loss": 79.1313, + "step": 72690 + }, + { + "epoch": 0.29371719922267964, + "grad_norm": 596.8563232421875, + "learning_rate": 
3.572576864273238e-05, + "loss": 76.3214, + "step": 72700 + }, + { + "epoch": 0.2937576004880473, + "grad_norm": 758.3897705078125, + "learning_rate": 3.572404309991685e-05, + "loss": 120.3851, + "step": 72710 + }, + { + "epoch": 0.2937980017534149, + "grad_norm": 1056.82470703125, + "learning_rate": 3.5722317250553e-05, + "loss": 224.5917, + "step": 72720 + }, + { + "epoch": 0.29383840301878256, + "grad_norm": 883.3718872070312, + "learning_rate": 3.572059109467447e-05, + "loss": 114.4066, + "step": 72730 + }, + { + "epoch": 0.2938788042841502, + "grad_norm": 635.0252685546875, + "learning_rate": 3.5718864632314936e-05, + "loss": 87.7703, + "step": 72740 + }, + { + "epoch": 0.29391920554951784, + "grad_norm": 1090.9697265625, + "learning_rate": 3.571713786350804e-05, + "loss": 187.8033, + "step": 72750 + }, + { + "epoch": 0.2939596068148854, + "grad_norm": 750.0673828125, + "learning_rate": 3.571541078828745e-05, + "loss": 89.0441, + "step": 72760 + }, + { + "epoch": 0.29400000808025306, + "grad_norm": 618.0006713867188, + "learning_rate": 3.5713683406686834e-05, + "loss": 108.5695, + "step": 72770 + }, + { + "epoch": 0.2940404093456207, + "grad_norm": 593.7196044921875, + "learning_rate": 3.571195571873988e-05, + "loss": 92.0049, + "step": 72780 + }, + { + "epoch": 0.29408081061098834, + "grad_norm": 400.6498718261719, + "learning_rate": 3.571022772448024e-05, + "loss": 112.944, + "step": 72790 + }, + { + "epoch": 0.294121211876356, + "grad_norm": 437.6410217285156, + "learning_rate": 3.570849942394164e-05, + "loss": 105.0623, + "step": 72800 + }, + { + "epoch": 0.2941616131417236, + "grad_norm": 1211.8363037109375, + "learning_rate": 3.570677081715775e-05, + "loss": 89.5896, + "step": 72810 + }, + { + "epoch": 0.2942020144070912, + "grad_norm": 1303.0433349609375, + "learning_rate": 3.5705041904162274e-05, + "loss": 88.2197, + "step": 72820 + }, + { + "epoch": 0.29424241567245885, + "grad_norm": 3822.9638671875, + "learning_rate": 3.5703312684988924e-05, + "loss": 127.6329, + "step": 72830 + }, + { + "epoch": 0.2942828169378265, + "grad_norm": 808.0755615234375, + "learning_rate": 3.570158315967141e-05, + "loss": 108.3772, + "step": 72840 + }, + { + "epoch": 0.2943232182031941, + "grad_norm": 457.414794921875, + "learning_rate": 3.569985332824345e-05, + "loss": 59.9871, + "step": 72850 + }, + { + "epoch": 0.29436361946856177, + "grad_norm": 1636.004150390625, + "learning_rate": 3.569812319073876e-05, + "loss": 148.2287, + "step": 72860 + }, + { + "epoch": 0.2944040207339294, + "grad_norm": 698.8783569335938, + "learning_rate": 3.5696392747191084e-05, + "loss": 127.4473, + "step": 72870 + }, + { + "epoch": 0.29444442199929705, + "grad_norm": 1077.862060546875, + "learning_rate": 3.569466199763414e-05, + "loss": 98.2053, + "step": 72880 + }, + { + "epoch": 0.29448482326466463, + "grad_norm": 1081.0142822265625, + "learning_rate": 3.569293094210169e-05, + "loss": 72.2839, + "step": 72890 + }, + { + "epoch": 0.29452522453003227, + "grad_norm": 305.5542297363281, + "learning_rate": 3.569119958062747e-05, + "loss": 98.8256, + "step": 72900 + }, + { + "epoch": 0.2945656257953999, + "grad_norm": 1745.231201171875, + "learning_rate": 3.568946791324523e-05, + "loss": 115.314, + "step": 72910 + }, + { + "epoch": 0.29460602706076755, + "grad_norm": 362.6074523925781, + "learning_rate": 3.5687735939988745e-05, + "loss": 118.273, + "step": 72920 + }, + { + "epoch": 0.2946464283261352, + "grad_norm": 729.9808349609375, + "learning_rate": 3.5686003660891763e-05, + "loss": 91.1678, + "step": 72930 + }, 
+ { + "epoch": 0.29468682959150283, + "grad_norm": 495.85943603515625, + "learning_rate": 3.568427107598807e-05, + "loss": 53.1156, + "step": 72940 + }, + { + "epoch": 0.2947272308568704, + "grad_norm": 1018.3499145507812, + "learning_rate": 3.568253818531143e-05, + "loss": 102.6948, + "step": 72950 + }, + { + "epoch": 0.29476763212223805, + "grad_norm": 1074.23046875, + "learning_rate": 3.568080498889564e-05, + "loss": 104.7789, + "step": 72960 + }, + { + "epoch": 0.2948080333876057, + "grad_norm": 1750.8551025390625, + "learning_rate": 3.567907148677448e-05, + "loss": 128.3635, + "step": 72970 + }, + { + "epoch": 0.29484843465297333, + "grad_norm": 937.0585327148438, + "learning_rate": 3.567733767898176e-05, + "loss": 96.6746, + "step": 72980 + }, + { + "epoch": 0.294888835918341, + "grad_norm": 436.53472900390625, + "learning_rate": 3.567560356555126e-05, + "loss": 63.1523, + "step": 72990 + }, + { + "epoch": 0.2949292371837086, + "grad_norm": 737.880859375, + "learning_rate": 3.56738691465168e-05, + "loss": 91.9063, + "step": 73000 + }, + { + "epoch": 0.2949696384490762, + "grad_norm": 816.466796875, + "learning_rate": 3.5672134421912186e-05, + "loss": 117.2942, + "step": 73010 + }, + { + "epoch": 0.29501003971444384, + "grad_norm": 639.8851928710938, + "learning_rate": 3.567039939177125e-05, + "loss": 104.2634, + "step": 73020 + }, + { + "epoch": 0.2950504409798115, + "grad_norm": 1224.109619140625, + "learning_rate": 3.566866405612781e-05, + "loss": 80.4657, + "step": 73030 + }, + { + "epoch": 0.2950908422451791, + "grad_norm": 679.2395629882812, + "learning_rate": 3.56669284150157e-05, + "loss": 80.445, + "step": 73040 + }, + { + "epoch": 0.29513124351054676, + "grad_norm": 473.8035888671875, + "learning_rate": 3.566519246846875e-05, + "loss": 113.522, + "step": 73050 + }, + { + "epoch": 0.2951716447759144, + "grad_norm": 393.9403381347656, + "learning_rate": 3.566345621652081e-05, + "loss": 51.296, + "step": 73060 + }, + { + "epoch": 0.29521204604128204, + "grad_norm": 1269.7156982421875, + "learning_rate": 3.566171965920573e-05, + "loss": 148.6698, + "step": 73070 + }, + { + "epoch": 0.2952524473066496, + "grad_norm": 1674.3201904296875, + "learning_rate": 3.565998279655736e-05, + "loss": 125.929, + "step": 73080 + }, + { + "epoch": 0.29529284857201726, + "grad_norm": 538.67333984375, + "learning_rate": 3.565824562860956e-05, + "loss": 91.2189, + "step": 73090 + }, + { + "epoch": 0.2953332498373849, + "grad_norm": 675.0604248046875, + "learning_rate": 3.56565081553962e-05, + "loss": 89.5929, + "step": 73100 + }, + { + "epoch": 0.29537365110275254, + "grad_norm": 368.86474609375, + "learning_rate": 3.565477037695116e-05, + "loss": 84.0492, + "step": 73110 + }, + { + "epoch": 0.2954140523681202, + "grad_norm": 679.0097045898438, + "learning_rate": 3.56530322933083e-05, + "loss": 82.9171, + "step": 73120 + }, + { + "epoch": 0.2954544536334878, + "grad_norm": 791.1397094726562, + "learning_rate": 3.565129390450152e-05, + "loss": 101.7493, + "step": 73130 + }, + { + "epoch": 0.2954948548988554, + "grad_norm": 1768.6806640625, + "learning_rate": 3.564955521056471e-05, + "loss": 106.8497, + "step": 73140 + }, + { + "epoch": 0.29553525616422305, + "grad_norm": 802.4238891601562, + "learning_rate": 3.5647816211531765e-05, + "loss": 71.442, + "step": 73150 + }, + { + "epoch": 0.2955756574295907, + "grad_norm": 723.917724609375, + "learning_rate": 3.5646076907436586e-05, + "loss": 104.6151, + "step": 73160 + }, + { + "epoch": 0.29561605869495833, + "grad_norm": 714.0927734375, + 
"learning_rate": 3.5644337298313086e-05, + "loss": 107.0777, + "step": 73170 + }, + { + "epoch": 0.29565645996032597, + "grad_norm": 525.4021606445312, + "learning_rate": 3.5642597384195166e-05, + "loss": 145.3963, + "step": 73180 + }, + { + "epoch": 0.2956968612256936, + "grad_norm": 882.1038208007812, + "learning_rate": 3.564085716511677e-05, + "loss": 70.7709, + "step": 73190 + }, + { + "epoch": 0.2957372624910612, + "grad_norm": 1011.0650634765625, + "learning_rate": 3.5639116641111804e-05, + "loss": 118.638, + "step": 73200 + }, + { + "epoch": 0.29577766375642883, + "grad_norm": 764.7418823242188, + "learning_rate": 3.563737581221421e-05, + "loss": 106.5217, + "step": 73210 + }, + { + "epoch": 0.2958180650217965, + "grad_norm": 810.8838500976562, + "learning_rate": 3.563563467845792e-05, + "loss": 89.0784, + "step": 73220 + }, + { + "epoch": 0.2958584662871641, + "grad_norm": 926.8535766601562, + "learning_rate": 3.563389323987688e-05, + "loss": 129.2933, + "step": 73230 + }, + { + "epoch": 0.29589886755253175, + "grad_norm": 1497.0152587890625, + "learning_rate": 3.563215149650505e-05, + "loss": 135.2045, + "step": 73240 + }, + { + "epoch": 0.2959392688178994, + "grad_norm": 554.1571655273438, + "learning_rate": 3.563040944837638e-05, + "loss": 109.4917, + "step": 73250 + }, + { + "epoch": 0.29597967008326703, + "grad_norm": 887.9640502929688, + "learning_rate": 3.562866709552483e-05, + "loss": 113.0188, + "step": 73260 + }, + { + "epoch": 0.2960200713486346, + "grad_norm": 604.0414428710938, + "learning_rate": 3.562692443798436e-05, + "loss": 94.211, + "step": 73270 + }, + { + "epoch": 0.29606047261400226, + "grad_norm": 1119.0899658203125, + "learning_rate": 3.562518147578896e-05, + "loss": 109.4852, + "step": 73280 + }, + { + "epoch": 0.2961008738793699, + "grad_norm": 1029.7083740234375, + "learning_rate": 3.56234382089726e-05, + "loss": 101.8557, + "step": 73290 + }, + { + "epoch": 0.29614127514473754, + "grad_norm": 1542.7054443359375, + "learning_rate": 3.5621694637569263e-05, + "loss": 104.1286, + "step": 73300 + }, + { + "epoch": 0.2961816764101052, + "grad_norm": 438.6230163574219, + "learning_rate": 3.561995076161296e-05, + "loss": 86.4454, + "step": 73310 + }, + { + "epoch": 0.2962220776754728, + "grad_norm": 783.0236206054688, + "learning_rate": 3.5618206581137664e-05, + "loss": 63.7207, + "step": 73320 + }, + { + "epoch": 0.2962624789408404, + "grad_norm": 630.3781127929688, + "learning_rate": 3.5616462096177396e-05, + "loss": 75.6398, + "step": 73330 + }, + { + "epoch": 0.29630288020620804, + "grad_norm": 880.0503540039062, + "learning_rate": 3.561471730676616e-05, + "loss": 129.9963, + "step": 73340 + }, + { + "epoch": 0.2963432814715757, + "grad_norm": 605.1770629882812, + "learning_rate": 3.561297221293797e-05, + "loss": 68.7529, + "step": 73350 + }, + { + "epoch": 0.2963836827369433, + "grad_norm": 963.17919921875, + "learning_rate": 3.561122681472684e-05, + "loss": 92.501, + "step": 73360 + }, + { + "epoch": 0.29642408400231096, + "grad_norm": 713.5911254882812, + "learning_rate": 3.560948111216682e-05, + "loss": 80.6997, + "step": 73370 + }, + { + "epoch": 0.2964644852676786, + "grad_norm": 670.5381469726562, + "learning_rate": 3.560773510529192e-05, + "loss": 153.1753, + "step": 73380 + }, + { + "epoch": 0.29650488653304624, + "grad_norm": 2099.031982421875, + "learning_rate": 3.560598879413619e-05, + "loss": 109.3242, + "step": 73390 + }, + { + "epoch": 0.2965452877984138, + "grad_norm": 398.383544921875, + "learning_rate": 3.560424217873368e-05, + "loss": 
70.8962, + "step": 73400 + }, + { + "epoch": 0.29658568906378147, + "grad_norm": 757.4462280273438, + "learning_rate": 3.560249525911842e-05, + "loss": 78.6815, + "step": 73410 + }, + { + "epoch": 0.2966260903291491, + "grad_norm": 896.744384765625, + "learning_rate": 3.56007480353245e-05, + "loss": 82.0195, + "step": 73420 + }, + { + "epoch": 0.29666649159451675, + "grad_norm": 463.30767822265625, + "learning_rate": 3.559900050738596e-05, + "loss": 72.2658, + "step": 73430 + }, + { + "epoch": 0.2967068928598844, + "grad_norm": 1006.2472534179688, + "learning_rate": 3.559725267533686e-05, + "loss": 90.5217, + "step": 73440 + }, + { + "epoch": 0.296747294125252, + "grad_norm": 1168.34619140625, + "learning_rate": 3.559550453921131e-05, + "loss": 88.8084, + "step": 73450 + }, + { + "epoch": 0.2967876953906196, + "grad_norm": 895.0573120117188, + "learning_rate": 3.559375609904336e-05, + "loss": 99.7012, + "step": 73460 + }, + { + "epoch": 0.29682809665598725, + "grad_norm": 610.4022216796875, + "learning_rate": 3.559200735486711e-05, + "loss": 86.2379, + "step": 73470 + }, + { + "epoch": 0.2968684979213549, + "grad_norm": 1034.5635986328125, + "learning_rate": 3.559025830671664e-05, + "loss": 91.842, + "step": 73480 + }, + { + "epoch": 0.29690889918672253, + "grad_norm": 1043.623291015625, + "learning_rate": 3.558850895462607e-05, + "loss": 110.7254, + "step": 73490 + }, + { + "epoch": 0.29694930045209017, + "grad_norm": 458.1163024902344, + "learning_rate": 3.5586759298629486e-05, + "loss": 52.2999, + "step": 73500 + }, + { + "epoch": 0.2969897017174578, + "grad_norm": 1285.097412109375, + "learning_rate": 3.5585009338761005e-05, + "loss": 84.8727, + "step": 73510 + }, + { + "epoch": 0.2970301029828254, + "grad_norm": 341.47247314453125, + "learning_rate": 3.5583259075054746e-05, + "loss": 82.4082, + "step": 73520 + }, + { + "epoch": 0.29707050424819303, + "grad_norm": 816.9872436523438, + "learning_rate": 3.5581508507544825e-05, + "loss": 123.4681, + "step": 73530 + }, + { + "epoch": 0.2971109055135607, + "grad_norm": 294.5831298828125, + "learning_rate": 3.5579757636265377e-05, + "loss": 72.8225, + "step": 73540 + }, + { + "epoch": 0.2971513067789283, + "grad_norm": 710.370361328125, + "learning_rate": 3.557800646125053e-05, + "loss": 110.3215, + "step": 73550 + }, + { + "epoch": 0.29719170804429595, + "grad_norm": 469.0617980957031, + "learning_rate": 3.557625498253443e-05, + "loss": 105.6178, + "step": 73560 + }, + { + "epoch": 0.2972321093096636, + "grad_norm": 1631.373291015625, + "learning_rate": 3.5574503200151213e-05, + "loss": 123.4133, + "step": 73570 + }, + { + "epoch": 0.29727251057503123, + "grad_norm": 697.901611328125, + "learning_rate": 3.557275111413505e-05, + "loss": 151.9511, + "step": 73580 + }, + { + "epoch": 0.2973129118403988, + "grad_norm": 856.397705078125, + "learning_rate": 3.557099872452008e-05, + "loss": 97.4193, + "step": 73590 + }, + { + "epoch": 0.29735331310576646, + "grad_norm": 552.8448486328125, + "learning_rate": 3.5569246031340474e-05, + "loss": 107.3246, + "step": 73600 + }, + { + "epoch": 0.2973937143711341, + "grad_norm": 805.550048828125, + "learning_rate": 3.5567493034630395e-05, + "loss": 122.183, + "step": 73610 + }, + { + "epoch": 0.29743411563650174, + "grad_norm": 985.920166015625, + "learning_rate": 3.5565739734424034e-05, + "loss": 83.8456, + "step": 73620 + }, + { + "epoch": 0.2974745169018694, + "grad_norm": 660.8603515625, + "learning_rate": 3.5563986130755557e-05, + "loss": 69.1948, + "step": 73630 + }, + { + "epoch": 0.297514918167237, 
+ "grad_norm": 578.798583984375, + "learning_rate": 3.556223222365916e-05, + "loss": 78.2649, + "step": 73640 + }, + { + "epoch": 0.2975553194326046, + "grad_norm": 1126.590576171875, + "learning_rate": 3.556047801316903e-05, + "loss": 118.9123, + "step": 73650 + }, + { + "epoch": 0.29759572069797224, + "grad_norm": 687.1566162109375, + "learning_rate": 3.555872349931938e-05, + "loss": 113.1908, + "step": 73660 + }, + { + "epoch": 0.2976361219633399, + "grad_norm": 1330.6400146484375, + "learning_rate": 3.5556968682144395e-05, + "loss": 77.1064, + "step": 73670 + }, + { + "epoch": 0.2976765232287075, + "grad_norm": 548.3091430664062, + "learning_rate": 3.5555213561678305e-05, + "loss": 71.5434, + "step": 73680 + }, + { + "epoch": 0.29771692449407516, + "grad_norm": 800.5945434570312, + "learning_rate": 3.555345813795531e-05, + "loss": 110.3622, + "step": 73690 + }, + { + "epoch": 0.2977573257594428, + "grad_norm": 572.8548583984375, + "learning_rate": 3.5551702411009645e-05, + "loss": 53.0722, + "step": 73700 + }, + { + "epoch": 0.29779772702481044, + "grad_norm": 805.6329956054688, + "learning_rate": 3.5549946380875536e-05, + "loss": 105.1707, + "step": 73710 + }, + { + "epoch": 0.29783812829017803, + "grad_norm": 857.299560546875, + "learning_rate": 3.554819004758721e-05, + "loss": 85.835, + "step": 73720 + }, + { + "epoch": 0.29787852955554567, + "grad_norm": 1012.1513671875, + "learning_rate": 3.554643341117892e-05, + "loss": 99.1676, + "step": 73730 + }, + { + "epoch": 0.2979189308209133, + "grad_norm": 1537.2667236328125, + "learning_rate": 3.5544676471684906e-05, + "loss": 138.3423, + "step": 73740 + }, + { + "epoch": 0.29795933208628095, + "grad_norm": 660.005615234375, + "learning_rate": 3.554291922913942e-05, + "loss": 78.9076, + "step": 73750 + }, + { + "epoch": 0.2979997333516486, + "grad_norm": 406.47723388671875, + "learning_rate": 3.554116168357673e-05, + "loss": 74.2193, + "step": 73760 + }, + { + "epoch": 0.29804013461701623, + "grad_norm": 747.5978393554688, + "learning_rate": 3.5539403835031075e-05, + "loss": 66.7666, + "step": 73770 + }, + { + "epoch": 0.2980805358823838, + "grad_norm": 1316.7376708984375, + "learning_rate": 3.553764568353676e-05, + "loss": 116.5129, + "step": 73780 + }, + { + "epoch": 0.29812093714775145, + "grad_norm": 595.47314453125, + "learning_rate": 3.553588722912803e-05, + "loss": 94.4178, + "step": 73790 + }, + { + "epoch": 0.2981613384131191, + "grad_norm": 489.09930419921875, + "learning_rate": 3.553412847183919e-05, + "loss": 144.5807, + "step": 73800 + }, + { + "epoch": 0.29820173967848673, + "grad_norm": 607.5166015625, + "learning_rate": 3.5532369411704505e-05, + "loss": 74.4417, + "step": 73810 + }, + { + "epoch": 0.2982421409438544, + "grad_norm": 697.1487426757812, + "learning_rate": 3.5530610048758295e-05, + "loss": 97.4243, + "step": 73820 + }, + { + "epoch": 0.298282542209222, + "grad_norm": 825.5671997070312, + "learning_rate": 3.552885038303484e-05, + "loss": 95.2513, + "step": 73830 + }, + { + "epoch": 0.2983229434745896, + "grad_norm": 990.84765625, + "learning_rate": 3.552709041456845e-05, + "loss": 83.8904, + "step": 73840 + }, + { + "epoch": 0.29836334473995724, + "grad_norm": 2495.047119140625, + "learning_rate": 3.552533014339344e-05, + "loss": 116.3548, + "step": 73850 + }, + { + "epoch": 0.2984037460053249, + "grad_norm": 2136.4462890625, + "learning_rate": 3.552356956954413e-05, + "loss": 157.9737, + "step": 73860 + }, + { + "epoch": 0.2984441472706925, + "grad_norm": 510.4173278808594, + "learning_rate": 
3.552180869305483e-05, + "loss": 88.4836, + "step": 73870 + }, + { + "epoch": 0.29848454853606016, + "grad_norm": 791.8234252929688, + "learning_rate": 3.552004751395989e-05, + "loss": 73.9227, + "step": 73880 + }, + { + "epoch": 0.2985249498014278, + "grad_norm": 1223.3106689453125, + "learning_rate": 3.551828603229363e-05, + "loss": 106.1701, + "step": 73890 + }, + { + "epoch": 0.29856535106679544, + "grad_norm": 914.9356689453125, + "learning_rate": 3.551652424809039e-05, + "loss": 86.1097, + "step": 73900 + }, + { + "epoch": 0.298605752332163, + "grad_norm": 1044.721923828125, + "learning_rate": 3.551476216138453e-05, + "loss": 72.6803, + "step": 73910 + }, + { + "epoch": 0.29864615359753066, + "grad_norm": 1102.233642578125, + "learning_rate": 3.551299977221038e-05, + "loss": 118.1697, + "step": 73920 + }, + { + "epoch": 0.2986865548628983, + "grad_norm": 491.4104919433594, + "learning_rate": 3.551123708060233e-05, + "loss": 101.7775, + "step": 73930 + }, + { + "epoch": 0.29872695612826594, + "grad_norm": 1254.646484375, + "learning_rate": 3.550947408659471e-05, + "loss": 154.5753, + "step": 73940 + }, + { + "epoch": 0.2987673573936336, + "grad_norm": 396.68438720703125, + "learning_rate": 3.550771079022192e-05, + "loss": 96.2887, + "step": 73950 + }, + { + "epoch": 0.2988077586590012, + "grad_norm": 2299.55029296875, + "learning_rate": 3.550594719151832e-05, + "loss": 84.8141, + "step": 73960 + }, + { + "epoch": 0.2988481599243688, + "grad_norm": 397.58050537109375, + "learning_rate": 3.550418329051829e-05, + "loss": 92.7895, + "step": 73970 + }, + { + "epoch": 0.29888856118973645, + "grad_norm": 561.9165649414062, + "learning_rate": 3.550241908725624e-05, + "loss": 88.398, + "step": 73980 + }, + { + "epoch": 0.2989289624551041, + "grad_norm": 407.2564697265625, + "learning_rate": 3.550065458176653e-05, + "loss": 74.6208, + "step": 73990 + }, + { + "epoch": 0.2989693637204717, + "grad_norm": 630.5107421875, + "learning_rate": 3.549888977408359e-05, + "loss": 84.5313, + "step": 74000 + }, + { + "epoch": 0.29900976498583937, + "grad_norm": 781.4435424804688, + "learning_rate": 3.5497124664241816e-05, + "loss": 103.5429, + "step": 74010 + }, + { + "epoch": 0.299050166251207, + "grad_norm": 503.18585205078125, + "learning_rate": 3.549535925227562e-05, + "loss": 67.3364, + "step": 74020 + }, + { + "epoch": 0.29909056751657465, + "grad_norm": 565.442626953125, + "learning_rate": 3.549359353821941e-05, + "loss": 84.2031, + "step": 74030 + }, + { + "epoch": 0.29913096878194223, + "grad_norm": 822.9754028320312, + "learning_rate": 3.5491827522107624e-05, + "loss": 94.971, + "step": 74040 + }, + { + "epoch": 0.29917137004730987, + "grad_norm": 636.8062133789062, + "learning_rate": 3.5490061203974676e-05, + "loss": 93.8837, + "step": 74050 + }, + { + "epoch": 0.2992117713126775, + "grad_norm": 927.669189453125, + "learning_rate": 3.548829458385502e-05, + "loss": 105.0926, + "step": 74060 + }, + { + "epoch": 0.29925217257804515, + "grad_norm": 792.1777954101562, + "learning_rate": 3.548652766178308e-05, + "loss": 78.5365, + "step": 74070 + }, + { + "epoch": 0.2992925738434128, + "grad_norm": 1374.617919921875, + "learning_rate": 3.5484760437793316e-05, + "loss": 100.3402, + "step": 74080 + }, + { + "epoch": 0.29933297510878043, + "grad_norm": 2063.49267578125, + "learning_rate": 3.5482992911920174e-05, + "loss": 100.0549, + "step": 74090 + }, + { + "epoch": 0.299373376374148, + "grad_norm": 1076.0081787109375, + "learning_rate": 3.548122508419811e-05, + "loss": 136.6966, + "step": 74100 + }, + { 
+ "epoch": 0.29941377763951565, + "grad_norm": 482.9560546875, + "learning_rate": 3.547945695466159e-05, + "loss": 100.3358, + "step": 74110 + }, + { + "epoch": 0.2994541789048833, + "grad_norm": 953.5364379882812, + "learning_rate": 3.5477688523345095e-05, + "loss": 142.9263, + "step": 74120 + }, + { + "epoch": 0.29949458017025093, + "grad_norm": 606.3841552734375, + "learning_rate": 3.547591979028309e-05, + "loss": 123.0597, + "step": 74130 + }, + { + "epoch": 0.2995349814356186, + "grad_norm": 973.8309936523438, + "learning_rate": 3.5474150755510065e-05, + "loss": 81.4754, + "step": 74140 + }, + { + "epoch": 0.2995753827009862, + "grad_norm": 946.6571044921875, + "learning_rate": 3.54723814190605e-05, + "loss": 92.0348, + "step": 74150 + }, + { + "epoch": 0.2996157839663538, + "grad_norm": 1317.8057861328125, + "learning_rate": 3.54706117809689e-05, + "loss": 98.7904, + "step": 74160 + }, + { + "epoch": 0.29965618523172144, + "grad_norm": 1277.4215087890625, + "learning_rate": 3.546884184126975e-05, + "loss": 112.3245, + "step": 74170 + }, + { + "epoch": 0.2996965864970891, + "grad_norm": 814.4998168945312, + "learning_rate": 3.546707159999756e-05, + "loss": 104.1726, + "step": 74180 + }, + { + "epoch": 0.2997369877624567, + "grad_norm": 272.8604431152344, + "learning_rate": 3.5465301057186864e-05, + "loss": 55.0763, + "step": 74190 + }, + { + "epoch": 0.29977738902782436, + "grad_norm": 624.0514526367188, + "learning_rate": 3.5463530212872145e-05, + "loss": 69.2697, + "step": 74200 + }, + { + "epoch": 0.299817790293192, + "grad_norm": 328.993896484375, + "learning_rate": 3.546175906708795e-05, + "loss": 58.5152, + "step": 74210 + }, + { + "epoch": 0.29985819155855964, + "grad_norm": 541.69580078125, + "learning_rate": 3.545998761986881e-05, + "loss": 94.5393, + "step": 74220 + }, + { + "epoch": 0.2998985928239272, + "grad_norm": 1323.9019775390625, + "learning_rate": 3.545821587124924e-05, + "loss": 105.6672, + "step": 74230 + }, + { + "epoch": 0.29993899408929486, + "grad_norm": 665.5728149414062, + "learning_rate": 3.54564438212638e-05, + "loss": 89.207, + "step": 74240 + }, + { + "epoch": 0.2999793953546625, + "grad_norm": 448.5319519042969, + "learning_rate": 3.5454671469947024e-05, + "loss": 63.8227, + "step": 74250 + }, + { + "epoch": 0.30001979662003014, + "grad_norm": 801.254638671875, + "learning_rate": 3.5452898817333474e-05, + "loss": 111.091, + "step": 74260 + }, + { + "epoch": 0.3000601978853978, + "grad_norm": 676.0682983398438, + "learning_rate": 3.545112586345771e-05, + "loss": 72.5947, + "step": 74270 + }, + { + "epoch": 0.3001005991507654, + "grad_norm": 1117.1337890625, + "learning_rate": 3.544935260835429e-05, + "loss": 101.322, + "step": 74280 + }, + { + "epoch": 0.300141000416133, + "grad_norm": 513.2069702148438, + "learning_rate": 3.5447579052057776e-05, + "loss": 78.8297, + "step": 74290 + }, + { + "epoch": 0.30018140168150065, + "grad_norm": 443.7637023925781, + "learning_rate": 3.544580519460277e-05, + "loss": 94.8032, + "step": 74300 + }, + { + "epoch": 0.3002218029468683, + "grad_norm": 4922.9921875, + "learning_rate": 3.5444031036023837e-05, + "loss": 124.3278, + "step": 74310 + }, + { + "epoch": 0.30026220421223593, + "grad_norm": 868.1502075195312, + "learning_rate": 3.5442256576355564e-05, + "loss": 96.5833, + "step": 74320 + }, + { + "epoch": 0.30030260547760357, + "grad_norm": 699.4078369140625, + "learning_rate": 3.544048181563255e-05, + "loss": 155.8657, + "step": 74330 + }, + { + "epoch": 0.3003430067429712, + "grad_norm": 994.3177490234375, + 
"learning_rate": 3.5438706753889396e-05, + "loss": 121.9613, + "step": 74340 + }, + { + "epoch": 0.30038340800833885, + "grad_norm": 597.1870727539062, + "learning_rate": 3.54369313911607e-05, + "loss": 114.3603, + "step": 74350 + }, + { + "epoch": 0.30042380927370643, + "grad_norm": 3997.804443359375, + "learning_rate": 3.543515572748108e-05, + "loss": 95.1283, + "step": 74360 + }, + { + "epoch": 0.3004642105390741, + "grad_norm": 817.706298828125, + "learning_rate": 3.5433379762885165e-05, + "loss": 107.7654, + "step": 74370 + }, + { + "epoch": 0.3005046118044417, + "grad_norm": 759.072265625, + "learning_rate": 3.543160349740755e-05, + "loss": 90.5282, + "step": 74380 + }, + { + "epoch": 0.30054501306980935, + "grad_norm": 320.8977355957031, + "learning_rate": 3.542982693108289e-05, + "loss": 86.5773, + "step": 74390 + }, + { + "epoch": 0.300585414335177, + "grad_norm": 1489.1531982421875, + "learning_rate": 3.542805006394581e-05, + "loss": 97.7011, + "step": 74400 + }, + { + "epoch": 0.30062581560054463, + "grad_norm": 997.093505859375, + "learning_rate": 3.5426272896030944e-05, + "loss": 100.8638, + "step": 74410 + }, + { + "epoch": 0.3006662168659122, + "grad_norm": 338.9146728515625, + "learning_rate": 3.5424495427372946e-05, + "loss": 97.3218, + "step": 74420 + }, + { + "epoch": 0.30070661813127986, + "grad_norm": 655.2784423828125, + "learning_rate": 3.5422717658006475e-05, + "loss": 84.424, + "step": 74430 + }, + { + "epoch": 0.3007470193966475, + "grad_norm": 1148.9034423828125, + "learning_rate": 3.542093958796618e-05, + "loss": 93.0877, + "step": 74440 + }, + { + "epoch": 0.30078742066201514, + "grad_norm": 281.6935119628906, + "learning_rate": 3.541916121728673e-05, + "loss": 57.0628, + "step": 74450 + }, + { + "epoch": 0.3008278219273828, + "grad_norm": 601.1865844726562, + "learning_rate": 3.541738254600279e-05, + "loss": 81.3794, + "step": 74460 + }, + { + "epoch": 0.3008682231927504, + "grad_norm": 527.767578125, + "learning_rate": 3.541560357414904e-05, + "loss": 83.9061, + "step": 74470 + }, + { + "epoch": 0.300908624458118, + "grad_norm": 801.2999267578125, + "learning_rate": 3.5413824301760165e-05, + "loss": 106.5141, + "step": 74480 + }, + { + "epoch": 0.30094902572348564, + "grad_norm": 987.2526245117188, + "learning_rate": 3.541204472887085e-05, + "loss": 105.9663, + "step": 74490 + }, + { + "epoch": 0.3009894269888533, + "grad_norm": 562.3426513671875, + "learning_rate": 3.541026485551579e-05, + "loss": 72.8046, + "step": 74500 + }, + { + "epoch": 0.3010298282542209, + "grad_norm": 920.2920532226562, + "learning_rate": 3.540848468172968e-05, + "loss": 120.0557, + "step": 74510 + }, + { + "epoch": 0.30107022951958856, + "grad_norm": 909.4987182617188, + "learning_rate": 3.540670420754722e-05, + "loss": 84.949, + "step": 74520 + }, + { + "epoch": 0.3011106307849562, + "grad_norm": 415.70953369140625, + "learning_rate": 3.540492343300314e-05, + "loss": 107.3379, + "step": 74530 + }, + { + "epoch": 0.30115103205032384, + "grad_norm": 668.4337768554688, + "learning_rate": 3.540314235813215e-05, + "loss": 94.1862, + "step": 74540 + }, + { + "epoch": 0.3011914333156914, + "grad_norm": 600.5255737304688, + "learning_rate": 3.540136098296896e-05, + "loss": 85.5975, + "step": 74550 + }, + { + "epoch": 0.30123183458105907, + "grad_norm": 1146.2159423828125, + "learning_rate": 3.5399579307548314e-05, + "loss": 95.3545, + "step": 74560 + }, + { + "epoch": 0.3012722358464267, + "grad_norm": 836.2349853515625, + "learning_rate": 3.539779733190494e-05, + "loss": 90.3371, + 
"step": 74570 + }, + { + "epoch": 0.30131263711179435, + "grad_norm": 1415.2987060546875, + "learning_rate": 3.539601505607358e-05, + "loss": 81.427, + "step": 74580 + }, + { + "epoch": 0.301353038377162, + "grad_norm": 592.9070434570312, + "learning_rate": 3.5394232480088986e-05, + "loss": 81.3222, + "step": 74590 + }, + { + "epoch": 0.3013934396425296, + "grad_norm": 781.201904296875, + "learning_rate": 3.5392449603985894e-05, + "loss": 109.5037, + "step": 74600 + }, + { + "epoch": 0.3014338409078972, + "grad_norm": 937.92333984375, + "learning_rate": 3.539066642779907e-05, + "loss": 93.5191, + "step": 74610 + }, + { + "epoch": 0.30147424217326485, + "grad_norm": 382.8507080078125, + "learning_rate": 3.538888295156329e-05, + "loss": 83.4422, + "step": 74620 + }, + { + "epoch": 0.3015146434386325, + "grad_norm": 3662.99658203125, + "learning_rate": 3.538709917531331e-05, + "loss": 76.1091, + "step": 74630 + }, + { + "epoch": 0.30155504470400013, + "grad_norm": 1154.6297607421875, + "learning_rate": 3.538531509908391e-05, + "loss": 88.0078, + "step": 74640 + }, + { + "epoch": 0.30159544596936777, + "grad_norm": 574.8715209960938, + "learning_rate": 3.538353072290988e-05, + "loss": 99.8561, + "step": 74650 + }, + { + "epoch": 0.3016358472347354, + "grad_norm": 559.659912109375, + "learning_rate": 3.538174604682599e-05, + "loss": 77.5735, + "step": 74660 + }, + { + "epoch": 0.30167624850010305, + "grad_norm": 830.078369140625, + "learning_rate": 3.537996107086704e-05, + "loss": 118.018, + "step": 74670 + }, + { + "epoch": 0.30171664976547063, + "grad_norm": 718.487548828125, + "learning_rate": 3.537817579506783e-05, + "loss": 86.2108, + "step": 74680 + }, + { + "epoch": 0.3017570510308383, + "grad_norm": 982.845458984375, + "learning_rate": 3.537639021946317e-05, + "loss": 90.9477, + "step": 74690 + }, + { + "epoch": 0.3017974522962059, + "grad_norm": 886.0020751953125, + "learning_rate": 3.5374604344087866e-05, + "loss": 74.7436, + "step": 74700 + }, + { + "epoch": 0.30183785356157355, + "grad_norm": 1066.4111328125, + "learning_rate": 3.5372818168976734e-05, + "loss": 64.1878, + "step": 74710 + }, + { + "epoch": 0.3018782548269412, + "grad_norm": 992.9368896484375, + "learning_rate": 3.53710316941646e-05, + "loss": 77.5955, + "step": 74720 + }, + { + "epoch": 0.30191865609230883, + "grad_norm": 553.1739501953125, + "learning_rate": 3.5369244919686284e-05, + "loss": 83.7009, + "step": 74730 + }, + { + "epoch": 0.3019590573576764, + "grad_norm": 1036.6888427734375, + "learning_rate": 3.536745784557663e-05, + "loss": 81.3799, + "step": 74740 + }, + { + "epoch": 0.30199945862304406, + "grad_norm": 1021.1803588867188, + "learning_rate": 3.536567047187047e-05, + "loss": 150.6701, + "step": 74750 + }, + { + "epoch": 0.3020398598884117, + "grad_norm": 1102.6463623046875, + "learning_rate": 3.536388279860266e-05, + "loss": 89.5919, + "step": 74760 + }, + { + "epoch": 0.30208026115377934, + "grad_norm": 1017.8956298828125, + "learning_rate": 3.536209482580804e-05, + "loss": 116.5996, + "step": 74770 + }, + { + "epoch": 0.302120662419147, + "grad_norm": 910.1808471679688, + "learning_rate": 3.536030655352147e-05, + "loss": 92.136, + "step": 74780 + }, + { + "epoch": 0.3021610636845146, + "grad_norm": 430.8291320800781, + "learning_rate": 3.535851798177782e-05, + "loss": 84.4523, + "step": 74790 + }, + { + "epoch": 0.3022014649498822, + "grad_norm": 1068.9766845703125, + "learning_rate": 3.535672911061196e-05, + "loss": 101.269, + "step": 74800 + }, + { + "epoch": 0.30224186621524984, + "grad_norm": 
491.10235595703125, + "learning_rate": 3.535493994005874e-05, + "loss": 59.4957, + "step": 74810 + }, + { + "epoch": 0.3022822674806175, + "grad_norm": 581.9970092773438, + "learning_rate": 3.535315047015308e-05, + "loss": 66.7833, + "step": 74820 + }, + { + "epoch": 0.3023226687459851, + "grad_norm": 879.5509033203125, + "learning_rate": 3.535136070092984e-05, + "loss": 109.5769, + "step": 74830 + }, + { + "epoch": 0.30236307001135276, + "grad_norm": 1119.7183837890625, + "learning_rate": 3.5349570632423925e-05, + "loss": 149.3705, + "step": 74840 + }, + { + "epoch": 0.3024034712767204, + "grad_norm": 432.0198059082031, + "learning_rate": 3.534778026467022e-05, + "loss": 86.8758, + "step": 74850 + }, + { + "epoch": 0.30244387254208804, + "grad_norm": 1420.2919921875, + "learning_rate": 3.534598959770364e-05, + "loss": 142.0068, + "step": 74860 + }, + { + "epoch": 0.30248427380745563, + "grad_norm": 508.3063049316406, + "learning_rate": 3.5344198631559096e-05, + "loss": 83.9257, + "step": 74870 + }, + { + "epoch": 0.30252467507282327, + "grad_norm": 1449.8505859375, + "learning_rate": 3.5342407366271495e-05, + "loss": 81.5313, + "step": 74880 + }, + { + "epoch": 0.3025650763381909, + "grad_norm": 1799.7210693359375, + "learning_rate": 3.534061580187577e-05, + "loss": 104.9424, + "step": 74890 + }, + { + "epoch": 0.30260547760355855, + "grad_norm": 729.3676147460938, + "learning_rate": 3.5338823938406834e-05, + "loss": 83.3082, + "step": 74900 + }, + { + "epoch": 0.3026458788689262, + "grad_norm": 1916.992431640625, + "learning_rate": 3.533703177589964e-05, + "loss": 84.789, + "step": 74910 + }, + { + "epoch": 0.30268628013429383, + "grad_norm": 681.5362548828125, + "learning_rate": 3.53352393143891e-05, + "loss": 62.2884, + "step": 74920 + }, + { + "epoch": 0.3027266813996614, + "grad_norm": 1132.558837890625, + "learning_rate": 3.5333446553910184e-05, + "loss": 90.4527, + "step": 74930 + }, + { + "epoch": 0.30276708266502905, + "grad_norm": 1380.676513671875, + "learning_rate": 3.533165349449783e-05, + "loss": 144.8058, + "step": 74940 + }, + { + "epoch": 0.3028074839303967, + "grad_norm": 707.5682983398438, + "learning_rate": 3.5329860136187e-05, + "loss": 84.9177, + "step": 74950 + }, + { + "epoch": 0.30284788519576433, + "grad_norm": 589.2267456054688, + "learning_rate": 3.5328066479012655e-05, + "loss": 92.9963, + "step": 74960 + }, + { + "epoch": 0.302888286461132, + "grad_norm": 438.8916931152344, + "learning_rate": 3.5326272523009754e-05, + "loss": 85.9967, + "step": 74970 + }, + { + "epoch": 0.3029286877264996, + "grad_norm": 636.9055786132812, + "learning_rate": 3.532447826821329e-05, + "loss": 59.0041, + "step": 74980 + }, + { + "epoch": 0.30296908899186725, + "grad_norm": 753.7213745117188, + "learning_rate": 3.532268371465823e-05, + "loss": 95.0601, + "step": 74990 + }, + { + "epoch": 0.30300949025723484, + "grad_norm": 608.34423828125, + "learning_rate": 3.532088886237956e-05, + "loss": 68.0231, + "step": 75000 + }, + { + "epoch": 0.3030498915226025, + "grad_norm": 816.1535034179688, + "learning_rate": 3.531909371141228e-05, + "loss": 162.0904, + "step": 75010 + }, + { + "epoch": 0.3030902927879701, + "grad_norm": 1256.03955078125, + "learning_rate": 3.531729826179138e-05, + "loss": 90.5211, + "step": 75020 + }, + { + "epoch": 0.30313069405333776, + "grad_norm": 1162.117431640625, + "learning_rate": 3.531550251355186e-05, + "loss": 79.5033, + "step": 75030 + }, + { + "epoch": 0.3031710953187054, + "grad_norm": 1199.6749267578125, + "learning_rate": 3.531370646672874e-05, + 
"loss": 84.9314, + "step": 75040 + }, + { + "epoch": 0.30321149658407304, + "grad_norm": 808.6011352539062, + "learning_rate": 3.5311910121357016e-05, + "loss": 90.0238, + "step": 75050 + }, + { + "epoch": 0.3032518978494406, + "grad_norm": 548.4639282226562, + "learning_rate": 3.531011347747173e-05, + "loss": 90.9224, + "step": 75060 + }, + { + "epoch": 0.30329229911480826, + "grad_norm": 1319.4871826171875, + "learning_rate": 3.53083165351079e-05, + "loss": 143.9744, + "step": 75070 + }, + { + "epoch": 0.3033327003801759, + "grad_norm": 593.004638671875, + "learning_rate": 3.530651929430055e-05, + "loss": 94.2955, + "step": 75080 + }, + { + "epoch": 0.30337310164554354, + "grad_norm": 791.7371826171875, + "learning_rate": 3.5304721755084734e-05, + "loss": 59.7628, + "step": 75090 + }, + { + "epoch": 0.3034135029109112, + "grad_norm": 756.7528686523438, + "learning_rate": 3.530292391749549e-05, + "loss": 82.2418, + "step": 75100 + }, + { + "epoch": 0.3034539041762788, + "grad_norm": 472.8874206542969, + "learning_rate": 3.530112578156786e-05, + "loss": 131.3817, + "step": 75110 + }, + { + "epoch": 0.3034943054416464, + "grad_norm": 780.8790893554688, + "learning_rate": 3.529932734733691e-05, + "loss": 153.8077, + "step": 75120 + }, + { + "epoch": 0.30353470670701405, + "grad_norm": 659.4782104492188, + "learning_rate": 3.529752861483769e-05, + "loss": 103.8008, + "step": 75130 + }, + { + "epoch": 0.3035751079723817, + "grad_norm": 758.5029296875, + "learning_rate": 3.529572958410528e-05, + "loss": 60.0209, + "step": 75140 + }, + { + "epoch": 0.3036155092377493, + "grad_norm": 754.2025756835938, + "learning_rate": 3.529393025517475e-05, + "loss": 94.0954, + "step": 75150 + }, + { + "epoch": 0.30365591050311697, + "grad_norm": 822.6426391601562, + "learning_rate": 3.529213062808116e-05, + "loss": 61.464, + "step": 75160 + }, + { + "epoch": 0.3036963117684846, + "grad_norm": 938.2557983398438, + "learning_rate": 3.5290330702859624e-05, + "loss": 89.6551, + "step": 75170 + }, + { + "epoch": 0.30373671303385225, + "grad_norm": 659.6058349609375, + "learning_rate": 3.528853047954521e-05, + "loss": 89.4903, + "step": 75180 + }, + { + "epoch": 0.30377711429921983, + "grad_norm": 665.314697265625, + "learning_rate": 3.5286729958173036e-05, + "loss": 110.4092, + "step": 75190 + }, + { + "epoch": 0.30381751556458747, + "grad_norm": 516.0924072265625, + "learning_rate": 3.528492913877818e-05, + "loss": 105.2134, + "step": 75200 + }, + { + "epoch": 0.3038579168299551, + "grad_norm": 746.4448852539062, + "learning_rate": 3.528312802139577e-05, + "loss": 88.1314, + "step": 75210 + }, + { + "epoch": 0.30389831809532275, + "grad_norm": 557.1563720703125, + "learning_rate": 3.5281326606060905e-05, + "loss": 87.2663, + "step": 75220 + }, + { + "epoch": 0.3039387193606904, + "grad_norm": 2042.0421142578125, + "learning_rate": 3.5279524892808714e-05, + "loss": 99.5958, + "step": 75230 + }, + { + "epoch": 0.30397912062605803, + "grad_norm": 757.5419921875, + "learning_rate": 3.5277722881674314e-05, + "loss": 99.1284, + "step": 75240 + }, + { + "epoch": 0.3040195218914256, + "grad_norm": 1037.7877197265625, + "learning_rate": 3.527592057269285e-05, + "loss": 60.1063, + "step": 75250 + }, + { + "epoch": 0.30405992315679325, + "grad_norm": 617.4739379882812, + "learning_rate": 3.527411796589944e-05, + "loss": 86.8463, + "step": 75260 + }, + { + "epoch": 0.3041003244221609, + "grad_norm": 1774.4805908203125, + "learning_rate": 3.5272315061329236e-05, + "loss": 86.31, + "step": 75270 + }, + { + "epoch": 
0.30414072568752853, + "grad_norm": 573.5119018554688, + "learning_rate": 3.52705118590174e-05, + "loss": 67.479, + "step": 75280 + }, + { + "epoch": 0.3041811269528962, + "grad_norm": 444.5970764160156, + "learning_rate": 3.5268708358999064e-05, + "loss": 82.9734, + "step": 75290 + }, + { + "epoch": 0.3042215282182638, + "grad_norm": 519.9718017578125, + "learning_rate": 3.52669045613094e-05, + "loss": 62.2762, + "step": 75300 + }, + { + "epoch": 0.30426192948363145, + "grad_norm": 620.54931640625, + "learning_rate": 3.5265100465983564e-05, + "loss": 86.4676, + "step": 75310 + }, + { + "epoch": 0.30430233074899904, + "grad_norm": 1024.6064453125, + "learning_rate": 3.526329607305675e-05, + "loss": 75.09, + "step": 75320 + }, + { + "epoch": 0.3043427320143667, + "grad_norm": 993.5052490234375, + "learning_rate": 3.526149138256411e-05, + "loss": 85.0729, + "step": 75330 + }, + { + "epoch": 0.3043831332797343, + "grad_norm": 500.16461181640625, + "learning_rate": 3.525968639454084e-05, + "loss": 79.9794, + "step": 75340 + }, + { + "epoch": 0.30442353454510196, + "grad_norm": 1147.7724609375, + "learning_rate": 3.525788110902213e-05, + "loss": 110.9145, + "step": 75350 + }, + { + "epoch": 0.3044639358104696, + "grad_norm": 2272.030029296875, + "learning_rate": 3.525607552604317e-05, + "loss": 96.0947, + "step": 75360 + }, + { + "epoch": 0.30450433707583724, + "grad_norm": 1114.7352294921875, + "learning_rate": 3.525426964563916e-05, + "loss": 97.097, + "step": 75370 + }, + { + "epoch": 0.3045447383412048, + "grad_norm": 827.1620483398438, + "learning_rate": 3.525246346784532e-05, + "loss": 108.8357, + "step": 75380 + }, + { + "epoch": 0.30458513960657246, + "grad_norm": 748.5338134765625, + "learning_rate": 3.525065699269684e-05, + "loss": 85.1238, + "step": 75390 + }, + { + "epoch": 0.3046255408719401, + "grad_norm": 487.64788818359375, + "learning_rate": 3.524885022022896e-05, + "loss": 101.7588, + "step": 75400 + }, + { + "epoch": 0.30466594213730774, + "grad_norm": 649.5431518554688, + "learning_rate": 3.5247043150476895e-05, + "loss": 78.1904, + "step": 75410 + }, + { + "epoch": 0.3047063434026754, + "grad_norm": 673.86962890625, + "learning_rate": 3.5245235783475866e-05, + "loss": 72.804, + "step": 75420 + }, + { + "epoch": 0.304746744668043, + "grad_norm": 1036.577392578125, + "learning_rate": 3.524342811926112e-05, + "loss": 77.892, + "step": 75430 + }, + { + "epoch": 0.3047871459334106, + "grad_norm": 953.71826171875, + "learning_rate": 3.524162015786789e-05, + "loss": 100.7487, + "step": 75440 + }, + { + "epoch": 0.30482754719877825, + "grad_norm": 1372.090087890625, + "learning_rate": 3.523981189933144e-05, + "loss": 106.9125, + "step": 75450 + }, + { + "epoch": 0.3048679484641459, + "grad_norm": 896.4630737304688, + "learning_rate": 3.5238003343687005e-05, + "loss": 141.7754, + "step": 75460 + }, + { + "epoch": 0.30490834972951353, + "grad_norm": 445.6387939453125, + "learning_rate": 3.523619449096985e-05, + "loss": 47.7618, + "step": 75470 + }, + { + "epoch": 0.30494875099488117, + "grad_norm": 852.0575561523438, + "learning_rate": 3.523438534121524e-05, + "loss": 89.086, + "step": 75480 + }, + { + "epoch": 0.3049891522602488, + "grad_norm": 697.03662109375, + "learning_rate": 3.523257589445845e-05, + "loss": 76.3027, + "step": 75490 + }, + { + "epoch": 0.30502955352561645, + "grad_norm": 1090.6300048828125, + "learning_rate": 3.523076615073474e-05, + "loss": 118.0551, + "step": 75500 + }, + { + "epoch": 0.30506995479098403, + "grad_norm": 804.5003662109375, + "learning_rate": 
3.522895611007941e-05, + "loss": 119.6818, + "step": 75510 + }, + { + "epoch": 0.3051103560563517, + "grad_norm": 752.4467163085938, + "learning_rate": 3.522714577252773e-05, + "loss": 118.7564, + "step": 75520 + }, + { + "epoch": 0.3051507573217193, + "grad_norm": 783.4786376953125, + "learning_rate": 3.5225335138115016e-05, + "loss": 83.9836, + "step": 75530 + }, + { + "epoch": 0.30519115858708695, + "grad_norm": 764.0257568359375, + "learning_rate": 3.522352420687655e-05, + "loss": 73.0073, + "step": 75540 + }, + { + "epoch": 0.3052315598524546, + "grad_norm": 737.009765625, + "learning_rate": 3.522171297884764e-05, + "loss": 110.8381, + "step": 75550 + }, + { + "epoch": 0.30527196111782223, + "grad_norm": 525.6859741210938, + "learning_rate": 3.52199014540636e-05, + "loss": 113.8256, + "step": 75560 + }, + { + "epoch": 0.3053123623831898, + "grad_norm": 822.7090454101562, + "learning_rate": 3.5218089632559744e-05, + "loss": 79.6426, + "step": 75570 + }, + { + "epoch": 0.30535276364855746, + "grad_norm": 296.90704345703125, + "learning_rate": 3.52162775143714e-05, + "loss": 62.7881, + "step": 75580 + }, + { + "epoch": 0.3053931649139251, + "grad_norm": 486.07244873046875, + "learning_rate": 3.521446509953389e-05, + "loss": 81.8727, + "step": 75590 + }, + { + "epoch": 0.30543356617929274, + "grad_norm": 1020.9933471679688, + "learning_rate": 3.521265238808255e-05, + "loss": 71.1298, + "step": 75600 + }, + { + "epoch": 0.3054739674446604, + "grad_norm": 1117.8572998046875, + "learning_rate": 3.521083938005272e-05, + "loss": 100.2724, + "step": 75610 + }, + { + "epoch": 0.305514368710028, + "grad_norm": 543.33447265625, + "learning_rate": 3.520902607547974e-05, + "loss": 72.092, + "step": 75620 + }, + { + "epoch": 0.30555476997539566, + "grad_norm": 801.8721923828125, + "learning_rate": 3.520721247439897e-05, + "loss": 53.5707, + "step": 75630 + }, + { + "epoch": 0.30559517124076324, + "grad_norm": 1049.67626953125, + "learning_rate": 3.520539857684577e-05, + "loss": 121.6607, + "step": 75640 + }, + { + "epoch": 0.3056355725061309, + "grad_norm": 593.3145751953125, + "learning_rate": 3.520358438285548e-05, + "loss": 116.4022, + "step": 75650 + }, + { + "epoch": 0.3056759737714985, + "grad_norm": 900.843994140625, + "learning_rate": 3.5201769892463506e-05, + "loss": 82.4357, + "step": 75660 + }, + { + "epoch": 0.30571637503686616, + "grad_norm": 730.3462524414062, + "learning_rate": 3.519995510570519e-05, + "loss": 108.7263, + "step": 75670 + }, + { + "epoch": 0.3057567763022338, + "grad_norm": 641.1817016601562, + "learning_rate": 3.519814002261593e-05, + "loss": 75.3302, + "step": 75680 + }, + { + "epoch": 0.30579717756760144, + "grad_norm": 1239.102783203125, + "learning_rate": 3.5196324643231094e-05, + "loss": 99.4065, + "step": 75690 + }, + { + "epoch": 0.305837578832969, + "grad_norm": 737.1497192382812, + "learning_rate": 3.51945089675861e-05, + "loss": 67.5064, + "step": 75700 + }, + { + "epoch": 0.30587798009833667, + "grad_norm": 1194.5933837890625, + "learning_rate": 3.5192692995716324e-05, + "loss": 86.7827, + "step": 75710 + }, + { + "epoch": 0.3059183813637043, + "grad_norm": 366.7989807128906, + "learning_rate": 3.519087672765717e-05, + "loss": 100.5204, + "step": 75720 + }, + { + "epoch": 0.30595878262907195, + "grad_norm": 468.67315673828125, + "learning_rate": 3.518906016344406e-05, + "loss": 93.8615, + "step": 75730 + }, + { + "epoch": 0.3059991838944396, + "grad_norm": 244.69961547851562, + "learning_rate": 3.5187243303112406e-05, + "loss": 91.6946, + "step": 75740 + }, 
+ { + "epoch": 0.3060395851598072, + "grad_norm": 537.9833374023438, + "learning_rate": 3.518542614669762e-05, + "loss": 83.1466, + "step": 75750 + }, + { + "epoch": 0.3060799864251748, + "grad_norm": 549.72412109375, + "learning_rate": 3.518360869423514e-05, + "loss": 112.3695, + "step": 75760 + }, + { + "epoch": 0.30612038769054245, + "grad_norm": 3424.054931640625, + "learning_rate": 3.518179094576038e-05, + "loss": 121.9671, + "step": 75770 + }, + { + "epoch": 0.3061607889559101, + "grad_norm": 1080.578369140625, + "learning_rate": 3.5179972901308794e-05, + "loss": 78.1097, + "step": 75780 + }, + { + "epoch": 0.30620119022127773, + "grad_norm": 1149.3614501953125, + "learning_rate": 3.5178154560915825e-05, + "loss": 133.2095, + "step": 75790 + }, + { + "epoch": 0.30624159148664537, + "grad_norm": 1135.28857421875, + "learning_rate": 3.5176335924616916e-05, + "loss": 156.1948, + "step": 75800 + }, + { + "epoch": 0.306281992752013, + "grad_norm": 450.2395324707031, + "learning_rate": 3.517451699244752e-05, + "loss": 137.8065, + "step": 75810 + }, + { + "epoch": 0.30632239401738065, + "grad_norm": 1386.046630859375, + "learning_rate": 3.517269776444311e-05, + "loss": 106.7791, + "step": 75820 + }, + { + "epoch": 0.30636279528274823, + "grad_norm": 1172.8770751953125, + "learning_rate": 3.5170878240639145e-05, + "loss": 121.2994, + "step": 75830 + }, + { + "epoch": 0.3064031965481159, + "grad_norm": 565.5414428710938, + "learning_rate": 3.516905842107109e-05, + "loss": 71.6139, + "step": 75840 + }, + { + "epoch": 0.3064435978134835, + "grad_norm": 1030.6856689453125, + "learning_rate": 3.5167238305774444e-05, + "loss": 96.7907, + "step": 75850 + }, + { + "epoch": 0.30648399907885115, + "grad_norm": 776.51171875, + "learning_rate": 3.516541789478467e-05, + "loss": 116.3786, + "step": 75860 + }, + { + "epoch": 0.3065244003442188, + "grad_norm": 540.6917724609375, + "learning_rate": 3.516359718813727e-05, + "loss": 64.7987, + "step": 75870 + }, + { + "epoch": 0.30656480160958643, + "grad_norm": 733.9385375976562, + "learning_rate": 3.516177618586773e-05, + "loss": 70.9422, + "step": 75880 + }, + { + "epoch": 0.306605202874954, + "grad_norm": 688.9964599609375, + "learning_rate": 3.5159954888011564e-05, + "loss": 122.7526, + "step": 75890 + }, + { + "epoch": 0.30664560414032166, + "grad_norm": 1064.5574951171875, + "learning_rate": 3.515813329460427e-05, + "loss": 62.1875, + "step": 75900 + }, + { + "epoch": 0.3066860054056893, + "grad_norm": 711.2247314453125, + "learning_rate": 3.5156311405681366e-05, + "loss": 55.8943, + "step": 75910 + }, + { + "epoch": 0.30672640667105694, + "grad_norm": 706.8795166015625, + "learning_rate": 3.5154489221278366e-05, + "loss": 95.9284, + "step": 75920 + }, + { + "epoch": 0.3067668079364246, + "grad_norm": 1085.199951171875, + "learning_rate": 3.51526667414308e-05, + "loss": 98.3391, + "step": 75930 + }, + { + "epoch": 0.3068072092017922, + "grad_norm": 577.3673706054688, + "learning_rate": 3.515084396617419e-05, + "loss": 127.5729, + "step": 75940 + }, + { + "epoch": 0.3068476104671598, + "grad_norm": 822.4535522460938, + "learning_rate": 3.514902089554408e-05, + "loss": 115.634, + "step": 75950 + }, + { + "epoch": 0.30688801173252744, + "grad_norm": 591.6640625, + "learning_rate": 3.5147197529576e-05, + "loss": 139.0624, + "step": 75960 + }, + { + "epoch": 0.3069284129978951, + "grad_norm": 626.9274291992188, + "learning_rate": 3.514537386830552e-05, + "loss": 70.475, + "step": 75970 + }, + { + "epoch": 0.3069688142632627, + "grad_norm": 
1219.3929443359375, + "learning_rate": 3.5143549911768166e-05, + "loss": 94.192, + "step": 75980 + }, + { + "epoch": 0.30700921552863036, + "grad_norm": 529.3153076171875, + "learning_rate": 3.514172565999952e-05, + "loss": 93.4651, + "step": 75990 + }, + { + "epoch": 0.307049616793998, + "grad_norm": 794.3355712890625, + "learning_rate": 3.513990111303513e-05, + "loss": 106.6991, + "step": 76000 + }, + { + "epoch": 0.30709001805936564, + "grad_norm": 874.5580444335938, + "learning_rate": 3.5138076270910575e-05, + "loss": 92.7786, + "step": 76010 + }, + { + "epoch": 0.30713041932473323, + "grad_norm": 708.5238037109375, + "learning_rate": 3.513625113366144e-05, + "loss": 66.4339, + "step": 76020 + }, + { + "epoch": 0.30717082059010087, + "grad_norm": 505.3689880371094, + "learning_rate": 3.513442570132328e-05, + "loss": 80.2903, + "step": 76030 + }, + { + "epoch": 0.3072112218554685, + "grad_norm": 993.173095703125, + "learning_rate": 3.513259997393171e-05, + "loss": 85.4304, + "step": 76040 + }, + { + "epoch": 0.30725162312083615, + "grad_norm": 1095.616943359375, + "learning_rate": 3.51307739515223e-05, + "loss": 105.3665, + "step": 76050 + }, + { + "epoch": 0.3072920243862038, + "grad_norm": 1705.7646484375, + "learning_rate": 3.512894763413068e-05, + "loss": 141.6371, + "step": 76060 + }, + { + "epoch": 0.30733242565157143, + "grad_norm": 708.2833862304688, + "learning_rate": 3.5127121021792425e-05, + "loss": 85.8733, + "step": 76070 + }, + { + "epoch": 0.307372826916939, + "grad_norm": 563.24169921875, + "learning_rate": 3.512529411454316e-05, + "loss": 88.8286, + "step": 76080 + }, + { + "epoch": 0.30741322818230665, + "grad_norm": 471.5151672363281, + "learning_rate": 3.51234669124185e-05, + "loss": 59.6623, + "step": 76090 + }, + { + "epoch": 0.3074536294476743, + "grad_norm": 1196.919677734375, + "learning_rate": 3.512163941545407e-05, + "loss": 108.7151, + "step": 76100 + }, + { + "epoch": 0.30749403071304193, + "grad_norm": 663.6149291992188, + "learning_rate": 3.511981162368549e-05, + "loss": 92.5439, + "step": 76110 + }, + { + "epoch": 0.3075344319784096, + "grad_norm": 684.4686889648438, + "learning_rate": 3.5117983537148395e-05, + "loss": 93.5788, + "step": 76120 + }, + { + "epoch": 0.3075748332437772, + "grad_norm": 1088.009033203125, + "learning_rate": 3.511615515587843e-05, + "loss": 133.8421, + "step": 76130 + }, + { + "epoch": 0.30761523450914485, + "grad_norm": 658.8212280273438, + "learning_rate": 3.5114326479911244e-05, + "loss": 70.927, + "step": 76140 + }, + { + "epoch": 0.30765563577451244, + "grad_norm": 684.7052001953125, + "learning_rate": 3.5112497509282474e-05, + "loss": 78.3058, + "step": 76150 + }, + { + "epoch": 0.3076960370398801, + "grad_norm": 796.8023681640625, + "learning_rate": 3.511066824402779e-05, + "loss": 160.1838, + "step": 76160 + }, + { + "epoch": 0.3077364383052477, + "grad_norm": 929.03955078125, + "learning_rate": 3.510883868418284e-05, + "loss": 93.0608, + "step": 76170 + }, + { + "epoch": 0.30777683957061536, + "grad_norm": 958.738037109375, + "learning_rate": 3.5107008829783314e-05, + "loss": 100.0772, + "step": 76180 + }, + { + "epoch": 0.307817240835983, + "grad_norm": 459.3719787597656, + "learning_rate": 3.510517868086487e-05, + "loss": 72.2858, + "step": 76190 + }, + { + "epoch": 0.30785764210135064, + "grad_norm": 1110.762939453125, + "learning_rate": 3.5103348237463184e-05, + "loss": 98.8394, + "step": 76200 + }, + { + "epoch": 0.3078980433667182, + "grad_norm": 598.3909912109375, + "learning_rate": 3.510151749961395e-05, + 
"loss": 133.9623, + "step": 76210 + }, + { + "epoch": 0.30793844463208586, + "grad_norm": 706.9154663085938, + "learning_rate": 3.509968646735287e-05, + "loss": 79.9213, + "step": 76220 + }, + { + "epoch": 0.3079788458974535, + "grad_norm": 2250.949462890625, + "learning_rate": 3.509785514071562e-05, + "loss": 113.8621, + "step": 76230 + }, + { + "epoch": 0.30801924716282114, + "grad_norm": 1004.4246826171875, + "learning_rate": 3.50960235197379e-05, + "loss": 96.694, + "step": 76240 + }, + { + "epoch": 0.3080596484281888, + "grad_norm": 779.1666259765625, + "learning_rate": 3.5094191604455446e-05, + "loss": 70.5833, + "step": 76250 + }, + { + "epoch": 0.3081000496935564, + "grad_norm": 860.2079467773438, + "learning_rate": 3.509235939490394e-05, + "loss": 124.2212, + "step": 76260 + }, + { + "epoch": 0.308140450958924, + "grad_norm": 771.9727172851562, + "learning_rate": 3.509052689111913e-05, + "loss": 116.5388, + "step": 76270 + }, + { + "epoch": 0.30818085222429165, + "grad_norm": 663.447021484375, + "learning_rate": 3.5088694093136726e-05, + "loss": 75.4063, + "step": 76280 + }, + { + "epoch": 0.3082212534896593, + "grad_norm": 1106.0267333984375, + "learning_rate": 3.508686100099246e-05, + "loss": 145.4451, + "step": 76290 + }, + { + "epoch": 0.3082616547550269, + "grad_norm": 908.5853271484375, + "learning_rate": 3.508502761472208e-05, + "loss": 95.575, + "step": 76300 + }, + { + "epoch": 0.30830205602039457, + "grad_norm": 712.333251953125, + "learning_rate": 3.508319393436131e-05, + "loss": 99.2565, + "step": 76310 + }, + { + "epoch": 0.3083424572857622, + "grad_norm": 983.7498168945312, + "learning_rate": 3.5081359959945916e-05, + "loss": 99.7084, + "step": 76320 + }, + { + "epoch": 0.30838285855112985, + "grad_norm": 1541.847412109375, + "learning_rate": 3.5079525691511644e-05, + "loss": 66.37, + "step": 76330 + }, + { + "epoch": 0.30842325981649743, + "grad_norm": 815.22265625, + "learning_rate": 3.507769112909425e-05, + "loss": 109.5607, + "step": 76340 + }, + { + "epoch": 0.30846366108186507, + "grad_norm": 782.0569458007812, + "learning_rate": 3.507585627272951e-05, + "loss": 63.6697, + "step": 76350 + }, + { + "epoch": 0.3085040623472327, + "grad_norm": 469.1524658203125, + "learning_rate": 3.507402112245319e-05, + "loss": 72.9864, + "step": 76360 + }, + { + "epoch": 0.30854446361260035, + "grad_norm": 1317.0535888671875, + "learning_rate": 3.507218567830107e-05, + "loss": 152.878, + "step": 76370 + }, + { + "epoch": 0.308584864877968, + "grad_norm": 619.58447265625, + "learning_rate": 3.507034994030892e-05, + "loss": 85.8027, + "step": 76380 + }, + { + "epoch": 0.30862526614333563, + "grad_norm": 718.7081909179688, + "learning_rate": 3.506851390851255e-05, + "loss": 79.4839, + "step": 76390 + }, + { + "epoch": 0.3086656674087032, + "grad_norm": 1394.074462890625, + "learning_rate": 3.5066677582947744e-05, + "loss": 105.4643, + "step": 76400 + }, + { + "epoch": 0.30870606867407085, + "grad_norm": 797.4261474609375, + "learning_rate": 3.5064840963650295e-05, + "loss": 69.2915, + "step": 76410 + }, + { + "epoch": 0.3087464699394385, + "grad_norm": 495.8677978515625, + "learning_rate": 3.5063004050656016e-05, + "loss": 106.2083, + "step": 76420 + }, + { + "epoch": 0.30878687120480613, + "grad_norm": 1306.0897216796875, + "learning_rate": 3.506116684400072e-05, + "loss": 83.2931, + "step": 76430 + }, + { + "epoch": 0.3088272724701738, + "grad_norm": 555.8763427734375, + "learning_rate": 3.505932934372022e-05, + "loss": 81.8678, + "step": 76440 + }, + { + "epoch": 
0.3088676737355414, + "grad_norm": 487.9269104003906, + "learning_rate": 3.505749154985035e-05, + "loss": 77.3646, + "step": 76450 + }, + { + "epoch": 0.30890807500090905, + "grad_norm": 1014.9325561523438, + "learning_rate": 3.505565346242692e-05, + "loss": 98.7378, + "step": 76460 + }, + { + "epoch": 0.30894847626627664, + "grad_norm": 648.1895751953125, + "learning_rate": 3.5053815081485776e-05, + "loss": 115.4346, + "step": 76470 + }, + { + "epoch": 0.3089888775316443, + "grad_norm": 1093.61865234375, + "learning_rate": 3.505197640706276e-05, + "loss": 82.8143, + "step": 76480 + }, + { + "epoch": 0.3090292787970119, + "grad_norm": 698.6439208984375, + "learning_rate": 3.505013743919372e-05, + "loss": 63.4549, + "step": 76490 + }, + { + "epoch": 0.30906968006237956, + "grad_norm": 458.2552490234375, + "learning_rate": 3.504829817791449e-05, + "loss": 92.076, + "step": 76500 + }, + { + "epoch": 0.3091100813277472, + "grad_norm": 402.65966796875, + "learning_rate": 3.5046458623260946e-05, + "loss": 73.5132, + "step": 76510 + }, + { + "epoch": 0.30915048259311484, + "grad_norm": 461.0862121582031, + "learning_rate": 3.5044618775268944e-05, + "loss": 94.3058, + "step": 76520 + }, + { + "epoch": 0.3091908838584824, + "grad_norm": 2305.477783203125, + "learning_rate": 3.5042778633974355e-05, + "loss": 118.6391, + "step": 76530 + }, + { + "epoch": 0.30923128512385006, + "grad_norm": 804.6665649414062, + "learning_rate": 3.504093819941305e-05, + "loss": 92.8512, + "step": 76540 + }, + { + "epoch": 0.3092716863892177, + "grad_norm": 947.3541870117188, + "learning_rate": 3.503909747162091e-05, + "loss": 85.6497, + "step": 76550 + }, + { + "epoch": 0.30931208765458534, + "grad_norm": 1152.4552001953125, + "learning_rate": 3.503725645063383e-05, + "loss": 159.1584, + "step": 76560 + }, + { + "epoch": 0.309352488919953, + "grad_norm": 943.487548828125, + "learning_rate": 3.5035415136487685e-05, + "loss": 105.766, + "step": 76570 + }, + { + "epoch": 0.3093928901853206, + "grad_norm": 793.91748046875, + "learning_rate": 3.503357352921839e-05, + "loss": 84.7495, + "step": 76580 + }, + { + "epoch": 0.3094332914506882, + "grad_norm": 782.8949584960938, + "learning_rate": 3.503173162886183e-05, + "loss": 70.5542, + "step": 76590 + }, + { + "epoch": 0.30947369271605585, + "grad_norm": 1075.4957275390625, + "learning_rate": 3.5029889435453924e-05, + "loss": 108.1243, + "step": 76600 + }, + { + "epoch": 0.3095140939814235, + "grad_norm": 399.3110046386719, + "learning_rate": 3.5028046949030584e-05, + "loss": 111.9371, + "step": 76610 + }, + { + "epoch": 0.30955449524679113, + "grad_norm": 429.1173400878906, + "learning_rate": 3.5026204169627744e-05, + "loss": 140.9614, + "step": 76620 + }, + { + "epoch": 0.30959489651215877, + "grad_norm": 1197.984130859375, + "learning_rate": 3.50243610972813e-05, + "loss": 140.0498, + "step": 76630 + }, + { + "epoch": 0.3096352977775264, + "grad_norm": 508.80816650390625, + "learning_rate": 3.502251773202722e-05, + "loss": 101.9244, + "step": 76640 + }, + { + "epoch": 0.30967569904289405, + "grad_norm": 1104.640869140625, + "learning_rate": 3.5020674073901406e-05, + "loss": 146.0139, + "step": 76650 + }, + { + "epoch": 0.30971610030826163, + "grad_norm": 769.8191528320312, + "learning_rate": 3.501883012293983e-05, + "loss": 107.5063, + "step": 76660 + }, + { + "epoch": 0.3097565015736293, + "grad_norm": 1195.0914306640625, + "learning_rate": 3.501698587917842e-05, + "loss": 67.92, + "step": 76670 + }, + { + "epoch": 0.3097969028389969, + "grad_norm": 551.9175415039062, + 
"learning_rate": 3.501514134265315e-05, + "loss": 79.7413, + "step": 76680 + }, + { + "epoch": 0.30983730410436455, + "grad_norm": 1002.2484741210938, + "learning_rate": 3.501329651339996e-05, + "loss": 80.8726, + "step": 76690 + }, + { + "epoch": 0.3098777053697322, + "grad_norm": 1090.8453369140625, + "learning_rate": 3.501145139145483e-05, + "loss": 143.7586, + "step": 76700 + }, + { + "epoch": 0.30991810663509983, + "grad_norm": 377.802001953125, + "learning_rate": 3.500960597685372e-05, + "loss": 100.2108, + "step": 76710 + }, + { + "epoch": 0.3099585079004674, + "grad_norm": 1834.626953125, + "learning_rate": 3.500776026963262e-05, + "loss": 125.764, + "step": 76720 + }, + { + "epoch": 0.30999890916583506, + "grad_norm": 428.42315673828125, + "learning_rate": 3.50059142698275e-05, + "loss": 59.1343, + "step": 76730 + }, + { + "epoch": 0.3100393104312027, + "grad_norm": 822.8917236328125, + "learning_rate": 3.500406797747436e-05, + "loss": 86.4992, + "step": 76740 + }, + { + "epoch": 0.31007971169657034, + "grad_norm": 888.9019165039062, + "learning_rate": 3.5002221392609196e-05, + "loss": 68.6775, + "step": 76750 + }, + { + "epoch": 0.310120112961938, + "grad_norm": 1310.589599609375, + "learning_rate": 3.5000374515268e-05, + "loss": 139.7333, + "step": 76760 + }, + { + "epoch": 0.3101605142273056, + "grad_norm": 761.132080078125, + "learning_rate": 3.499852734548677e-05, + "loss": 82.7654, + "step": 76770 + }, + { + "epoch": 0.31020091549267326, + "grad_norm": 1258.5445556640625, + "learning_rate": 3.4996679883301535e-05, + "loss": 86.2607, + "step": 76780 + }, + { + "epoch": 0.31024131675804084, + "grad_norm": 547.8323364257812, + "learning_rate": 3.49948321287483e-05, + "loss": 78.4961, + "step": 76790 + }, + { + "epoch": 0.3102817180234085, + "grad_norm": 1125.9188232421875, + "learning_rate": 3.49929840818631e-05, + "loss": 90.3732, + "step": 76800 + }, + { + "epoch": 0.3103221192887761, + "grad_norm": 460.8324279785156, + "learning_rate": 3.499113574268196e-05, + "loss": 65.8841, + "step": 76810 + }, + { + "epoch": 0.31036252055414376, + "grad_norm": 928.0711059570312, + "learning_rate": 3.49892871112409e-05, + "loss": 99.5523, + "step": 76820 + }, + { + "epoch": 0.3104029218195114, + "grad_norm": 1088.3243408203125, + "learning_rate": 3.498743818757598e-05, + "loss": 85.5138, + "step": 76830 + }, + { + "epoch": 0.31044332308487904, + "grad_norm": 1083.9697265625, + "learning_rate": 3.498558897172324e-05, + "loss": 118.7722, + "step": 76840 + }, + { + "epoch": 0.3104837243502466, + "grad_norm": 820.591796875, + "learning_rate": 3.4983739463718706e-05, + "loss": 73.5572, + "step": 76850 + }, + { + "epoch": 0.31052412561561427, + "grad_norm": 353.7636413574219, + "learning_rate": 3.498188966359848e-05, + "loss": 126.5526, + "step": 76860 + }, + { + "epoch": 0.3105645268809819, + "grad_norm": 1057.5396728515625, + "learning_rate": 3.498003957139859e-05, + "loss": 80.7668, + "step": 76870 + }, + { + "epoch": 0.31060492814634955, + "grad_norm": 169.22789001464844, + "learning_rate": 3.4978189187155114e-05, + "loss": 88.0607, + "step": 76880 + }, + { + "epoch": 0.3106453294117172, + "grad_norm": 843.0842895507812, + "learning_rate": 3.4976338510904134e-05, + "loss": 70.9406, + "step": 76890 + }, + { + "epoch": 0.3106857306770848, + "grad_norm": 717.1528930664062, + "learning_rate": 3.4974487542681724e-05, + "loss": 67.7798, + "step": 76900 + }, + { + "epoch": 0.3107261319424524, + "grad_norm": 526.6531982421875, + "learning_rate": 3.497263628252397e-05, + "loss": 79.4017, + "step": 
76910 + }, + { + "epoch": 0.31076653320782005, + "grad_norm": 948.5043334960938, + "learning_rate": 3.497078473046697e-05, + "loss": 83.2203, + "step": 76920 + }, + { + "epoch": 0.3108069344731877, + "grad_norm": 470.4901428222656, + "learning_rate": 3.49689328865468e-05, + "loss": 71.0031, + "step": 76930 + }, + { + "epoch": 0.31084733573855533, + "grad_norm": 482.288818359375, + "learning_rate": 3.496708075079959e-05, + "loss": 52.9518, + "step": 76940 + }, + { + "epoch": 0.31088773700392297, + "grad_norm": 1077.14111328125, + "learning_rate": 3.496522832326143e-05, + "loss": 132.4006, + "step": 76950 + }, + { + "epoch": 0.3109281382692906, + "grad_norm": 776.029052734375, + "learning_rate": 3.496337560396844e-05, + "loss": 68.8906, + "step": 76960 + }, + { + "epoch": 0.31096853953465825, + "grad_norm": 886.7988891601562, + "learning_rate": 3.496152259295673e-05, + "loss": 95.0891, + "step": 76970 + }, + { + "epoch": 0.31100894080002583, + "grad_norm": 1059.6234130859375, + "learning_rate": 3.495966929026244e-05, + "loss": 77.9053, + "step": 76980 + }, + { + "epoch": 0.3110493420653935, + "grad_norm": 1070.8516845703125, + "learning_rate": 3.49578156959217e-05, + "loss": 82.1329, + "step": 76990 + }, + { + "epoch": 0.3110897433307611, + "grad_norm": 1466.452880859375, + "learning_rate": 3.495596180997064e-05, + "loss": 133.6303, + "step": 77000 + }, + { + "epoch": 0.31113014459612875, + "grad_norm": 1020.1854248046875, + "learning_rate": 3.495410763244541e-05, + "loss": 111.2496, + "step": 77010 + }, + { + "epoch": 0.3111705458614964, + "grad_norm": 964.2380981445312, + "learning_rate": 3.4952253163382144e-05, + "loss": 74.932, + "step": 77020 + }, + { + "epoch": 0.31121094712686403, + "grad_norm": 868.1168823242188, + "learning_rate": 3.4950398402817006e-05, + "loss": 123.6613, + "step": 77030 + }, + { + "epoch": 0.3112513483922316, + "grad_norm": 454.317626953125, + "learning_rate": 3.4948543350786156e-05, + "loss": 69.811, + "step": 77040 + }, + { + "epoch": 0.31129174965759926, + "grad_norm": 342.3891906738281, + "learning_rate": 3.494668800732575e-05, + "loss": 82.828, + "step": 77050 + }, + { + "epoch": 0.3113321509229669, + "grad_norm": 943.3175048828125, + "learning_rate": 3.4944832372471977e-05, + "loss": 113.1843, + "step": 77060 + }, + { + "epoch": 0.31137255218833454, + "grad_norm": 814.3349609375, + "learning_rate": 3.4942976446261e-05, + "loss": 89.6647, + "step": 77070 + }, + { + "epoch": 0.3114129534537022, + "grad_norm": 519.8678588867188, + "learning_rate": 3.494112022872901e-05, + "loss": 87.3495, + "step": 77080 + }, + { + "epoch": 0.3114533547190698, + "grad_norm": 3497.483642578125, + "learning_rate": 3.493926371991218e-05, + "loss": 92.7522, + "step": 77090 + }, + { + "epoch": 0.31149375598443746, + "grad_norm": 956.0463256835938, + "learning_rate": 3.493740691984672e-05, + "loss": 97.3672, + "step": 77100 + }, + { + "epoch": 0.31153415724980504, + "grad_norm": 607.5399780273438, + "learning_rate": 3.4935549828568807e-05, + "loss": 77.453, + "step": 77110 + }, + { + "epoch": 0.3115745585151727, + "grad_norm": 521.7297973632812, + "learning_rate": 3.493369244611467e-05, + "loss": 131.4988, + "step": 77120 + }, + { + "epoch": 0.3116149597805403, + "grad_norm": 444.5405578613281, + "learning_rate": 3.493183477252051e-05, + "loss": 92.0708, + "step": 77130 + }, + { + "epoch": 0.31165536104590796, + "grad_norm": 1005.0468139648438, + "learning_rate": 3.4929976807822546e-05, + "loss": 77.8784, + "step": 77140 + }, + { + "epoch": 0.3116957623112756, + "grad_norm": 
513.6875610351562, + "learning_rate": 3.4928118552056994e-05, + "loss": 57.7453, + "step": 77150 + }, + { + "epoch": 0.31173616357664324, + "grad_norm": 2956.280029296875, + "learning_rate": 3.492626000526008e-05, + "loss": 153.2668, + "step": 77160 + }, + { + "epoch": 0.31177656484201083, + "grad_norm": 844.9546508789062, + "learning_rate": 3.492440116746805e-05, + "loss": 79.4886, + "step": 77170 + }, + { + "epoch": 0.31181696610737847, + "grad_norm": 322.8497314453125, + "learning_rate": 3.492254203871714e-05, + "loss": 114.3675, + "step": 77180 + }, + { + "epoch": 0.3118573673727461, + "grad_norm": 1457.4068603515625, + "learning_rate": 3.4920682619043584e-05, + "loss": 97.8785, + "step": 77190 + }, + { + "epoch": 0.31189776863811375, + "grad_norm": 566.4368896484375, + "learning_rate": 3.4918822908483645e-05, + "loss": 82.1013, + "step": 77200 + }, + { + "epoch": 0.3119381699034814, + "grad_norm": 445.5070495605469, + "learning_rate": 3.491696290707357e-05, + "loss": 80.2579, + "step": 77210 + }, + { + "epoch": 0.31197857116884903, + "grad_norm": 1039.0986328125, + "learning_rate": 3.491510261484962e-05, + "loss": 98.2772, + "step": 77220 + }, + { + "epoch": 0.3120189724342166, + "grad_norm": 1061.1866455078125, + "learning_rate": 3.4913242031848064e-05, + "loss": 89.6608, + "step": 77230 + }, + { + "epoch": 0.31205937369958425, + "grad_norm": 469.5644836425781, + "learning_rate": 3.4911381158105185e-05, + "loss": 98.1146, + "step": 77240 + }, + { + "epoch": 0.3120997749649519, + "grad_norm": 844.3052368164062, + "learning_rate": 3.4909519993657244e-05, + "loss": 83.0805, + "step": 77250 + }, + { + "epoch": 0.31214017623031953, + "grad_norm": 1005.2880859375, + "learning_rate": 3.490765853854054e-05, + "loss": 162.4562, + "step": 77260 + }, + { + "epoch": 0.3121805774956872, + "grad_norm": 671.4625244140625, + "learning_rate": 3.490579679279136e-05, + "loss": 72.3943, + "step": 77270 + }, + { + "epoch": 0.3122209787610548, + "grad_norm": 960.1744384765625, + "learning_rate": 3.4903934756445995e-05, + "loss": 64.6194, + "step": 77280 + }, + { + "epoch": 0.31226138002642245, + "grad_norm": 796.4124755859375, + "learning_rate": 3.490207242954075e-05, + "loss": 113.5335, + "step": 77290 + }, + { + "epoch": 0.31230178129179004, + "grad_norm": 781.4956665039062, + "learning_rate": 3.4900209812111927e-05, + "loss": 99.9743, + "step": 77300 + }, + { + "epoch": 0.3123421825571577, + "grad_norm": 1052.9482421875, + "learning_rate": 3.489834690419584e-05, + "loss": 75.7307, + "step": 77310 + }, + { + "epoch": 0.3123825838225253, + "grad_norm": 729.5875244140625, + "learning_rate": 3.489648370582882e-05, + "loss": 125.4541, + "step": 77320 + }, + { + "epoch": 0.31242298508789296, + "grad_norm": 938.28173828125, + "learning_rate": 3.489462021704717e-05, + "loss": 114.9556, + "step": 77330 + }, + { + "epoch": 0.3124633863532606, + "grad_norm": 628.4651489257812, + "learning_rate": 3.489275643788724e-05, + "loss": 99.3214, + "step": 77340 + }, + { + "epoch": 0.31250378761862824, + "grad_norm": 1086.796142578125, + "learning_rate": 3.489089236838535e-05, + "loss": 127.3169, + "step": 77350 + }, + { + "epoch": 0.3125441888839958, + "grad_norm": 1736.28076171875, + "learning_rate": 3.488902800857785e-05, + "loss": 105.4304, + "step": 77360 + }, + { + "epoch": 0.31258459014936346, + "grad_norm": 671.3499145507812, + "learning_rate": 3.488716335850108e-05, + "loss": 85.8676, + "step": 77370 + }, + { + "epoch": 0.3126249914147311, + "grad_norm": 591.64501953125, + "learning_rate": 3.4885298418191405e-05, 
+ "loss": 89.2911, + "step": 77380 + }, + { + "epoch": 0.31266539268009874, + "grad_norm": 1476.5181884765625, + "learning_rate": 3.488343318768516e-05, + "loss": 96.1556, + "step": 77390 + }, + { + "epoch": 0.3127057939454664, + "grad_norm": 602.7538452148438, + "learning_rate": 3.488156766701873e-05, + "loss": 121.7768, + "step": 77400 + }, + { + "epoch": 0.312746195210834, + "grad_norm": 884.7041625976562, + "learning_rate": 3.487970185622848e-05, + "loss": 125.3914, + "step": 77410 + }, + { + "epoch": 0.31278659647620166, + "grad_norm": 584.8744506835938, + "learning_rate": 3.487783575535078e-05, + "loss": 75.1792, + "step": 77420 + }, + { + "epoch": 0.31282699774156925, + "grad_norm": 886.2075805664062, + "learning_rate": 3.487596936442201e-05, + "loss": 96.2816, + "step": 77430 + }, + { + "epoch": 0.3128673990069369, + "grad_norm": 582.9907836914062, + "learning_rate": 3.487410268347856e-05, + "loss": 65.817, + "step": 77440 + }, + { + "epoch": 0.3129078002723045, + "grad_norm": 662.0589599609375, + "learning_rate": 3.487223571255682e-05, + "loss": 152.8795, + "step": 77450 + }, + { + "epoch": 0.31294820153767217, + "grad_norm": 623.9113159179688, + "learning_rate": 3.4870368451693184e-05, + "loss": 97.6626, + "step": 77460 + }, + { + "epoch": 0.3129886028030398, + "grad_norm": 536.98876953125, + "learning_rate": 3.486850090092407e-05, + "loss": 101.7488, + "step": 77470 + }, + { + "epoch": 0.31302900406840745, + "grad_norm": 731.0277709960938, + "learning_rate": 3.486663306028587e-05, + "loss": 143.5821, + "step": 77480 + }, + { + "epoch": 0.31306940533377503, + "grad_norm": 661.9712524414062, + "learning_rate": 3.4864764929815e-05, + "loss": 52.5121, + "step": 77490 + }, + { + "epoch": 0.31310980659914267, + "grad_norm": 1293.75439453125, + "learning_rate": 3.4862896509547886e-05, + "loss": 132.8449, + "step": 77500 + }, + { + "epoch": 0.3131502078645103, + "grad_norm": 2692.822021484375, + "learning_rate": 3.4861027799520956e-05, + "loss": 153.305, + "step": 77510 + }, + { + "epoch": 0.31319060912987795, + "grad_norm": 990.052734375, + "learning_rate": 3.4859158799770635e-05, + "loss": 84.5507, + "step": 77520 + }, + { + "epoch": 0.3132310103952456, + "grad_norm": 657.1859130859375, + "learning_rate": 3.4857289510333365e-05, + "loss": 51.9203, + "step": 77530 + }, + { + "epoch": 0.31327141166061323, + "grad_norm": 1808.2978515625, + "learning_rate": 3.485541993124559e-05, + "loss": 102.8697, + "step": 77540 + }, + { + "epoch": 0.3133118129259808, + "grad_norm": 1157.9005126953125, + "learning_rate": 3.485355006254375e-05, + "loss": 92.1138, + "step": 77550 + }, + { + "epoch": 0.31335221419134845, + "grad_norm": 687.4346923828125, + "learning_rate": 3.4851679904264314e-05, + "loss": 72.2939, + "step": 77560 + }, + { + "epoch": 0.3133926154567161, + "grad_norm": 459.27734375, + "learning_rate": 3.4849809456443725e-05, + "loss": 59.809, + "step": 77570 + }, + { + "epoch": 0.31343301672208373, + "grad_norm": 1244.100341796875, + "learning_rate": 3.484793871911845e-05, + "loss": 103.7575, + "step": 77580 + }, + { + "epoch": 0.3134734179874514, + "grad_norm": 1192.069091796875, + "learning_rate": 3.4846067692324976e-05, + "loss": 80.5278, + "step": 77590 + }, + { + "epoch": 0.313513819252819, + "grad_norm": 931.0227661132812, + "learning_rate": 3.484419637609977e-05, + "loss": 103.4596, + "step": 77600 + }, + { + "epoch": 0.31355422051818665, + "grad_norm": 477.1709899902344, + "learning_rate": 3.48423247704793e-05, + "loss": 75.4355, + "step": 77610 + }, + { + "epoch": 
0.31359462178355424, + "grad_norm": 562.46923828125, + "learning_rate": 3.484045287550007e-05, + "loss": 89.7808, + "step": 77620 + }, + { + "epoch": 0.3136350230489219, + "grad_norm": 739.185546875, + "learning_rate": 3.4838580691198584e-05, + "loss": 106.0625, + "step": 77630 + }, + { + "epoch": 0.3136754243142895, + "grad_norm": 789.5540161132812, + "learning_rate": 3.4836708217611316e-05, + "loss": 130.853, + "step": 77640 + }, + { + "epoch": 0.31371582557965716, + "grad_norm": 905.3645629882812, + "learning_rate": 3.4834835454774784e-05, + "loss": 74.479, + "step": 77650 + }, + { + "epoch": 0.3137562268450248, + "grad_norm": 1567.3680419921875, + "learning_rate": 3.48329624027255e-05, + "loss": 101.3008, + "step": 77660 + }, + { + "epoch": 0.31379662811039244, + "grad_norm": 824.4033203125, + "learning_rate": 3.4831089061499975e-05, + "loss": 62.5018, + "step": 77670 + }, + { + "epoch": 0.31383702937576, + "grad_norm": 432.3334045410156, + "learning_rate": 3.482921543113474e-05, + "loss": 101.7515, + "step": 77680 + }, + { + "epoch": 0.31387743064112766, + "grad_norm": 914.857666015625, + "learning_rate": 3.4827341511666315e-05, + "loss": 76.1613, + "step": 77690 + }, + { + "epoch": 0.3139178319064953, + "grad_norm": 901.6448364257812, + "learning_rate": 3.482546730313122e-05, + "loss": 61.8378, + "step": 77700 + }, + { + "epoch": 0.31395823317186294, + "grad_norm": 795.8719482421875, + "learning_rate": 3.482359280556602e-05, + "loss": 100.6263, + "step": 77710 + }, + { + "epoch": 0.3139986344372306, + "grad_norm": 743.9913940429688, + "learning_rate": 3.482171801900725e-05, + "loss": 122.5955, + "step": 77720 + }, + { + "epoch": 0.3140390357025982, + "grad_norm": 938.9588012695312, + "learning_rate": 3.481984294349145e-05, + "loss": 128.5234, + "step": 77730 + }, + { + "epoch": 0.31407943696796586, + "grad_norm": 692.3013916015625, + "learning_rate": 3.4817967579055176e-05, + "loss": 93.2129, + "step": 77740 + }, + { + "epoch": 0.31411983823333345, + "grad_norm": 844.73681640625, + "learning_rate": 3.4816091925735e-05, + "loss": 89.2146, + "step": 77750 + }, + { + "epoch": 0.3141602394987011, + "grad_norm": 825.7650146484375, + "learning_rate": 3.481421598356749e-05, + "loss": 104.3436, + "step": 77760 + }, + { + "epoch": 0.31420064076406873, + "grad_norm": 1027.826904296875, + "learning_rate": 3.4812339752589206e-05, + "loss": 90.9697, + "step": 77770 + }, + { + "epoch": 0.31424104202943637, + "grad_norm": 544.6959838867188, + "learning_rate": 3.481046323283674e-05, + "loss": 71.616, + "step": 77780 + }, + { + "epoch": 0.314281443294804, + "grad_norm": 1894.4010009765625, + "learning_rate": 3.480858642434666e-05, + "loss": 106.7203, + "step": 77790 + }, + { + "epoch": 0.31432184456017165, + "grad_norm": 1159.7000732421875, + "learning_rate": 3.4806709327155564e-05, + "loss": 82.139, + "step": 77800 + }, + { + "epoch": 0.31436224582553923, + "grad_norm": 1989.4466552734375, + "learning_rate": 3.480483194130005e-05, + "loss": 82.4573, + "step": 77810 + }, + { + "epoch": 0.3144026470909069, + "grad_norm": 912.6419067382812, + "learning_rate": 3.480295426681671e-05, + "loss": 78.801, + "step": 77820 + }, + { + "epoch": 0.3144430483562745, + "grad_norm": 414.7445373535156, + "learning_rate": 3.480107630374217e-05, + "loss": 74.3807, + "step": 77830 + }, + { + "epoch": 0.31448344962164215, + "grad_norm": 710.1405029296875, + "learning_rate": 3.479919805211301e-05, + "loss": 69.7373, + "step": 77840 + }, + { + "epoch": 0.3145238508870098, + "grad_norm": 641.68798828125, + "learning_rate": 
3.4797319511965875e-05, + "loss": 97.9844, + "step": 77850 + }, + { + "epoch": 0.31456425215237743, + "grad_norm": 1012.924560546875, + "learning_rate": 3.479544068333737e-05, + "loss": 115.1293, + "step": 77860 + }, + { + "epoch": 0.314604653417745, + "grad_norm": 778.8377685546875, + "learning_rate": 3.479356156626414e-05, + "loss": 68.7793, + "step": 77870 + }, + { + "epoch": 0.31464505468311266, + "grad_norm": 1125.3165283203125, + "learning_rate": 3.479168216078281e-05, + "loss": 85.359, + "step": 77880 + }, + { + "epoch": 0.3146854559484803, + "grad_norm": 759.9095458984375, + "learning_rate": 3.478980246693001e-05, + "loss": 104.7082, + "step": 77890 + }, + { + "epoch": 0.31472585721384794, + "grad_norm": 1693.17626953125, + "learning_rate": 3.478792248474241e-05, + "loss": 119.3115, + "step": 77900 + }, + { + "epoch": 0.3147662584792156, + "grad_norm": 937.4721069335938, + "learning_rate": 3.478604221425665e-05, + "loss": 77.7233, + "step": 77910 + }, + { + "epoch": 0.3148066597445832, + "grad_norm": 809.5877685546875, + "learning_rate": 3.478416165550938e-05, + "loss": 107.9146, + "step": 77920 + }, + { + "epoch": 0.31484706100995086, + "grad_norm": 567.4949340820312, + "learning_rate": 3.478228080853726e-05, + "loss": 84.1518, + "step": 77930 + }, + { + "epoch": 0.31488746227531844, + "grad_norm": 721.0872192382812, + "learning_rate": 3.478039967337697e-05, + "loss": 110.4732, + "step": 77940 + }, + { + "epoch": 0.3149278635406861, + "grad_norm": 1356.9036865234375, + "learning_rate": 3.477851825006518e-05, + "loss": 97.6304, + "step": 77950 + }, + { + "epoch": 0.3149682648060537, + "grad_norm": 3366.818359375, + "learning_rate": 3.4776636538638565e-05, + "loss": 110.8249, + "step": 77960 + }, + { + "epoch": 0.31500866607142136, + "grad_norm": 694.39013671875, + "learning_rate": 3.477475453913381e-05, + "loss": 72.7014, + "step": 77970 + }, + { + "epoch": 0.315049067336789, + "grad_norm": 517.5498657226562, + "learning_rate": 3.477287225158762e-05, + "loss": 56.2855, + "step": 77980 + }, + { + "epoch": 0.31508946860215664, + "grad_norm": 716.9921875, + "learning_rate": 3.477098967603667e-05, + "loss": 88.6222, + "step": 77990 + }, + { + "epoch": 0.3151298698675242, + "grad_norm": 1812.595458984375, + "learning_rate": 3.4769106812517685e-05, + "loss": 102.4695, + "step": 78000 + }, + { + "epoch": 0.31517027113289187, + "grad_norm": 478.2683410644531, + "learning_rate": 3.476722366106734e-05, + "loss": 68.2268, + "step": 78010 + }, + { + "epoch": 0.3152106723982595, + "grad_norm": 883.70068359375, + "learning_rate": 3.476534022172238e-05, + "loss": 81.3613, + "step": 78020 + }, + { + "epoch": 0.31525107366362715, + "grad_norm": 550.927001953125, + "learning_rate": 3.4763456494519505e-05, + "loss": 99.5034, + "step": 78030 + }, + { + "epoch": 0.3152914749289948, + "grad_norm": 977.967529296875, + "learning_rate": 3.476157247949545e-05, + "loss": 101.8803, + "step": 78040 + }, + { + "epoch": 0.3153318761943624, + "grad_norm": 618.2531127929688, + "learning_rate": 3.475968817668694e-05, + "loss": 94.9827, + "step": 78050 + }, + { + "epoch": 0.31537227745973007, + "grad_norm": 481.7696533203125, + "learning_rate": 3.47578035861307e-05, + "loss": 57.5695, + "step": 78060 + }, + { + "epoch": 0.31541267872509765, + "grad_norm": 402.8597106933594, + "learning_rate": 3.475591870786349e-05, + "loss": 81.9191, + "step": 78070 + }, + { + "epoch": 0.3154530799904653, + "grad_norm": 598.0117797851562, + "learning_rate": 3.4754033541922054e-05, + "loss": 71.7262, + "step": 78080 + }, + { + 
"epoch": 0.31549348125583293, + "grad_norm": 686.9157104492188, + "learning_rate": 3.475214808834313e-05, + "loss": 77.7943, + "step": 78090 + }, + { + "epoch": 0.31553388252120057, + "grad_norm": 498.7819519042969, + "learning_rate": 3.475026234716348e-05, + "loss": 86.212, + "step": 78100 + }, + { + "epoch": 0.3155742837865682, + "grad_norm": 914.6249389648438, + "learning_rate": 3.474837631841988e-05, + "loss": 102.1395, + "step": 78110 + }, + { + "epoch": 0.31561468505193585, + "grad_norm": 569.9537353515625, + "learning_rate": 3.474649000214909e-05, + "loss": 75.1848, + "step": 78120 + }, + { + "epoch": 0.31565508631730343, + "grad_norm": 717.9462890625, + "learning_rate": 3.474460339838788e-05, + "loss": 80.5611, + "step": 78130 + }, + { + "epoch": 0.3156954875826711, + "grad_norm": 569.4465942382812, + "learning_rate": 3.474271650717303e-05, + "loss": 101.5632, + "step": 78140 + }, + { + "epoch": 0.3157358888480387, + "grad_norm": 439.73687744140625, + "learning_rate": 3.474082932854135e-05, + "loss": 102.4823, + "step": 78150 + }, + { + "epoch": 0.31577629011340635, + "grad_norm": 734.2001342773438, + "learning_rate": 3.47389418625296e-05, + "loss": 91.6151, + "step": 78160 + }, + { + "epoch": 0.315816691378774, + "grad_norm": 688.1524047851562, + "learning_rate": 3.4737054109174596e-05, + "loss": 86.237, + "step": 78170 + }, + { + "epoch": 0.31585709264414163, + "grad_norm": 985.9812622070312, + "learning_rate": 3.473516606851313e-05, + "loss": 87.9724, + "step": 78180 + }, + { + "epoch": 0.3158974939095092, + "grad_norm": 883.1956787109375, + "learning_rate": 3.473327774058201e-05, + "loss": 110.9215, + "step": 78190 + }, + { + "epoch": 0.31593789517487686, + "grad_norm": 1497.2152099609375, + "learning_rate": 3.473138912541807e-05, + "loss": 169.68, + "step": 78200 + }, + { + "epoch": 0.3159782964402445, + "grad_norm": 997.1405639648438, + "learning_rate": 3.472950022305811e-05, + "loss": 83.2763, + "step": 78210 + }, + { + "epoch": 0.31601869770561214, + "grad_norm": 507.517578125, + "learning_rate": 3.472761103353895e-05, + "loss": 144.3234, + "step": 78220 + }, + { + "epoch": 0.3160590989709798, + "grad_norm": 585.2775268554688, + "learning_rate": 3.472572155689744e-05, + "loss": 94.692, + "step": 78230 + }, + { + "epoch": 0.3160995002363474, + "grad_norm": 807.8973999023438, + "learning_rate": 3.4723831793170406e-05, + "loss": 67.1075, + "step": 78240 + }, + { + "epoch": 0.31613990150171506, + "grad_norm": 469.02655029296875, + "learning_rate": 3.4721941742394694e-05, + "loss": 66.7324, + "step": 78250 + }, + { + "epoch": 0.31618030276708264, + "grad_norm": 1416.3599853515625, + "learning_rate": 3.472005140460714e-05, + "loss": 70.483, + "step": 78260 + }, + { + "epoch": 0.3162207040324503, + "grad_norm": 587.4964599609375, + "learning_rate": 3.471816077984461e-05, + "loss": 92.1445, + "step": 78270 + }, + { + "epoch": 0.3162611052978179, + "grad_norm": 734.6630249023438, + "learning_rate": 3.4716269868143956e-05, + "loss": 104.0885, + "step": 78280 + }, + { + "epoch": 0.31630150656318556, + "grad_norm": 842.4596557617188, + "learning_rate": 3.4714378669542046e-05, + "loss": 78.2841, + "step": 78290 + }, + { + "epoch": 0.3163419078285532, + "grad_norm": 525.1878662109375, + "learning_rate": 3.471248718407575e-05, + "loss": 112.7872, + "step": 78300 + }, + { + "epoch": 0.31638230909392084, + "grad_norm": 668.1328125, + "learning_rate": 3.471059541178194e-05, + "loss": 71.4249, + "step": 78310 + }, + { + "epoch": 0.31642271035928843, + "grad_norm": 299.9229736328125, + 
"learning_rate": 3.4708703352697496e-05, + "loss": 84.3567, + "step": 78320 + }, + { + "epoch": 0.31646311162465607, + "grad_norm": 923.4268188476562, + "learning_rate": 3.4706811006859315e-05, + "loss": 98.1253, + "step": 78330 + }, + { + "epoch": 0.3165035128900237, + "grad_norm": 1233.6376953125, + "learning_rate": 3.470491837430428e-05, + "loss": 167.3799, + "step": 78340 + }, + { + "epoch": 0.31654391415539135, + "grad_norm": 380.12677001953125, + "learning_rate": 3.470302545506929e-05, + "loss": 76.2696, + "step": 78350 + }, + { + "epoch": 0.316584315420759, + "grad_norm": 507.4176025390625, + "learning_rate": 3.4701132249191245e-05, + "loss": 84.2043, + "step": 78360 + }, + { + "epoch": 0.31662471668612663, + "grad_norm": 573.1121215820312, + "learning_rate": 3.469923875670706e-05, + "loss": 119.8759, + "step": 78370 + }, + { + "epoch": 0.31666511795149427, + "grad_norm": 706.2188720703125, + "learning_rate": 3.469734497765365e-05, + "loss": 103.6108, + "step": 78380 + }, + { + "epoch": 0.31670551921686185, + "grad_norm": 653.966064453125, + "learning_rate": 3.469545091206793e-05, + "loss": 91.322, + "step": 78390 + }, + { + "epoch": 0.3167459204822295, + "grad_norm": 688.5867919921875, + "learning_rate": 3.469355655998683e-05, + "loss": 111.7998, + "step": 78400 + }, + { + "epoch": 0.31678632174759713, + "grad_norm": 594.2401123046875, + "learning_rate": 3.4691661921447284e-05, + "loss": 107.1984, + "step": 78410 + }, + { + "epoch": 0.3168267230129648, + "grad_norm": 1211.953369140625, + "learning_rate": 3.468976699648622e-05, + "loss": 99.6002, + "step": 78420 + }, + { + "epoch": 0.3168671242783324, + "grad_norm": 274.7460021972656, + "learning_rate": 3.4687871785140586e-05, + "loss": 67.7114, + "step": 78430 + }, + { + "epoch": 0.31690752554370005, + "grad_norm": 305.4955749511719, + "learning_rate": 3.4685976287447326e-05, + "loss": 79.9992, + "step": 78440 + }, + { + "epoch": 0.31694792680906764, + "grad_norm": 474.1764221191406, + "learning_rate": 3.4684080503443405e-05, + "loss": 108.5068, + "step": 78450 + }, + { + "epoch": 0.3169883280744353, + "grad_norm": 662.6573486328125, + "learning_rate": 3.468218443316577e-05, + "loss": 106.6133, + "step": 78460 + }, + { + "epoch": 0.3170287293398029, + "grad_norm": 1294.34521484375, + "learning_rate": 3.46802880766514e-05, + "loss": 91.0747, + "step": 78470 + }, + { + "epoch": 0.31706913060517056, + "grad_norm": 717.0211791992188, + "learning_rate": 3.467839143393724e-05, + "loss": 92.1435, + "step": 78480 + }, + { + "epoch": 0.3171095318705382, + "grad_norm": 404.4720764160156, + "learning_rate": 3.46764945050603e-05, + "loss": 102.284, + "step": 78490 + }, + { + "epoch": 0.31714993313590584, + "grad_norm": 1606.381591796875, + "learning_rate": 3.467459729005753e-05, + "loss": 100.9153, + "step": 78500 + }, + { + "epoch": 0.3171903344012734, + "grad_norm": 1482.5928955078125, + "learning_rate": 3.467269978896594e-05, + "loss": 133.3602, + "step": 78510 + }, + { + "epoch": 0.31723073566664106, + "grad_norm": 457.95294189453125, + "learning_rate": 3.467080200182251e-05, + "loss": 75.8664, + "step": 78520 + }, + { + "epoch": 0.3172711369320087, + "grad_norm": 472.1038818359375, + "learning_rate": 3.4668903928664234e-05, + "loss": 82.9772, + "step": 78530 + }, + { + "epoch": 0.31731153819737634, + "grad_norm": 1395.8800048828125, + "learning_rate": 3.4667005569528134e-05, + "loss": 65.3352, + "step": 78540 + }, + { + "epoch": 0.317351939462744, + "grad_norm": 1291.5982666015625, + "learning_rate": 3.466510692445121e-05, + "loss": 
86.7709, + "step": 78550 + }, + { + "epoch": 0.3173923407281116, + "grad_norm": 1256.736572265625, + "learning_rate": 3.4663207993470466e-05, + "loss": 78.3439, + "step": 78560 + }, + { + "epoch": 0.31743274199347926, + "grad_norm": 500.3243713378906, + "learning_rate": 3.466130877662294e-05, + "loss": 65.9436, + "step": 78570 + }, + { + "epoch": 0.31747314325884685, + "grad_norm": 602.9740600585938, + "learning_rate": 3.465940927394565e-05, + "loss": 89.1936, + "step": 78580 + }, + { + "epoch": 0.3175135445242145, + "grad_norm": 856.4161376953125, + "learning_rate": 3.465750948547563e-05, + "loss": 81.6979, + "step": 78590 + }, + { + "epoch": 0.3175539457895821, + "grad_norm": 745.029541015625, + "learning_rate": 3.465560941124992e-05, + "loss": 78.8543, + "step": 78600 + }, + { + "epoch": 0.31759434705494977, + "grad_norm": 614.9755859375, + "learning_rate": 3.4653709051305546e-05, + "loss": 61.0767, + "step": 78610 + }, + { + "epoch": 0.3176347483203174, + "grad_norm": 835.8876953125, + "learning_rate": 3.465180840567958e-05, + "loss": 111.5914, + "step": 78620 + }, + { + "epoch": 0.31767514958568505, + "grad_norm": 581.6763305664062, + "learning_rate": 3.4649907474409074e-05, + "loss": 101.0512, + "step": 78630 + }, + { + "epoch": 0.31771555085105263, + "grad_norm": 878.210693359375, + "learning_rate": 3.464800625753107e-05, + "loss": 75.3421, + "step": 78640 + }, + { + "epoch": 0.31775595211642027, + "grad_norm": 1088.645751953125, + "learning_rate": 3.464610475508264e-05, + "loss": 105.6938, + "step": 78650 + }, + { + "epoch": 0.3177963533817879, + "grad_norm": 616.7958984375, + "learning_rate": 3.464420296710086e-05, + "loss": 64.6855, + "step": 78660 + }, + { + "epoch": 0.31783675464715555, + "grad_norm": 655.6940307617188, + "learning_rate": 3.46423008936228e-05, + "loss": 100.9472, + "step": 78670 + }, + { + "epoch": 0.3178771559125232, + "grad_norm": 919.3386840820312, + "learning_rate": 3.464039853468555e-05, + "loss": 108.3443, + "step": 78680 + }, + { + "epoch": 0.31791755717789083, + "grad_norm": 1560.745849609375, + "learning_rate": 3.4638495890326194e-05, + "loss": 154.8582, + "step": 78690 + }, + { + "epoch": 0.31795795844325847, + "grad_norm": 1223.447998046875, + "learning_rate": 3.4636592960581825e-05, + "loss": 83.7749, + "step": 78700 + }, + { + "epoch": 0.31799835970862605, + "grad_norm": 1024.059326171875, + "learning_rate": 3.463468974548954e-05, + "loss": 77.0066, + "step": 78710 + }, + { + "epoch": 0.3180387609739937, + "grad_norm": 678.9048461914062, + "learning_rate": 3.463278624508644e-05, + "loss": 96.1339, + "step": 78720 + }, + { + "epoch": 0.31807916223936133, + "grad_norm": 655.9168701171875, + "learning_rate": 3.463088245940965e-05, + "loss": 133.9232, + "step": 78730 + }, + { + "epoch": 0.318119563504729, + "grad_norm": 659.2313842773438, + "learning_rate": 3.4628978388496266e-05, + "loss": 73.4488, + "step": 78740 + }, + { + "epoch": 0.3181599647700966, + "grad_norm": 521.6857299804688, + "learning_rate": 3.462707403238341e-05, + "loss": 108.7611, + "step": 78750 + }, + { + "epoch": 0.31820036603546425, + "grad_norm": 2553.34814453125, + "learning_rate": 3.4625169391108224e-05, + "loss": 96.0591, + "step": 78760 + }, + { + "epoch": 0.31824076730083184, + "grad_norm": 667.6730346679688, + "learning_rate": 3.4623264464707834e-05, + "loss": 77.1103, + "step": 78770 + }, + { + "epoch": 0.3182811685661995, + "grad_norm": 983.7138671875, + "learning_rate": 3.462135925321937e-05, + "loss": 107.7247, + "step": 78780 + }, + { + "epoch": 0.3183215698315671, + 
"grad_norm": 513.5236206054688, + "learning_rate": 3.461945375667998e-05, + "loss": 122.781, + "step": 78790 + }, + { + "epoch": 0.31836197109693476, + "grad_norm": 1069.686767578125, + "learning_rate": 3.461754797512681e-05, + "loss": 91.2675, + "step": 78800 + }, + { + "epoch": 0.3184023723623024, + "grad_norm": 675.5418701171875, + "learning_rate": 3.4615641908597016e-05, + "loss": 64.8059, + "step": 78810 + }, + { + "epoch": 0.31844277362767004, + "grad_norm": 400.1224670410156, + "learning_rate": 3.461373555712776e-05, + "loss": 77.2355, + "step": 78820 + }, + { + "epoch": 0.3184831748930376, + "grad_norm": 918.5543823242188, + "learning_rate": 3.4611828920756204e-05, + "loss": 62.2328, + "step": 78830 + }, + { + "epoch": 0.31852357615840526, + "grad_norm": 1329.5029296875, + "learning_rate": 3.4609921999519525e-05, + "loss": 112.6708, + "step": 78840 + }, + { + "epoch": 0.3185639774237729, + "grad_norm": 1077.7698974609375, + "learning_rate": 3.46080147934549e-05, + "loss": 80.5532, + "step": 78850 + }, + { + "epoch": 0.31860437868914054, + "grad_norm": 351.932373046875, + "learning_rate": 3.46061073025995e-05, + "loss": 141.8121, + "step": 78860 + }, + { + "epoch": 0.3186447799545082, + "grad_norm": 473.2344970703125, + "learning_rate": 3.4604199526990514e-05, + "loss": 109.4862, + "step": 78870 + }, + { + "epoch": 0.3186851812198758, + "grad_norm": 589.0631103515625, + "learning_rate": 3.460229146666514e-05, + "loss": 87.6405, + "step": 78880 + }, + { + "epoch": 0.31872558248524346, + "grad_norm": 850.1858520507812, + "learning_rate": 3.460038312166058e-05, + "loss": 82.7815, + "step": 78890 + }, + { + "epoch": 0.31876598375061105, + "grad_norm": 952.010986328125, + "learning_rate": 3.4598474492014036e-05, + "loss": 84.6508, + "step": 78900 + }, + { + "epoch": 0.3188063850159787, + "grad_norm": 1332.180419921875, + "learning_rate": 3.459656557776271e-05, + "loss": 91.6532, + "step": 78910 + }, + { + "epoch": 0.31884678628134633, + "grad_norm": 739.921875, + "learning_rate": 3.459465637894383e-05, + "loss": 81.8229, + "step": 78920 + }, + { + "epoch": 0.31888718754671397, + "grad_norm": 999.6199951171875, + "learning_rate": 3.4592746895594604e-05, + "loss": 78.8703, + "step": 78930 + }, + { + "epoch": 0.3189275888120816, + "grad_norm": 653.5254516601562, + "learning_rate": 3.459083712775226e-05, + "loss": 83.4766, + "step": 78940 + }, + { + "epoch": 0.31896799007744925, + "grad_norm": 599.1195678710938, + "learning_rate": 3.458892707545405e-05, + "loss": 98.3483, + "step": 78950 + }, + { + "epoch": 0.31900839134281683, + "grad_norm": 788.4080810546875, + "learning_rate": 3.4587016738737186e-05, + "loss": 78.8812, + "step": 78960 + }, + { + "epoch": 0.3190487926081845, + "grad_norm": 435.0540771484375, + "learning_rate": 3.4585106117638916e-05, + "loss": 103.3705, + "step": 78970 + }, + { + "epoch": 0.3190891938735521, + "grad_norm": 1103.976806640625, + "learning_rate": 3.45831952121965e-05, + "loss": 116.3792, + "step": 78980 + }, + { + "epoch": 0.31912959513891975, + "grad_norm": 1191.410888671875, + "learning_rate": 3.458128402244719e-05, + "loss": 90.7627, + "step": 78990 + }, + { + "epoch": 0.3191699964042874, + "grad_norm": 656.80419921875, + "learning_rate": 3.457937254842823e-05, + "loss": 76.6605, + "step": 79000 + }, + { + "epoch": 0.31921039766965503, + "grad_norm": 760.5332641601562, + "learning_rate": 3.457746079017691e-05, + "loss": 63.7328, + "step": 79010 + }, + { + "epoch": 0.3192507989350226, + "grad_norm": 458.7794494628906, + "learning_rate": 
3.457554874773047e-05, + "loss": 105.1713, + "step": 79020 + }, + { + "epoch": 0.31929120020039026, + "grad_norm": 939.69384765625, + "learning_rate": 3.457363642112622e-05, + "loss": 93.2941, + "step": 79030 + }, + { + "epoch": 0.3193316014657579, + "grad_norm": 1117.4151611328125, + "learning_rate": 3.457172381040141e-05, + "loss": 136.8451, + "step": 79040 + }, + { + "epoch": 0.31937200273112554, + "grad_norm": 844.03857421875, + "learning_rate": 3.4569810915593356e-05, + "loss": 92.9883, + "step": 79050 + }, + { + "epoch": 0.3194124039964932, + "grad_norm": 476.1003112792969, + "learning_rate": 3.456789773673933e-05, + "loss": 52.3132, + "step": 79060 + }, + { + "epoch": 0.3194528052618608, + "grad_norm": 495.0113525390625, + "learning_rate": 3.4565984273876635e-05, + "loss": 78.1617, + "step": 79070 + }, + { + "epoch": 0.31949320652722846, + "grad_norm": 1340.326171875, + "learning_rate": 3.456407052704258e-05, + "loss": 91.1518, + "step": 79080 + }, + { + "epoch": 0.31953360779259604, + "grad_norm": 683.9920043945312, + "learning_rate": 3.456215649627447e-05, + "loss": 65.716, + "step": 79090 + }, + { + "epoch": 0.3195740090579637, + "grad_norm": 918.5289916992188, + "learning_rate": 3.456024218160963e-05, + "loss": 73.6551, + "step": 79100 + }, + { + "epoch": 0.3196144103233313, + "grad_norm": 677.2937622070312, + "learning_rate": 3.455832758308536e-05, + "loss": 113.6809, + "step": 79110 + }, + { + "epoch": 0.31965481158869896, + "grad_norm": 383.8507995605469, + "learning_rate": 3.455641270073901e-05, + "loss": 47.167, + "step": 79120 + }, + { + "epoch": 0.3196952128540666, + "grad_norm": 584.7449951171875, + "learning_rate": 3.4554497534607895e-05, + "loss": 100.5452, + "step": 79130 + }, + { + "epoch": 0.31973561411943424, + "grad_norm": 693.1851806640625, + "learning_rate": 3.455258208472936e-05, + "loss": 96.7102, + "step": 79140 + }, + { + "epoch": 0.3197760153848018, + "grad_norm": 1066.388916015625, + "learning_rate": 3.455066635114074e-05, + "loss": 84.2545, + "step": 79150 + }, + { + "epoch": 0.31981641665016947, + "grad_norm": 1270.8572998046875, + "learning_rate": 3.4548750333879395e-05, + "loss": 102.0026, + "step": 79160 + }, + { + "epoch": 0.3198568179155371, + "grad_norm": 1582.648681640625, + "learning_rate": 3.454683403298266e-05, + "loss": 132.6304, + "step": 79170 + }, + { + "epoch": 0.31989721918090475, + "grad_norm": 903.9878540039062, + "learning_rate": 3.4544917448487915e-05, + "loss": 55.2329, + "step": 79180 + }, + { + "epoch": 0.3199376204462724, + "grad_norm": 506.0345764160156, + "learning_rate": 3.454300058043252e-05, + "loss": 78.5273, + "step": 79190 + }, + { + "epoch": 0.31997802171164, + "grad_norm": 1020.0081176757812, + "learning_rate": 3.454108342885384e-05, + "loss": 110.9629, + "step": 79200 + }, + { + "epoch": 0.32001842297700767, + "grad_norm": 887.15625, + "learning_rate": 3.453916599378925e-05, + "loss": 81.3776, + "step": 79210 + }, + { + "epoch": 0.32005882424237525, + "grad_norm": 1415.11279296875, + "learning_rate": 3.453724827527613e-05, + "loss": 133.4335, + "step": 79220 + }, + { + "epoch": 0.3200992255077429, + "grad_norm": 1774.2027587890625, + "learning_rate": 3.453533027335188e-05, + "loss": 99.9783, + "step": 79230 + }, + { + "epoch": 0.32013962677311053, + "grad_norm": 697.572998046875, + "learning_rate": 3.453341198805388e-05, + "loss": 96.2494, + "step": 79240 + }, + { + "epoch": 0.32018002803847817, + "grad_norm": 566.7346801757812, + "learning_rate": 3.4531493419419525e-05, + "loss": 84.6911, + "step": 79250 + }, + { + 
"epoch": 0.3202204293038458, + "grad_norm": 564.7448120117188, + "learning_rate": 3.452957456748622e-05, + "loss": 99.6181, + "step": 79260 + }, + { + "epoch": 0.32026083056921345, + "grad_norm": 1471.2198486328125, + "learning_rate": 3.4527655432291384e-05, + "loss": 120.9379, + "step": 79270 + }, + { + "epoch": 0.32030123183458103, + "grad_norm": 735.0042724609375, + "learning_rate": 3.4525736013872433e-05, + "loss": 86.2898, + "step": 79280 + }, + { + "epoch": 0.3203416330999487, + "grad_norm": 636.0916137695312, + "learning_rate": 3.4523816312266773e-05, + "loss": 74.47, + "step": 79290 + }, + { + "epoch": 0.3203820343653163, + "grad_norm": 9818.4365234375, + "learning_rate": 3.4521896327511836e-05, + "loss": 134.5169, + "step": 79300 + }, + { + "epoch": 0.32042243563068395, + "grad_norm": 591.2630615234375, + "learning_rate": 3.451997605964506e-05, + "loss": 173.49, + "step": 79310 + }, + { + "epoch": 0.3204628368960516, + "grad_norm": 1553.7625732421875, + "learning_rate": 3.451805550870387e-05, + "loss": 64.8282, + "step": 79320 + }, + { + "epoch": 0.32050323816141923, + "grad_norm": 682.6858520507812, + "learning_rate": 3.4516134674725723e-05, + "loss": 73.6441, + "step": 79330 + }, + { + "epoch": 0.3205436394267868, + "grad_norm": 526.0022583007812, + "learning_rate": 3.4514213557748046e-05, + "loss": 119.2124, + "step": 79340 + }, + { + "epoch": 0.32058404069215446, + "grad_norm": 968.0880126953125, + "learning_rate": 3.4512292157808306e-05, + "loss": 112.6707, + "step": 79350 + }, + { + "epoch": 0.3206244419575221, + "grad_norm": 1211.839599609375, + "learning_rate": 3.4510370474943956e-05, + "loss": 79.4202, + "step": 79360 + }, + { + "epoch": 0.32066484322288974, + "grad_norm": 745.1864013671875, + "learning_rate": 3.450844850919247e-05, + "loss": 96.7271, + "step": 79370 + }, + { + "epoch": 0.3207052444882574, + "grad_norm": 822.1251220703125, + "learning_rate": 3.450652626059131e-05, + "loss": 90.0133, + "step": 79380 + }, + { + "epoch": 0.320745645753625, + "grad_norm": 1192.0633544921875, + "learning_rate": 3.4504603729177945e-05, + "loss": 103.1762, + "step": 79390 + }, + { + "epoch": 0.32078604701899266, + "grad_norm": 582.9888916015625, + "learning_rate": 3.450268091498987e-05, + "loss": 164.0327, + "step": 79400 + }, + { + "epoch": 0.32082644828436024, + "grad_norm": 1200.037109375, + "learning_rate": 3.4500757818064565e-05, + "loss": 98.381, + "step": 79410 + }, + { + "epoch": 0.3208668495497279, + "grad_norm": 999.96826171875, + "learning_rate": 3.4498834438439516e-05, + "loss": 123.1332, + "step": 79420 + }, + { + "epoch": 0.3209072508150955, + "grad_norm": 345.7852783203125, + "learning_rate": 3.4496910776152226e-05, + "loss": 100.085, + "step": 79430 + }, + { + "epoch": 0.32094765208046316, + "grad_norm": 790.6015625, + "learning_rate": 3.44949868312402e-05, + "loss": 105.9085, + "step": 79440 + }, + { + "epoch": 0.3209880533458308, + "grad_norm": 693.493896484375, + "learning_rate": 3.4493062603740934e-05, + "loss": 111.2117, + "step": 79450 + }, + { + "epoch": 0.32102845461119844, + "grad_norm": 521.7107543945312, + "learning_rate": 3.449113809369196e-05, + "loss": 118.5874, + "step": 79460 + }, + { + "epoch": 0.32106885587656603, + "grad_norm": 425.9333190917969, + "learning_rate": 3.448921330113079e-05, + "loss": 60.3923, + "step": 79470 + }, + { + "epoch": 0.32110925714193367, + "grad_norm": 557.01220703125, + "learning_rate": 3.448728822609494e-05, + "loss": 92.3102, + "step": 79480 + }, + { + "epoch": 0.3211496584073013, + "grad_norm": 949.6359252929688, + 
"learning_rate": 3.448536286862195e-05, + "loss": 84.0448, + "step": 79490 + }, + { + "epoch": 0.32119005967266895, + "grad_norm": 364.330078125, + "learning_rate": 3.4483437228749356e-05, + "loss": 81.7577, + "step": 79500 + }, + { + "epoch": 0.3212304609380366, + "grad_norm": 785.3282470703125, + "learning_rate": 3.448151130651469e-05, + "loss": 85.2947, + "step": 79510 + }, + { + "epoch": 0.32127086220340423, + "grad_norm": 627.1854858398438, + "learning_rate": 3.4479585101955506e-05, + "loss": 98.7261, + "step": 79520 + }, + { + "epoch": 0.32131126346877187, + "grad_norm": 1733.3653564453125, + "learning_rate": 3.4477658615109365e-05, + "loss": 110.6081, + "step": 79530 + }, + { + "epoch": 0.32135166473413945, + "grad_norm": 616.47412109375, + "learning_rate": 3.447573184601381e-05, + "loss": 73.1421, + "step": 79540 + }, + { + "epoch": 0.3213920659995071, + "grad_norm": 710.7804565429688, + "learning_rate": 3.447380479470641e-05, + "loss": 86.9677, + "step": 79550 + }, + { + "epoch": 0.32143246726487473, + "grad_norm": 968.6766357421875, + "learning_rate": 3.4471877461224735e-05, + "loss": 68.3213, + "step": 79560 + }, + { + "epoch": 0.3214728685302424, + "grad_norm": 899.9481201171875, + "learning_rate": 3.446994984560636e-05, + "loss": 69.9163, + "step": 79570 + }, + { + "epoch": 0.32151326979561, + "grad_norm": 692.638916015625, + "learning_rate": 3.4468021947888855e-05, + "loss": 95.5147, + "step": 79580 + }, + { + "epoch": 0.32155367106097765, + "grad_norm": 506.0480651855469, + "learning_rate": 3.4466093768109825e-05, + "loss": 79.3498, + "step": 79590 + }, + { + "epoch": 0.32159407232634524, + "grad_norm": 525.765869140625, + "learning_rate": 3.4464165306306845e-05, + "loss": 82.4771, + "step": 79600 + }, + { + "epoch": 0.3216344735917129, + "grad_norm": 756.1973266601562, + "learning_rate": 3.446223656251751e-05, + "loss": 74.7191, + "step": 79610 + }, + { + "epoch": 0.3216748748570805, + "grad_norm": 459.81134033203125, + "learning_rate": 3.4460307536779434e-05, + "loss": 69.5427, + "step": 79620 + }, + { + "epoch": 0.32171527612244816, + "grad_norm": 979.8378295898438, + "learning_rate": 3.4458378229130214e-05, + "loss": 76.3073, + "step": 79630 + }, + { + "epoch": 0.3217556773878158, + "grad_norm": 683.4526977539062, + "learning_rate": 3.4456448639607476e-05, + "loss": 79.4356, + "step": 79640 + }, + { + "epoch": 0.32179607865318344, + "grad_norm": 931.3280639648438, + "learning_rate": 3.4454518768248816e-05, + "loss": 88.9811, + "step": 79650 + }, + { + "epoch": 0.321836479918551, + "grad_norm": 1101.5023193359375, + "learning_rate": 3.445258861509188e-05, + "loss": 121.1862, + "step": 79660 + }, + { + "epoch": 0.32187688118391866, + "grad_norm": 574.8599853515625, + "learning_rate": 3.4450658180174286e-05, + "loss": 54.8537, + "step": 79670 + }, + { + "epoch": 0.3219172824492863, + "grad_norm": 617.5059814453125, + "learning_rate": 3.4448727463533666e-05, + "loss": 64.263, + "step": 79680 + }, + { + "epoch": 0.32195768371465394, + "grad_norm": 565.4827270507812, + "learning_rate": 3.4446796465207665e-05, + "loss": 118.5414, + "step": 79690 + }, + { + "epoch": 0.3219980849800216, + "grad_norm": 383.80859375, + "learning_rate": 3.444486518523394e-05, + "loss": 75.632, + "step": 79700 + }, + { + "epoch": 0.3220384862453892, + "grad_norm": 1264.3240966796875, + "learning_rate": 3.4442933623650124e-05, + "loss": 117.4035, + "step": 79710 + }, + { + "epoch": 0.32207888751075686, + "grad_norm": 590.640380859375, + "learning_rate": 3.444100178049389e-05, + "loss": 79.3299, + 
"step": 79720 + }, + { + "epoch": 0.32211928877612445, + "grad_norm": 533.0899658203125, + "learning_rate": 3.4439069655802875e-05, + "loss": 60.5502, + "step": 79730 + }, + { + "epoch": 0.3221596900414921, + "grad_norm": 893.04443359375, + "learning_rate": 3.443713724961478e-05, + "loss": 101.2506, + "step": 79740 + }, + { + "epoch": 0.3222000913068597, + "grad_norm": 1271.2647705078125, + "learning_rate": 3.4435204561967244e-05, + "loss": 68.6259, + "step": 79750 + }, + { + "epoch": 0.32224049257222737, + "grad_norm": 1344.268310546875, + "learning_rate": 3.443327159289798e-05, + "loss": 103.1404, + "step": 79760 + }, + { + "epoch": 0.322280893837595, + "grad_norm": 1319.7366943359375, + "learning_rate": 3.443133834244465e-05, + "loss": 100.6198, + "step": 79770 + }, + { + "epoch": 0.32232129510296265, + "grad_norm": 342.3222961425781, + "learning_rate": 3.4429404810644944e-05, + "loss": 63.8693, + "step": 79780 + }, + { + "epoch": 0.32236169636833023, + "grad_norm": 477.5200500488281, + "learning_rate": 3.4427470997536567e-05, + "loss": 83.7547, + "step": 79790 + }, + { + "epoch": 0.32240209763369787, + "grad_norm": 597.511474609375, + "learning_rate": 3.442553690315722e-05, + "loss": 98.8691, + "step": 79800 + }, + { + "epoch": 0.3224424988990655, + "grad_norm": 724.4496459960938, + "learning_rate": 3.4423602527544594e-05, + "loss": 153.3855, + "step": 79810 + }, + { + "epoch": 0.32248290016443315, + "grad_norm": 445.2489929199219, + "learning_rate": 3.442166787073642e-05, + "loss": 62.5845, + "step": 79820 + }, + { + "epoch": 0.3225233014298008, + "grad_norm": 372.920654296875, + "learning_rate": 3.4419732932770394e-05, + "loss": 67.3124, + "step": 79830 + }, + { + "epoch": 0.32256370269516843, + "grad_norm": 591.730712890625, + "learning_rate": 3.441779771368426e-05, + "loss": 79.6103, + "step": 79840 + }, + { + "epoch": 0.32260410396053607, + "grad_norm": 703.0748291015625, + "learning_rate": 3.4415862213515735e-05, + "loss": 63.5258, + "step": 79850 + }, + { + "epoch": 0.32264450522590365, + "grad_norm": 808.2622680664062, + "learning_rate": 3.4413926432302554e-05, + "loss": 87.214, + "step": 79860 + }, + { + "epoch": 0.3226849064912713, + "grad_norm": 1029.551513671875, + "learning_rate": 3.441199037008246e-05, + "loss": 76.2321, + "step": 79870 + }, + { + "epoch": 0.32272530775663893, + "grad_norm": 1756.5548095703125, + "learning_rate": 3.441005402689319e-05, + "loss": 137.9453, + "step": 79880 + }, + { + "epoch": 0.3227657090220066, + "grad_norm": 768.9036865234375, + "learning_rate": 3.4408117402772494e-05, + "loss": 133.0097, + "step": 79890 + }, + { + "epoch": 0.3228061102873742, + "grad_norm": 566.4799194335938, + "learning_rate": 3.440618049775814e-05, + "loss": 57.3808, + "step": 79900 + }, + { + "epoch": 0.32284651155274185, + "grad_norm": 405.2076721191406, + "learning_rate": 3.440424331188788e-05, + "loss": 81.5135, + "step": 79910 + }, + { + "epoch": 0.32288691281810944, + "grad_norm": 766.485595703125, + "learning_rate": 3.4402305845199475e-05, + "loss": 124.7188, + "step": 79920 + }, + { + "epoch": 0.3229273140834771, + "grad_norm": 520.5372314453125, + "learning_rate": 3.4400368097730705e-05, + "loss": 66.2148, + "step": 79930 + }, + { + "epoch": 0.3229677153488447, + "grad_norm": 1012.08203125, + "learning_rate": 3.439843006951935e-05, + "loss": 101.0135, + "step": 79940 + }, + { + "epoch": 0.32300811661421236, + "grad_norm": 746.4867553710938, + "learning_rate": 3.439649176060318e-05, + "loss": 113.9434, + "step": 79950 + }, + { + "epoch": 0.32304851787958, + 
"grad_norm": 597.4169311523438, + "learning_rate": 3.439455317102e-05, + "loss": 81.928, + "step": 79960 + }, + { + "epoch": 0.32308891914494764, + "grad_norm": 619.015625, + "learning_rate": 3.439261430080759e-05, + "loss": 93.9774, + "step": 79970 + }, + { + "epoch": 0.3231293204103152, + "grad_norm": 1873.0609130859375, + "learning_rate": 3.439067515000375e-05, + "loss": 87.6726, + "step": 79980 + }, + { + "epoch": 0.32316972167568286, + "grad_norm": 556.542236328125, + "learning_rate": 3.4388735718646294e-05, + "loss": 134.6536, + "step": 79990 + }, + { + "epoch": 0.3232101229410505, + "grad_norm": 731.04638671875, + "learning_rate": 3.438679600677303e-05, + "loss": 59.8982, + "step": 80000 + }, + { + "epoch": 0.32325052420641814, + "grad_norm": 640.154052734375, + "learning_rate": 3.438485601442176e-05, + "loss": 99.9659, + "step": 80010 + }, + { + "epoch": 0.3232909254717858, + "grad_norm": 965.4467163085938, + "learning_rate": 3.438291574163032e-05, + "loss": 66.7173, + "step": 80020 + }, + { + "epoch": 0.3233313267371534, + "grad_norm": 651.7171020507812, + "learning_rate": 3.4380975188436547e-05, + "loss": 94.6211, + "step": 80030 + }, + { + "epoch": 0.32337172800252106, + "grad_norm": 1380.2755126953125, + "learning_rate": 3.437903435487825e-05, + "loss": 91.9537, + "step": 80040 + }, + { + "epoch": 0.32341212926788865, + "grad_norm": 1061.1014404296875, + "learning_rate": 3.437709324099327e-05, + "loss": 84.3723, + "step": 80050 + }, + { + "epoch": 0.3234525305332563, + "grad_norm": 556.1154174804688, + "learning_rate": 3.4375151846819456e-05, + "loss": 98.0456, + "step": 80060 + }, + { + "epoch": 0.32349293179862393, + "grad_norm": 798.487060546875, + "learning_rate": 3.437321017239466e-05, + "loss": 95.3656, + "step": 80070 + }, + { + "epoch": 0.32353333306399157, + "grad_norm": 463.6957092285156, + "learning_rate": 3.4371268217756734e-05, + "loss": 133.5984, + "step": 80080 + }, + { + "epoch": 0.3235737343293592, + "grad_norm": 786.9575805664062, + "learning_rate": 3.4369325982943536e-05, + "loss": 58.7106, + "step": 80090 + }, + { + "epoch": 0.32361413559472685, + "grad_norm": 773.2098388671875, + "learning_rate": 3.4367383467992926e-05, + "loss": 91.3833, + "step": 80100 + }, + { + "epoch": 0.32365453686009443, + "grad_norm": 295.60321044921875, + "learning_rate": 3.436544067294278e-05, + "loss": 76.8304, + "step": 80110 + }, + { + "epoch": 0.3236949381254621, + "grad_norm": 900.93701171875, + "learning_rate": 3.436349759783097e-05, + "loss": 106.5492, + "step": 80120 + }, + { + "epoch": 0.3237353393908297, + "grad_norm": 710.2637939453125, + "learning_rate": 3.4361554242695384e-05, + "loss": 101.4259, + "step": 80130 + }, + { + "epoch": 0.32377574065619735, + "grad_norm": 1265.6707763671875, + "learning_rate": 3.43596106075739e-05, + "loss": 119.5903, + "step": 80140 + }, + { + "epoch": 0.323816141921565, + "grad_norm": 684.5209350585938, + "learning_rate": 3.4357666692504415e-05, + "loss": 93.0503, + "step": 80150 + }, + { + "epoch": 0.32385654318693263, + "grad_norm": 698.9592895507812, + "learning_rate": 3.4355722497524826e-05, + "loss": 64.5384, + "step": 80160 + }, + { + "epoch": 0.3238969444523003, + "grad_norm": 563.0949096679688, + "learning_rate": 3.4353778022673035e-05, + "loss": 97.502, + "step": 80170 + }, + { + "epoch": 0.32393734571766786, + "grad_norm": 1521.280517578125, + "learning_rate": 3.4351833267986956e-05, + "loss": 108.2745, + "step": 80180 + }, + { + "epoch": 0.3239777469830355, + "grad_norm": 442.360595703125, + "learning_rate": 
3.434988823350449e-05, + "loss": 83.5065, + "step": 80190 + }, + { + "epoch": 0.32401814824840314, + "grad_norm": 640.2064819335938, + "learning_rate": 3.434794291926358e-05, + "loss": 103.1669, + "step": 80200 + }, + { + "epoch": 0.3240585495137708, + "grad_norm": 794.4244995117188, + "learning_rate": 3.434599732530212e-05, + "loss": 73.1422, + "step": 80210 + }, + { + "epoch": 0.3240989507791384, + "grad_norm": 931.4889526367188, + "learning_rate": 3.434405145165807e-05, + "loss": 114.5076, + "step": 80220 + }, + { + "epoch": 0.32413935204450606, + "grad_norm": 570.8517456054688, + "learning_rate": 3.434210529836934e-05, + "loss": 100.1806, + "step": 80230 + }, + { + "epoch": 0.32417975330987364, + "grad_norm": 423.1470947265625, + "learning_rate": 3.434015886547389e-05, + "loss": 75.0921, + "step": 80240 + }, + { + "epoch": 0.3242201545752413, + "grad_norm": 727.3602294921875, + "learning_rate": 3.433821215300966e-05, + "loss": 81.3253, + "step": 80250 + }, + { + "epoch": 0.3242605558406089, + "grad_norm": 1032.52783203125, + "learning_rate": 3.4336265161014596e-05, + "loss": 83.088, + "step": 80260 + }, + { + "epoch": 0.32430095710597656, + "grad_norm": 491.6406555175781, + "learning_rate": 3.433431788952667e-05, + "loss": 77.5417, + "step": 80270 + }, + { + "epoch": 0.3243413583713442, + "grad_norm": 625.5296020507812, + "learning_rate": 3.433237033858384e-05, + "loss": 148.9318, + "step": 80280 + }, + { + "epoch": 0.32438175963671184, + "grad_norm": 584.1459350585938, + "learning_rate": 3.4330422508224064e-05, + "loss": 67.4387, + "step": 80290 + }, + { + "epoch": 0.3244221609020794, + "grad_norm": 801.6712646484375, + "learning_rate": 3.432847439848532e-05, + "loss": 74.7513, + "step": 80300 + }, + { + "epoch": 0.32446256216744707, + "grad_norm": 321.0269775390625, + "learning_rate": 3.43265260094056e-05, + "loss": 90.3115, + "step": 80310 + }, + { + "epoch": 0.3245029634328147, + "grad_norm": 442.7547302246094, + "learning_rate": 3.432457734102287e-05, + "loss": 83.1057, + "step": 80320 + }, + { + "epoch": 0.32454336469818235, + "grad_norm": 996.4345092773438, + "learning_rate": 3.4322628393375144e-05, + "loss": 80.0434, + "step": 80330 + }, + { + "epoch": 0.32458376596355, + "grad_norm": 519.3197631835938, + "learning_rate": 3.4320679166500386e-05, + "loss": 73.227, + "step": 80340 + }, + { + "epoch": 0.3246241672289176, + "grad_norm": 760.0364990234375, + "learning_rate": 3.4318729660436624e-05, + "loss": 144.6917, + "step": 80350 + }, + { + "epoch": 0.32466456849428527, + "grad_norm": 569.9627075195312, + "learning_rate": 3.431677987522186e-05, + "loss": 84.7361, + "step": 80360 + }, + { + "epoch": 0.32470496975965285, + "grad_norm": 579.46240234375, + "learning_rate": 3.4314829810894095e-05, + "loss": 101.3262, + "step": 80370 + }, + { + "epoch": 0.3247453710250205, + "grad_norm": 619.5802001953125, + "learning_rate": 3.431287946749136e-05, + "loss": 76.5642, + "step": 80380 + }, + { + "epoch": 0.32478577229038813, + "grad_norm": 907.4180297851562, + "learning_rate": 3.4310928845051656e-05, + "loss": 76.6683, + "step": 80390 + }, + { + "epoch": 0.32482617355575577, + "grad_norm": 882.3404541015625, + "learning_rate": 3.430897794361304e-05, + "loss": 95.0159, + "step": 80400 + }, + { + "epoch": 0.3248665748211234, + "grad_norm": 695.9164428710938, + "learning_rate": 3.430702676321353e-05, + "loss": 60.6096, + "step": 80410 + }, + { + "epoch": 0.32490697608649105, + "grad_norm": 543.0311889648438, + "learning_rate": 3.4305075303891165e-05, + "loss": 110.8082, + "step": 80420 + }, 
+ { + "epoch": 0.32494737735185864, + "grad_norm": 1037.7047119140625, + "learning_rate": 3.430312356568399e-05, + "loss": 113.455, + "step": 80430 + }, + { + "epoch": 0.3249877786172263, + "grad_norm": 549.5657958984375, + "learning_rate": 3.430117154863006e-05, + "loss": 49.9275, + "step": 80440 + }, + { + "epoch": 0.3250281798825939, + "grad_norm": 598.3822021484375, + "learning_rate": 3.4299219252767436e-05, + "loss": 72.9015, + "step": 80450 + }, + { + "epoch": 0.32506858114796156, + "grad_norm": 1266.7669677734375, + "learning_rate": 3.429726667813416e-05, + "loss": 90.4811, + "step": 80460 + }, + { + "epoch": 0.3251089824133292, + "grad_norm": 666.5301513671875, + "learning_rate": 3.429531382476832e-05, + "loss": 72.6916, + "step": 80470 + }, + { + "epoch": 0.32514938367869683, + "grad_norm": 574.971923828125, + "learning_rate": 3.429336069270796e-05, + "loss": 94.6429, + "step": 80480 + }, + { + "epoch": 0.3251897849440645, + "grad_norm": 874.7699584960938, + "learning_rate": 3.429140728199119e-05, + "loss": 95.6774, + "step": 80490 + }, + { + "epoch": 0.32523018620943206, + "grad_norm": 1087.751953125, + "learning_rate": 3.428945359265607e-05, + "loss": 103.3831, + "step": 80500 + }, + { + "epoch": 0.3252705874747997, + "grad_norm": 1198.3187255859375, + "learning_rate": 3.42874996247407e-05, + "loss": 91.6319, + "step": 80510 + }, + { + "epoch": 0.32531098874016734, + "grad_norm": 784.3353881835938, + "learning_rate": 3.4285545378283165e-05, + "loss": 87.1022, + "step": 80520 + }, + { + "epoch": 0.325351390005535, + "grad_norm": 589.7217407226562, + "learning_rate": 3.428359085332157e-05, + "loss": 100.3041, + "step": 80530 + }, + { + "epoch": 0.3253917912709026, + "grad_norm": 868.566650390625, + "learning_rate": 3.4281636049894014e-05, + "loss": 75.5045, + "step": 80540 + }, + { + "epoch": 0.32543219253627026, + "grad_norm": 995.0790405273438, + "learning_rate": 3.427968096803862e-05, + "loss": 93.7313, + "step": 80550 + }, + { + "epoch": 0.32547259380163784, + "grad_norm": 1274.196044921875, + "learning_rate": 3.4277725607793486e-05, + "loss": 129.7355, + "step": 80560 + }, + { + "epoch": 0.3255129950670055, + "grad_norm": 518.7147216796875, + "learning_rate": 3.4275769969196745e-05, + "loss": 98.0712, + "step": 80570 + }, + { + "epoch": 0.3255533963323731, + "grad_norm": 1208.1591796875, + "learning_rate": 3.427381405228651e-05, + "loss": 127.4496, + "step": 80580 + }, + { + "epoch": 0.32559379759774076, + "grad_norm": 582.1820678710938, + "learning_rate": 3.427185785710093e-05, + "loss": 66.4811, + "step": 80590 + }, + { + "epoch": 0.3256341988631084, + "grad_norm": 1634.705322265625, + "learning_rate": 3.426990138367813e-05, + "loss": 88.8994, + "step": 80600 + }, + { + "epoch": 0.32567460012847604, + "grad_norm": 750.9053955078125, + "learning_rate": 3.426794463205626e-05, + "loss": 64.2586, + "step": 80610 + }, + { + "epoch": 0.32571500139384363, + "grad_norm": 1021.0225830078125, + "learning_rate": 3.426598760227346e-05, + "loss": 71.6834, + "step": 80620 + }, + { + "epoch": 0.32575540265921127, + "grad_norm": 456.2236328125, + "learning_rate": 3.426403029436789e-05, + "loss": 109.4951, + "step": 80630 + }, + { + "epoch": 0.3257958039245789, + "grad_norm": 1051.0357666015625, + "learning_rate": 3.42620727083777e-05, + "loss": 82.0394, + "step": 80640 + }, + { + "epoch": 0.32583620518994655, + "grad_norm": 670.3539428710938, + "learning_rate": 3.426011484434106e-05, + "loss": 105.8032, + "step": 80650 + }, + { + "epoch": 0.3258766064553142, + "grad_norm": 
1280.7864990234375, + "learning_rate": 3.425815670229614e-05, + "loss": 69.5362, + "step": 80660 + }, + { + "epoch": 0.32591700772068183, + "grad_norm": 306.1927490234375, + "learning_rate": 3.425619828228112e-05, + "loss": 64.9585, + "step": 80670 + }, + { + "epoch": 0.32595740898604947, + "grad_norm": 996.3340454101562, + "learning_rate": 3.425423958433418e-05, + "loss": 77.8497, + "step": 80680 + }, + { + "epoch": 0.32599781025141705, + "grad_norm": 620.8635864257812, + "learning_rate": 3.425228060849349e-05, + "loss": 77.1932, + "step": 80690 + }, + { + "epoch": 0.3260382115167847, + "grad_norm": 777.19921875, + "learning_rate": 3.425032135479725e-05, + "loss": 77.7201, + "step": 80700 + }, + { + "epoch": 0.32607861278215233, + "grad_norm": 816.085693359375, + "learning_rate": 3.424836182328367e-05, + "loss": 97.9204, + "step": 80710 + }, + { + "epoch": 0.32611901404752, + "grad_norm": 1513.698974609375, + "learning_rate": 3.4246402013990935e-05, + "loss": 119.9163, + "step": 80720 + }, + { + "epoch": 0.3261594153128876, + "grad_norm": 144.25094604492188, + "learning_rate": 3.4244441926957254e-05, + "loss": 88.005, + "step": 80730 + }, + { + "epoch": 0.32619981657825525, + "grad_norm": 834.830322265625, + "learning_rate": 3.424248156222085e-05, + "loss": 101.9627, + "step": 80740 + }, + { + "epoch": 0.32624021784362284, + "grad_norm": 710.5476684570312, + "learning_rate": 3.424052091981994e-05, + "loss": 71.3839, + "step": 80750 + }, + { + "epoch": 0.3262806191089905, + "grad_norm": 550.9486083984375, + "learning_rate": 3.4238559999792726e-05, + "loss": 76.6738, + "step": 80760 + }, + { + "epoch": 0.3263210203743581, + "grad_norm": 572.1713256835938, + "learning_rate": 3.423659880217747e-05, + "loss": 91.9371, + "step": 80770 + }, + { + "epoch": 0.32636142163972576, + "grad_norm": 916.757568359375, + "learning_rate": 3.4234637327012384e-05, + "loss": 77.3964, + "step": 80780 + }, + { + "epoch": 0.3264018229050934, + "grad_norm": 453.0341491699219, + "learning_rate": 3.423267557433572e-05, + "loss": 69.498, + "step": 80790 + }, + { + "epoch": 0.32644222417046104, + "grad_norm": 1253.4246826171875, + "learning_rate": 3.4230713544185715e-05, + "loss": 115.8848, + "step": 80800 + }, + { + "epoch": 0.3264826254358287, + "grad_norm": 594.1629638671875, + "learning_rate": 3.4228751236600615e-05, + "loss": 58.6786, + "step": 80810 + }, + { + "epoch": 0.32652302670119626, + "grad_norm": 1456.5338134765625, + "learning_rate": 3.4226788651618695e-05, + "loss": 102.4919, + "step": 80820 + }, + { + "epoch": 0.3265634279665639, + "grad_norm": 805.6611328125, + "learning_rate": 3.42248257892782e-05, + "loss": 92.7382, + "step": 80830 + }, + { + "epoch": 0.32660382923193154, + "grad_norm": 654.2955932617188, + "learning_rate": 3.422286264961741e-05, + "loss": 60.4613, + "step": 80840 + }, + { + "epoch": 0.3266442304972992, + "grad_norm": 864.994140625, + "learning_rate": 3.422089923267458e-05, + "loss": 92.7002, + "step": 80850 + }, + { + "epoch": 0.3266846317626668, + "grad_norm": 480.80206298828125, + "learning_rate": 3.421893553848801e-05, + "loss": 76.6786, + "step": 80860 + }, + { + "epoch": 0.32672503302803446, + "grad_norm": 532.7603149414062, + "learning_rate": 3.421697156709596e-05, + "loss": 94.8448, + "step": 80870 + }, + { + "epoch": 0.32676543429340205, + "grad_norm": 144.42471313476562, + "learning_rate": 3.421500731853674e-05, + "loss": 54.0087, + "step": 80880 + }, + { + "epoch": 0.3268058355587697, + "grad_norm": 723.5667724609375, + "learning_rate": 3.421304279284862e-05, + "loss": 
85.0045, + "step": 80890 + }, + { + "epoch": 0.3268462368241373, + "grad_norm": 1158.078857421875, + "learning_rate": 3.421107799006992e-05, + "loss": 100.9421, + "step": 80900 + }, + { + "epoch": 0.32688663808950497, + "grad_norm": 823.580810546875, + "learning_rate": 3.420911291023894e-05, + "loss": 110.4574, + "step": 80910 + }, + { + "epoch": 0.3269270393548726, + "grad_norm": 879.3858032226562, + "learning_rate": 3.4207147553393996e-05, + "loss": 71.4737, + "step": 80920 + }, + { + "epoch": 0.32696744062024025, + "grad_norm": 1061.5045166015625, + "learning_rate": 3.420518191957339e-05, + "loss": 120.5221, + "step": 80930 + }, + { + "epoch": 0.32700784188560783, + "grad_norm": 536.9631958007812, + "learning_rate": 3.4203216008815446e-05, + "loss": 103.0346, + "step": 80940 + }, + { + "epoch": 0.32704824315097547, + "grad_norm": 580.5114135742188, + "learning_rate": 3.42012498211585e-05, + "loss": 70.1782, + "step": 80950 + }, + { + "epoch": 0.3270886444163431, + "grad_norm": 1218.1865234375, + "learning_rate": 3.419928335664087e-05, + "loss": 96.6035, + "step": 80960 + }, + { + "epoch": 0.32712904568171075, + "grad_norm": 713.701904296875, + "learning_rate": 3.4197316615300904e-05, + "loss": 92.5454, + "step": 80970 + }, + { + "epoch": 0.3271694469470784, + "grad_norm": 880.3414306640625, + "learning_rate": 3.419534959717694e-05, + "loss": 114.4208, + "step": 80980 + }, + { + "epoch": 0.32720984821244603, + "grad_norm": 1198.697509765625, + "learning_rate": 3.419338230230733e-05, + "loss": 109.8544, + "step": 80990 + }, + { + "epoch": 0.32725024947781367, + "grad_norm": 514.99951171875, + "learning_rate": 3.419141473073042e-05, + "loss": 61.5656, + "step": 81000 + }, + { + "epoch": 0.32729065074318126, + "grad_norm": 1353.1357421875, + "learning_rate": 3.418944688248458e-05, + "loss": 97.208, + "step": 81010 + }, + { + "epoch": 0.3273310520085489, + "grad_norm": 448.7248840332031, + "learning_rate": 3.4187478757608166e-05, + "loss": 74.2182, + "step": 81020 + }, + { + "epoch": 0.32737145327391654, + "grad_norm": 252.83009338378906, + "learning_rate": 3.418551035613954e-05, + "loss": 96.7466, + "step": 81030 + }, + { + "epoch": 0.3274118545392842, + "grad_norm": 680.1029052734375, + "learning_rate": 3.41835416781171e-05, + "loss": 101.2008, + "step": 81040 + }, + { + "epoch": 0.3274522558046518, + "grad_norm": 752.3997192382812, + "learning_rate": 3.4181572723579205e-05, + "loss": 84.8339, + "step": 81050 + }, + { + "epoch": 0.32749265707001946, + "grad_norm": 983.0833129882812, + "learning_rate": 3.417960349256425e-05, + "loss": 72.8283, + "step": 81060 + }, + { + "epoch": 0.32753305833538704, + "grad_norm": 1225.8087158203125, + "learning_rate": 3.417763398511063e-05, + "loss": 103.251, + "step": 81070 + }, + { + "epoch": 0.3275734596007547, + "grad_norm": 500.0487365722656, + "learning_rate": 3.417566420125673e-05, + "loss": 95.7342, + "step": 81080 + }, + { + "epoch": 0.3276138608661223, + "grad_norm": 605.1746826171875, + "learning_rate": 3.417369414104096e-05, + "loss": 56.1085, + "step": 81090 + }, + { + "epoch": 0.32765426213148996, + "grad_norm": 920.7807006835938, + "learning_rate": 3.417172380450173e-05, + "loss": 100.4185, + "step": 81100 + }, + { + "epoch": 0.3276946633968576, + "grad_norm": 681.1477661132812, + "learning_rate": 3.416975319167744e-05, + "loss": 110.3631, + "step": 81110 + }, + { + "epoch": 0.32773506466222524, + "grad_norm": 880.917724609375, + "learning_rate": 3.416778230260652e-05, + "loss": 83.8587, + "step": 81120 + }, + { + "epoch": 
0.3277754659275929, + "grad_norm": 3794.484130859375, + "learning_rate": 3.416581113732739e-05, + "loss": 90.1029, + "step": 81130 + }, + { + "epoch": 0.32781586719296046, + "grad_norm": 605.5606689453125, + "learning_rate": 3.416383969587848e-05, + "loss": 109.6089, + "step": 81140 + }, + { + "epoch": 0.3278562684583281, + "grad_norm": 1181.2374267578125, + "learning_rate": 3.4161867978298225e-05, + "loss": 91.0324, + "step": 81150 + }, + { + "epoch": 0.32789666972369574, + "grad_norm": 548.6836547851562, + "learning_rate": 3.415989598462506e-05, + "loss": 73.4884, + "step": 81160 + }, + { + "epoch": 0.3279370709890634, + "grad_norm": 825.3505249023438, + "learning_rate": 3.415792371489743e-05, + "loss": 90.9006, + "step": 81170 + }, + { + "epoch": 0.327977472254431, + "grad_norm": 3280.72021484375, + "learning_rate": 3.415595116915379e-05, + "loss": 85.3079, + "step": 81180 + }, + { + "epoch": 0.32801787351979866, + "grad_norm": 1123.3895263671875, + "learning_rate": 3.41539783474326e-05, + "loss": 91.0187, + "step": 81190 + }, + { + "epoch": 0.32805827478516625, + "grad_norm": 507.9526062011719, + "learning_rate": 3.415200524977231e-05, + "loss": 120.7857, + "step": 81200 + }, + { + "epoch": 0.3280986760505339, + "grad_norm": 538.5643920898438, + "learning_rate": 3.415003187621139e-05, + "loss": 77.7316, + "step": 81210 + }, + { + "epoch": 0.32813907731590153, + "grad_norm": 1080.7686767578125, + "learning_rate": 3.414805822678831e-05, + "loss": 53.6483, + "step": 81220 + }, + { + "epoch": 0.32817947858126917, + "grad_norm": 326.1369934082031, + "learning_rate": 3.4146084301541565e-05, + "loss": 94.1041, + "step": 81230 + }, + { + "epoch": 0.3282198798466368, + "grad_norm": 1239.002685546875, + "learning_rate": 3.414411010050962e-05, + "loss": 98.7519, + "step": 81240 + }, + { + "epoch": 0.32826028111200445, + "grad_norm": 1970.9263916015625, + "learning_rate": 3.4142135623730954e-05, + "loss": 143.7692, + "step": 81250 + }, + { + "epoch": 0.32830068237737203, + "grad_norm": 749.7283325195312, + "learning_rate": 3.4140160871244076e-05, + "loss": 69.9376, + "step": 81260 + }, + { + "epoch": 0.3283410836427397, + "grad_norm": 1026.7694091796875, + "learning_rate": 3.413818584308749e-05, + "loss": 96.9799, + "step": 81270 + }, + { + "epoch": 0.3283814849081073, + "grad_norm": 562.2042846679688, + "learning_rate": 3.413621053929969e-05, + "loss": 73.3324, + "step": 81280 + }, + { + "epoch": 0.32842188617347495, + "grad_norm": 295.7390441894531, + "learning_rate": 3.4134234959919185e-05, + "loss": 69.7105, + "step": 81290 + }, + { + "epoch": 0.3284622874388426, + "grad_norm": 865.0875244140625, + "learning_rate": 3.41322591049845e-05, + "loss": 134.7197, + "step": 81300 + }, + { + "epoch": 0.32850268870421023, + "grad_norm": 642.0349731445312, + "learning_rate": 3.413028297453414e-05, + "loss": 102.1331, + "step": 81310 + }, + { + "epoch": 0.3285430899695779, + "grad_norm": 869.1055908203125, + "learning_rate": 3.4128306568606644e-05, + "loss": 78.2881, + "step": 81320 + }, + { + "epoch": 0.32858349123494546, + "grad_norm": 721.7630615234375, + "learning_rate": 3.4126329887240536e-05, + "loss": 89.0291, + "step": 81330 + }, + { + "epoch": 0.3286238925003131, + "grad_norm": 542.7608032226562, + "learning_rate": 3.412435293047435e-05, + "loss": 107.6715, + "step": 81340 + }, + { + "epoch": 0.32866429376568074, + "grad_norm": 1634.424560546875, + "learning_rate": 3.4122375698346636e-05, + "loss": 76.1174, + "step": 81350 + }, + { + "epoch": 0.3287046950310484, + "grad_norm": 616.1383056640625, 
+ "learning_rate": 3.412039819089593e-05, + "loss": 98.9129, + "step": 81360 + }, + { + "epoch": 0.328745096296416, + "grad_norm": 1188.3433837890625, + "learning_rate": 3.41184204081608e-05, + "loss": 153.9769, + "step": 81370 + }, + { + "epoch": 0.32878549756178366, + "grad_norm": 537.6582641601562, + "learning_rate": 3.41164423501798e-05, + "loss": 82.9088, + "step": 81380 + }, + { + "epoch": 0.32882589882715124, + "grad_norm": 1194.1798095703125, + "learning_rate": 3.4114464016991476e-05, + "loss": 78.9409, + "step": 81390 + }, + { + "epoch": 0.3288663000925189, + "grad_norm": 767.0038452148438, + "learning_rate": 3.411248540863442e-05, + "loss": 75.4869, + "step": 81400 + }, + { + "epoch": 0.3289067013578865, + "grad_norm": 1243.857177734375, + "learning_rate": 3.411050652514719e-05, + "loss": 82.6459, + "step": 81410 + }, + { + "epoch": 0.32894710262325416, + "grad_norm": 595.5067749023438, + "learning_rate": 3.410852736656837e-05, + "loss": 69.3079, + "step": 81420 + }, + { + "epoch": 0.3289875038886218, + "grad_norm": 389.2837219238281, + "learning_rate": 3.410654793293654e-05, + "loss": 97.0277, + "step": 81430 + }, + { + "epoch": 0.32902790515398944, + "grad_norm": 714.5917358398438, + "learning_rate": 3.410456822429031e-05, + "loss": 92.2672, + "step": 81440 + }, + { + "epoch": 0.3290683064193571, + "grad_norm": 747.3195190429688, + "learning_rate": 3.410258824066825e-05, + "loss": 79.1761, + "step": 81450 + }, + { + "epoch": 0.32910870768472467, + "grad_norm": 1077.8055419921875, + "learning_rate": 3.4100607982108975e-05, + "loss": 104.3825, + "step": 81460 + }, + { + "epoch": 0.3291491089500923, + "grad_norm": 974.7617797851562, + "learning_rate": 3.409862744865109e-05, + "loss": 116.469, + "step": 81470 + }, + { + "epoch": 0.32918951021545995, + "grad_norm": 644.7344970703125, + "learning_rate": 3.4096646640333205e-05, + "loss": 74.1807, + "step": 81480 + }, + { + "epoch": 0.3292299114808276, + "grad_norm": 447.3160095214844, + "learning_rate": 3.4094665557193934e-05, + "loss": 96.8065, + "step": 81490 + }, + { + "epoch": 0.3292703127461952, + "grad_norm": 1215.2684326171875, + "learning_rate": 3.4092684199271896e-05, + "loss": 93.6477, + "step": 81500 + }, + { + "epoch": 0.32931071401156287, + "grad_norm": 1265.0263671875, + "learning_rate": 3.409070256660573e-05, + "loss": 105.2999, + "step": 81510 + }, + { + "epoch": 0.32935111527693045, + "grad_norm": 719.319091796875, + "learning_rate": 3.408872065923406e-05, + "loss": 91.0087, + "step": 81520 + }, + { + "epoch": 0.3293915165422981, + "grad_norm": 383.0025329589844, + "learning_rate": 3.408673847719553e-05, + "loss": 86.5457, + "step": 81530 + }, + { + "epoch": 0.32943191780766573, + "grad_norm": 691.1478881835938, + "learning_rate": 3.408475602052878e-05, + "loss": 75.2497, + "step": 81540 + }, + { + "epoch": 0.32947231907303337, + "grad_norm": 577.31982421875, + "learning_rate": 3.408277328927246e-05, + "loss": 81.2548, + "step": 81550 + }, + { + "epoch": 0.329512720338401, + "grad_norm": 1709.5948486328125, + "learning_rate": 3.408079028346523e-05, + "loss": 79.5622, + "step": 81560 + }, + { + "epoch": 0.32955312160376865, + "grad_norm": 1045.0361328125, + "learning_rate": 3.407880700314574e-05, + "loss": 99.8883, + "step": 81570 + }, + { + "epoch": 0.32959352286913624, + "grad_norm": 1143.74609375, + "learning_rate": 3.407682344835266e-05, + "loss": 93.4449, + "step": 81580 + }, + { + "epoch": 0.3296339241345039, + "grad_norm": 541.8212280273438, + "learning_rate": 3.407483961912465e-05, + "loss": 91.8025, + "step": 
81590 + }, + { + "epoch": 0.3296743253998715, + "grad_norm": 1524.729736328125, + "learning_rate": 3.407285551550041e-05, + "loss": 89.4133, + "step": 81600 + }, + { + "epoch": 0.32971472666523916, + "grad_norm": 455.4547424316406, + "learning_rate": 3.40708711375186e-05, + "loss": 111.9685, + "step": 81610 + }, + { + "epoch": 0.3297551279306068, + "grad_norm": 670.6732177734375, + "learning_rate": 3.4068886485217915e-05, + "loss": 68.1608, + "step": 81620 + }, + { + "epoch": 0.32979552919597444, + "grad_norm": 684.7296752929688, + "learning_rate": 3.406690155863704e-05, + "loss": 86.8711, + "step": 81630 + }, + { + "epoch": 0.3298359304613421, + "grad_norm": 484.4308166503906, + "learning_rate": 3.406491635781468e-05, + "loss": 85.4779, + "step": 81640 + }, + { + "epoch": 0.32987633172670966, + "grad_norm": 764.1007690429688, + "learning_rate": 3.406293088278953e-05, + "loss": 77.848, + "step": 81650 + }, + { + "epoch": 0.3299167329920773, + "grad_norm": 282.45166015625, + "learning_rate": 3.406094513360031e-05, + "loss": 76.6384, + "step": 81660 + }, + { + "epoch": 0.32995713425744494, + "grad_norm": 911.144775390625, + "learning_rate": 3.4058959110285724e-05, + "loss": 105.8706, + "step": 81670 + }, + { + "epoch": 0.3299975355228126, + "grad_norm": 1107.4212646484375, + "learning_rate": 3.4056972812884495e-05, + "loss": 146.4026, + "step": 81680 + }, + { + "epoch": 0.3300379367881802, + "grad_norm": 775.5628662109375, + "learning_rate": 3.405498624143533e-05, + "loss": 117.832, + "step": 81690 + }, + { + "epoch": 0.33007833805354786, + "grad_norm": 1377.5714111328125, + "learning_rate": 3.405299939597699e-05, + "loss": 115.358, + "step": 81700 + }, + { + "epoch": 0.33011873931891544, + "grad_norm": 750.0800170898438, + "learning_rate": 3.405101227654818e-05, + "loss": 119.2559, + "step": 81710 + }, + { + "epoch": 0.3301591405842831, + "grad_norm": 1357.65185546875, + "learning_rate": 3.404902488318766e-05, + "loss": 90.8858, + "step": 81720 + }, + { + "epoch": 0.3301995418496507, + "grad_norm": 512.173583984375, + "learning_rate": 3.404703721593416e-05, + "loss": 98.2772, + "step": 81730 + }, + { + "epoch": 0.33023994311501836, + "grad_norm": 850.8135986328125, + "learning_rate": 3.404504927482644e-05, + "loss": 110.2191, + "step": 81740 + }, + { + "epoch": 0.330280344380386, + "grad_norm": 1433.7340087890625, + "learning_rate": 3.4043061059903254e-05, + "loss": 93.5103, + "step": 81750 + }, + { + "epoch": 0.33032074564575364, + "grad_norm": 905.814453125, + "learning_rate": 3.404107257120336e-05, + "loss": 103.1108, + "step": 81760 + }, + { + "epoch": 0.3303611469111213, + "grad_norm": 1280.8375244140625, + "learning_rate": 3.4039083808765534e-05, + "loss": 101.5704, + "step": 81770 + }, + { + "epoch": 0.33040154817648887, + "grad_norm": 448.72723388671875, + "learning_rate": 3.403709477262853e-05, + "loss": 132.3038, + "step": 81780 + }, + { + "epoch": 0.3304419494418565, + "grad_norm": 983.0819091796875, + "learning_rate": 3.403510546283115e-05, + "loss": 87.6671, + "step": 81790 + }, + { + "epoch": 0.33048235070722415, + "grad_norm": 1228.34228515625, + "learning_rate": 3.403311587941215e-05, + "loss": 126.6853, + "step": 81800 + }, + { + "epoch": 0.3305227519725918, + "grad_norm": 460.838623046875, + "learning_rate": 3.403112602241034e-05, + "loss": 86.2539, + "step": 81810 + }, + { + "epoch": 0.33056315323795943, + "grad_norm": 563.1288452148438, + "learning_rate": 3.40291358918645e-05, + "loss": 82.2361, + "step": 81820 + }, + { + "epoch": 0.33060355450332707, + "grad_norm": 
668.7342529296875, + "learning_rate": 3.402714548781344e-05, + "loss": 96.9953, + "step": 81830 + }, + { + "epoch": 0.33064395576869465, + "grad_norm": 2032.5028076171875, + "learning_rate": 3.402515481029595e-05, + "loss": 121.7971, + "step": 81840 + }, + { + "epoch": 0.3306843570340623, + "grad_norm": 1102.6875, + "learning_rate": 3.402316385935085e-05, + "loss": 117.2429, + "step": 81850 + }, + { + "epoch": 0.33072475829942993, + "grad_norm": 991.3683471679688, + "learning_rate": 3.402117263501695e-05, + "loss": 117.3886, + "step": 81860 + }, + { + "epoch": 0.3307651595647976, + "grad_norm": 647.4558715820312, + "learning_rate": 3.4019181137333066e-05, + "loss": 91.3662, + "step": 81870 + }, + { + "epoch": 0.3308055608301652, + "grad_norm": 842.1040649414062, + "learning_rate": 3.4017189366338036e-05, + "loss": 103.8474, + "step": 81880 + }, + { + "epoch": 0.33084596209553285, + "grad_norm": 635.5745239257812, + "learning_rate": 3.401519732207068e-05, + "loss": 40.4842, + "step": 81890 + }, + { + "epoch": 0.33088636336090044, + "grad_norm": 990.4810180664062, + "learning_rate": 3.401320500456984e-05, + "loss": 99.5591, + "step": 81900 + }, + { + "epoch": 0.3309267646262681, + "grad_norm": 552.2075805664062, + "learning_rate": 3.401121241387435e-05, + "loss": 100.2366, + "step": 81910 + }, + { + "epoch": 0.3309671658916357, + "grad_norm": 1086.6016845703125, + "learning_rate": 3.400921955002306e-05, + "loss": 119.2104, + "step": 81920 + }, + { + "epoch": 0.33100756715700336, + "grad_norm": 793.2503051757812, + "learning_rate": 3.4007226413054824e-05, + "loss": 81.8074, + "step": 81930 + }, + { + "epoch": 0.331047968422371, + "grad_norm": 1086.0770263671875, + "learning_rate": 3.400523300300849e-05, + "loss": 113.0809, + "step": 81940 + }, + { + "epoch": 0.33108836968773864, + "grad_norm": 747.408447265625, + "learning_rate": 3.4003239319922935e-05, + "loss": 71.8625, + "step": 81950 + }, + { + "epoch": 0.3311287709531063, + "grad_norm": 483.36322021484375, + "learning_rate": 3.4001245363837025e-05, + "loss": 88.8963, + "step": 81960 + }, + { + "epoch": 0.33116917221847386, + "grad_norm": 736.8058471679688, + "learning_rate": 3.3999251134789624e-05, + "loss": 75.7534, + "step": 81970 + }, + { + "epoch": 0.3312095734838415, + "grad_norm": 1581.167724609375, + "learning_rate": 3.3997256632819616e-05, + "loss": 110.1328, + "step": 81980 + }, + { + "epoch": 0.33124997474920914, + "grad_norm": 684.072998046875, + "learning_rate": 3.399526185796588e-05, + "loss": 73.6931, + "step": 81990 + }, + { + "epoch": 0.3312903760145768, + "grad_norm": 913.3223266601562, + "learning_rate": 3.399326681026731e-05, + "loss": 91.2296, + "step": 82000 + }, + { + "epoch": 0.3313307772799444, + "grad_norm": 804.4471435546875, + "learning_rate": 3.399127148976281e-05, + "loss": 102.2406, + "step": 82010 + }, + { + "epoch": 0.33137117854531206, + "grad_norm": 590.2749633789062, + "learning_rate": 3.398927589649125e-05, + "loss": 43.5383, + "step": 82020 + }, + { + "epoch": 0.33141157981067965, + "grad_norm": 572.0638427734375, + "learning_rate": 3.3987280030491564e-05, + "loss": 94.2182, + "step": 82030 + }, + { + "epoch": 0.3314519810760473, + "grad_norm": 1138.6346435546875, + "learning_rate": 3.398528389180265e-05, + "loss": 85.5984, + "step": 82040 + }, + { + "epoch": 0.3314923823414149, + "grad_norm": 2685.7275390625, + "learning_rate": 3.398328748046343e-05, + "loss": 105.5459, + "step": 82050 + }, + { + "epoch": 0.33153278360678257, + "grad_norm": 842.0713500976562, + "learning_rate": 
3.3981290796512825e-05, + "loss": 78.8763, + "step": 82060 + }, + { + "epoch": 0.3315731848721502, + "grad_norm": 529.815185546875, + "learning_rate": 3.397929383998975e-05, + "loss": 88.9784, + "step": 82070 + }, + { + "epoch": 0.33161358613751785, + "grad_norm": 864.5286254882812, + "learning_rate": 3.3977296610933145e-05, + "loss": 76.7248, + "step": 82080 + }, + { + "epoch": 0.33165398740288543, + "grad_norm": 647.1029052734375, + "learning_rate": 3.397529910938195e-05, + "loss": 79.882, + "step": 82090 + }, + { + "epoch": 0.33169438866825307, + "grad_norm": 306.9989929199219, + "learning_rate": 3.3973301335375104e-05, + "loss": 89.8121, + "step": 82100 + }, + { + "epoch": 0.3317347899336207, + "grad_norm": 490.67364501953125, + "learning_rate": 3.3971303288951554e-05, + "loss": 113.188, + "step": 82110 + }, + { + "epoch": 0.33177519119898835, + "grad_norm": 781.9036865234375, + "learning_rate": 3.3969304970150255e-05, + "loss": 72.8038, + "step": 82120 + }, + { + "epoch": 0.331815592464356, + "grad_norm": 899.6373291015625, + "learning_rate": 3.396730637901016e-05, + "loss": 104.1216, + "step": 82130 + }, + { + "epoch": 0.33185599372972363, + "grad_norm": 927.2955322265625, + "learning_rate": 3.396530751557024e-05, + "loss": 109.0089, + "step": 82140 + }, + { + "epoch": 0.33189639499509127, + "grad_norm": 753.3737182617188, + "learning_rate": 3.396330837986946e-05, + "loss": 110.2347, + "step": 82150 + }, + { + "epoch": 0.33193679626045886, + "grad_norm": 437.6397705078125, + "learning_rate": 3.396130897194679e-05, + "loss": 76.3214, + "step": 82160 + }, + { + "epoch": 0.3319771975258265, + "grad_norm": 812.8916625976562, + "learning_rate": 3.395930929184122e-05, + "loss": 66.0082, + "step": 82170 + }, + { + "epoch": 0.33201759879119414, + "grad_norm": 1167.2698974609375, + "learning_rate": 3.395730933959172e-05, + "loss": 87.5702, + "step": 82180 + }, + { + "epoch": 0.3320580000565618, + "grad_norm": 640.4153442382812, + "learning_rate": 3.3955309115237296e-05, + "loss": 77.2734, + "step": 82190 + }, + { + "epoch": 0.3320984013219294, + "grad_norm": 630.8369140625, + "learning_rate": 3.395330861881693e-05, + "loss": 89.8761, + "step": 82200 + }, + { + "epoch": 0.33213880258729706, + "grad_norm": 863.7056274414062, + "learning_rate": 3.3951307850369634e-05, + "loss": 57.4858, + "step": 82210 + }, + { + "epoch": 0.33217920385266464, + "grad_norm": 565.8027954101562, + "learning_rate": 3.394930680993441e-05, + "loss": 119.0236, + "step": 82220 + }, + { + "epoch": 0.3322196051180323, + "grad_norm": 577.5712280273438, + "learning_rate": 3.394730549755027e-05, + "loss": 79.8449, + "step": 82230 + }, + { + "epoch": 0.3322600063833999, + "grad_norm": 547.7821655273438, + "learning_rate": 3.3945303913256216e-05, + "loss": 89.3276, + "step": 82240 + }, + { + "epoch": 0.33230040764876756, + "grad_norm": 706.7440795898438, + "learning_rate": 3.39433020570913e-05, + "loss": 102.9866, + "step": 82250 + }, + { + "epoch": 0.3323408089141352, + "grad_norm": 925.5498657226562, + "learning_rate": 3.394129992909452e-05, + "loss": 92.1777, + "step": 82260 + }, + { + "epoch": 0.33238121017950284, + "grad_norm": 4258.736328125, + "learning_rate": 3.3939297529304926e-05, + "loss": 93.1028, + "step": 82270 + }, + { + "epoch": 0.3324216114448705, + "grad_norm": 641.3876342773438, + "learning_rate": 3.393729485776154e-05, + "loss": 128.9514, + "step": 82280 + }, + { + "epoch": 0.33246201271023806, + "grad_norm": 895.9146728515625, + "learning_rate": 3.393529191450342e-05, + "loss": 90.2117, + "step": 82290 + 
}, + { + "epoch": 0.3325024139756057, + "grad_norm": 613.1245727539062, + "learning_rate": 3.393328869956962e-05, + "loss": 64.5785, + "step": 82300 + }, + { + "epoch": 0.33254281524097334, + "grad_norm": 397.8802185058594, + "learning_rate": 3.393128521299917e-05, + "loss": 70.581, + "step": 82310 + }, + { + "epoch": 0.332583216506341, + "grad_norm": 752.7325439453125, + "learning_rate": 3.392928145483115e-05, + "loss": 90.5952, + "step": 82320 + }, + { + "epoch": 0.3326236177717086, + "grad_norm": 767.4852905273438, + "learning_rate": 3.392727742510462e-05, + "loss": 110.1143, + "step": 82330 + }, + { + "epoch": 0.33266401903707626, + "grad_norm": 786.9341430664062, + "learning_rate": 3.3925273123858644e-05, + "loss": 91.6902, + "step": 82340 + }, + { + "epoch": 0.33270442030244385, + "grad_norm": 638.873291015625, + "learning_rate": 3.3923268551132294e-05, + "loss": 75.5683, + "step": 82350 + }, + { + "epoch": 0.3327448215678115, + "grad_norm": 1746.9228515625, + "learning_rate": 3.392126370696466e-05, + "loss": 127.3457, + "step": 82360 + }, + { + "epoch": 0.33278522283317913, + "grad_norm": 715.8932495117188, + "learning_rate": 3.391925859139482e-05, + "loss": 84.4223, + "step": 82370 + }, + { + "epoch": 0.33282562409854677, + "grad_norm": 865.2219848632812, + "learning_rate": 3.391725320446187e-05, + "loss": 98.9219, + "step": 82380 + }, + { + "epoch": 0.3328660253639144, + "grad_norm": 561.4866333007812, + "learning_rate": 3.3915247546204905e-05, + "loss": 96.9254, + "step": 82390 + }, + { + "epoch": 0.33290642662928205, + "grad_norm": 848.194580078125, + "learning_rate": 3.391324161666302e-05, + "loss": 101.265, + "step": 82400 + }, + { + "epoch": 0.33294682789464963, + "grad_norm": 1263.443115234375, + "learning_rate": 3.3911235415875326e-05, + "loss": 119.1247, + "step": 82410 + }, + { + "epoch": 0.3329872291600173, + "grad_norm": 1914.959228515625, + "learning_rate": 3.390922894388094e-05, + "loss": 81.7236, + "step": 82420 + }, + { + "epoch": 0.3330276304253849, + "grad_norm": 797.5968017578125, + "learning_rate": 3.390722220071897e-05, + "loss": 85.5707, + "step": 82430 + }, + { + "epoch": 0.33306803169075255, + "grad_norm": 866.651123046875, + "learning_rate": 3.390521518642855e-05, + "loss": 105.0089, + "step": 82440 + }, + { + "epoch": 0.3331084329561202, + "grad_norm": 502.738525390625, + "learning_rate": 3.39032079010488e-05, + "loss": 85.7633, + "step": 82450 + }, + { + "epoch": 0.33314883422148783, + "grad_norm": 1309.3475341796875, + "learning_rate": 3.390120034461886e-05, + "loss": 138.5502, + "step": 82460 + }, + { + "epoch": 0.3331892354868555, + "grad_norm": 688.4195556640625, + "learning_rate": 3.389919251717785e-05, + "loss": 135.6846, + "step": 82470 + }, + { + "epoch": 0.33322963675222306, + "grad_norm": 685.7501220703125, + "learning_rate": 3.3897184418764925e-05, + "loss": 97.6531, + "step": 82480 + }, + { + "epoch": 0.3332700380175907, + "grad_norm": 679.0531616210938, + "learning_rate": 3.389517604941924e-05, + "loss": 104.5268, + "step": 82490 + }, + { + "epoch": 0.33331043928295834, + "grad_norm": 1466.986328125, + "learning_rate": 3.3893167409179945e-05, + "loss": 88.4949, + "step": 82500 + }, + { + "epoch": 0.333350840548326, + "grad_norm": 1146.657470703125, + "learning_rate": 3.389115849808621e-05, + "loss": 106.2228, + "step": 82510 + }, + { + "epoch": 0.3333912418136936, + "grad_norm": 815.4164428710938, + "learning_rate": 3.3889149316177167e-05, + "loss": 108.8307, + "step": 82520 + }, + { + "epoch": 0.33343164307906126, + "grad_norm": 
597.8690795898438, + "learning_rate": 3.388713986349202e-05, + "loss": 87.5178, + "step": 82530 + }, + { + "epoch": 0.33347204434442884, + "grad_norm": 939.6650390625, + "learning_rate": 3.388513014006993e-05, + "loss": 78.0222, + "step": 82540 + }, + { + "epoch": 0.3335124456097965, + "grad_norm": 745.924072265625, + "learning_rate": 3.388312014595008e-05, + "loss": 108.2532, + "step": 82550 + }, + { + "epoch": 0.3335528468751641, + "grad_norm": 582.1250610351562, + "learning_rate": 3.3881109881171656e-05, + "loss": 94.6473, + "step": 82560 + }, + { + "epoch": 0.33359324814053176, + "grad_norm": 532.5968017578125, + "learning_rate": 3.387909934577384e-05, + "loss": 76.7973, + "step": 82570 + }, + { + "epoch": 0.3336336494058994, + "grad_norm": 623.2122192382812, + "learning_rate": 3.387708853979585e-05, + "loss": 69.1601, + "step": 82580 + }, + { + "epoch": 0.33367405067126704, + "grad_norm": 1112.3890380859375, + "learning_rate": 3.3875077463276865e-05, + "loss": 103.8767, + "step": 82590 + }, + { + "epoch": 0.3337144519366347, + "grad_norm": 1004.6741333007812, + "learning_rate": 3.38730661162561e-05, + "loss": 99.4126, + "step": 82600 + }, + { + "epoch": 0.33375485320200227, + "grad_norm": 651.276123046875, + "learning_rate": 3.387105449877278e-05, + "loss": 63.5688, + "step": 82610 + }, + { + "epoch": 0.3337952544673699, + "grad_norm": 571.7739868164062, + "learning_rate": 3.38690426108661e-05, + "loss": 94.6158, + "step": 82620 + }, + { + "epoch": 0.33383565573273755, + "grad_norm": 600.306640625, + "learning_rate": 3.3867030452575296e-05, + "loss": 93.8475, + "step": 82630 + }, + { + "epoch": 0.3338760569981052, + "grad_norm": 818.9310302734375, + "learning_rate": 3.3865018023939595e-05, + "loss": 120.521, + "step": 82640 + }, + { + "epoch": 0.3339164582634728, + "grad_norm": 445.28155517578125, + "learning_rate": 3.386300532499823e-05, + "loss": 103.5589, + "step": 82650 + }, + { + "epoch": 0.33395685952884047, + "grad_norm": 455.9356689453125, + "learning_rate": 3.386099235579044e-05, + "loss": 119.9553, + "step": 82660 + }, + { + "epoch": 0.33399726079420805, + "grad_norm": 1061.396240234375, + "learning_rate": 3.385897911635547e-05, + "loss": 95.5534, + "step": 82670 + }, + { + "epoch": 0.3340376620595757, + "grad_norm": 478.8028869628906, + "learning_rate": 3.385696560673257e-05, + "loss": 72.2221, + "step": 82680 + }, + { + "epoch": 0.33407806332494333, + "grad_norm": 1176.6826171875, + "learning_rate": 3.385495182696098e-05, + "loss": 82.856, + "step": 82690 + }, + { + "epoch": 0.33411846459031097, + "grad_norm": 576.0626831054688, + "learning_rate": 3.3852937777079976e-05, + "loss": 48.9711, + "step": 82700 + }, + { + "epoch": 0.3341588658556786, + "grad_norm": 810.8895874023438, + "learning_rate": 3.385092345712882e-05, + "loss": 63.2003, + "step": 82710 + }, + { + "epoch": 0.33419926712104625, + "grad_norm": 933.2525634765625, + "learning_rate": 3.3848908867146784e-05, + "loss": 107.334, + "step": 82720 + }, + { + "epoch": 0.33423966838641384, + "grad_norm": 671.3782958984375, + "learning_rate": 3.3846894007173135e-05, + "loss": 87.3292, + "step": 82730 + }, + { + "epoch": 0.3342800696517815, + "grad_norm": 1546.299560546875, + "learning_rate": 3.384487887724716e-05, + "loss": 103.1651, + "step": 82740 + }, + { + "epoch": 0.3343204709171491, + "grad_norm": 847.4645385742188, + "learning_rate": 3.384286347740814e-05, + "loss": 80.8093, + "step": 82750 + }, + { + "epoch": 0.33436087218251676, + "grad_norm": 1101.0120849609375, + "learning_rate": 3.3840847807695367e-05, + 
"loss": 90.2314, + "step": 82760 + }, + { + "epoch": 0.3344012734478844, + "grad_norm": 733.326171875, + "learning_rate": 3.383883186814815e-05, + "loss": 76.9799, + "step": 82770 + }, + { + "epoch": 0.33444167471325204, + "grad_norm": 574.5037231445312, + "learning_rate": 3.3836815658805776e-05, + "loss": 71.756, + "step": 82780 + }, + { + "epoch": 0.3344820759786197, + "grad_norm": 1733.629150390625, + "learning_rate": 3.383479917970756e-05, + "loss": 107.1346, + "step": 82790 + }, + { + "epoch": 0.33452247724398726, + "grad_norm": 676.9070434570312, + "learning_rate": 3.3832782430892806e-05, + "loss": 88.2184, + "step": 82800 + }, + { + "epoch": 0.3345628785093549, + "grad_norm": 860.1717529296875, + "learning_rate": 3.383076541240084e-05, + "loss": 91.0974, + "step": 82810 + }, + { + "epoch": 0.33460327977472254, + "grad_norm": 359.771728515625, + "learning_rate": 3.3828748124270983e-05, + "loss": 94.2763, + "step": 82820 + }, + { + "epoch": 0.3346436810400902, + "grad_norm": 1193.3929443359375, + "learning_rate": 3.3826730566542555e-05, + "loss": 78.1795, + "step": 82830 + }, + { + "epoch": 0.3346840823054578, + "grad_norm": 608.0477905273438, + "learning_rate": 3.3824712739254904e-05, + "loss": 91.2345, + "step": 82840 + }, + { + "epoch": 0.33472448357082546, + "grad_norm": 712.8842163085938, + "learning_rate": 3.382269464244736e-05, + "loss": 80.289, + "step": 82850 + }, + { + "epoch": 0.33476488483619304, + "grad_norm": 727.74951171875, + "learning_rate": 3.382067627615926e-05, + "loss": 105.4943, + "step": 82860 + }, + { + "epoch": 0.3348052861015607, + "grad_norm": 493.4693298339844, + "learning_rate": 3.381865764042997e-05, + "loss": 109.5427, + "step": 82870 + }, + { + "epoch": 0.3348456873669283, + "grad_norm": 1741.708740234375, + "learning_rate": 3.381663873529883e-05, + "loss": 95.5162, + "step": 82880 + }, + { + "epoch": 0.33488608863229596, + "grad_norm": 464.9770202636719, + "learning_rate": 3.3814619560805205e-05, + "loss": 134.0782, + "step": 82890 + }, + { + "epoch": 0.3349264898976636, + "grad_norm": 772.6989135742188, + "learning_rate": 3.381260011698846e-05, + "loss": 62.8096, + "step": 82900 + }, + { + "epoch": 0.33496689116303124, + "grad_norm": 580.4071044921875, + "learning_rate": 3.3810580403887965e-05, + "loss": 87.5291, + "step": 82910 + }, + { + "epoch": 0.3350072924283989, + "grad_norm": 594.6162719726562, + "learning_rate": 3.3808560421543094e-05, + "loss": 79.712, + "step": 82920 + }, + { + "epoch": 0.33504769369376647, + "grad_norm": 364.3687744140625, + "learning_rate": 3.380654016999323e-05, + "loss": 82.58, + "step": 82930 + }, + { + "epoch": 0.3350880949591341, + "grad_norm": 573.33642578125, + "learning_rate": 3.3804519649277754e-05, + "loss": 85.6136, + "step": 82940 + }, + { + "epoch": 0.33512849622450175, + "grad_norm": 769.1336669921875, + "learning_rate": 3.3802498859436066e-05, + "loss": 99.9758, + "step": 82950 + }, + { + "epoch": 0.3351688974898694, + "grad_norm": 2037.6842041015625, + "learning_rate": 3.3800477800507555e-05, + "loss": 102.6061, + "step": 82960 + }, + { + "epoch": 0.33520929875523703, + "grad_norm": 964.8970336914062, + "learning_rate": 3.379845647253162e-05, + "loss": 91.3694, + "step": 82970 + }, + { + "epoch": 0.33524970002060467, + "grad_norm": 756.004638671875, + "learning_rate": 3.3796434875547675e-05, + "loss": 74.7638, + "step": 82980 + }, + { + "epoch": 0.33529010128597225, + "grad_norm": 1122.3616943359375, + "learning_rate": 3.379441300959513e-05, + "loss": 114.6603, + "step": 82990 + }, + { + "epoch": 
0.3353305025513399, + "grad_norm": 1017.9636840820312, + "learning_rate": 3.37923908747134e-05, + "loss": 104.5223, + "step": 83000 + }, + { + "epoch": 0.33537090381670753, + "grad_norm": 543.82373046875, + "learning_rate": 3.379036847094191e-05, + "loss": 108.052, + "step": 83010 + }, + { + "epoch": 0.3354113050820752, + "grad_norm": 495.5430603027344, + "learning_rate": 3.378834579832008e-05, + "loss": 100.0261, + "step": 83020 + }, + { + "epoch": 0.3354517063474428, + "grad_norm": 864.05078125, + "learning_rate": 3.378632285688736e-05, + "loss": 113.4268, + "step": 83030 + }, + { + "epoch": 0.33549210761281045, + "grad_norm": 1108.4859619140625, + "learning_rate": 3.378429964668318e-05, + "loss": 81.4026, + "step": 83040 + }, + { + "epoch": 0.33553250887817804, + "grad_norm": 950.2079467773438, + "learning_rate": 3.378227616774697e-05, + "loss": 73.485, + "step": 83050 + }, + { + "epoch": 0.3355729101435457, + "grad_norm": 1102.7459716796875, + "learning_rate": 3.37802524201182e-05, + "loss": 98.2308, + "step": 83060 + }, + { + "epoch": 0.3356133114089133, + "grad_norm": 464.7413635253906, + "learning_rate": 3.377822840383632e-05, + "loss": 116.2588, + "step": 83070 + }, + { + "epoch": 0.33565371267428096, + "grad_norm": 456.1861572265625, + "learning_rate": 3.377620411894077e-05, + "loss": 68.7959, + "step": 83080 + }, + { + "epoch": 0.3356941139396486, + "grad_norm": 330.5731201171875, + "learning_rate": 3.377417956547103e-05, + "loss": 119.5674, + "step": 83090 + }, + { + "epoch": 0.33573451520501624, + "grad_norm": 652.9197387695312, + "learning_rate": 3.377215474346657e-05, + "loss": 82.99, + "step": 83100 + }, + { + "epoch": 0.3357749164703839, + "grad_norm": 2032.3016357421875, + "learning_rate": 3.377012965296687e-05, + "loss": 78.5033, + "step": 83110 + }, + { + "epoch": 0.33581531773575146, + "grad_norm": 767.1682739257812, + "learning_rate": 3.3768104294011394e-05, + "loss": 103.5421, + "step": 83120 + }, + { + "epoch": 0.3358557190011191, + "grad_norm": 911.9225463867188, + "learning_rate": 3.376607866663964e-05, + "loss": 77.2958, + "step": 83130 + }, + { + "epoch": 0.33589612026648674, + "grad_norm": 966.0482788085938, + "learning_rate": 3.3764052770891095e-05, + "loss": 95.5171, + "step": 83140 + }, + { + "epoch": 0.3359365215318544, + "grad_norm": 913.3355102539062, + "learning_rate": 3.376202660680526e-05, + "loss": 96.093, + "step": 83150 + }, + { + "epoch": 0.335976922797222, + "grad_norm": 212.34030151367188, + "learning_rate": 3.376000017442162e-05, + "loss": 88.2281, + "step": 83160 + }, + { + "epoch": 0.33601732406258966, + "grad_norm": 637.5535888671875, + "learning_rate": 3.37579734737797e-05, + "loss": 74.6872, + "step": 83170 + }, + { + "epoch": 0.33605772532795725, + "grad_norm": 2463.731201171875, + "learning_rate": 3.3755946504919e-05, + "loss": 148.0883, + "step": 83180 + }, + { + "epoch": 0.3360981265933249, + "grad_norm": 884.7205200195312, + "learning_rate": 3.375391926787905e-05, + "loss": 92.6693, + "step": 83190 + }, + { + "epoch": 0.3361385278586925, + "grad_norm": 981.3764038085938, + "learning_rate": 3.375189176269935e-05, + "loss": 90.9363, + "step": 83200 + }, + { + "epoch": 0.33617892912406017, + "grad_norm": 866.52099609375, + "learning_rate": 3.374986398941944e-05, + "loss": 141.9244, + "step": 83210 + }, + { + "epoch": 0.3362193303894278, + "grad_norm": 779.2120971679688, + "learning_rate": 3.374783594807887e-05, + "loss": 92.2276, + "step": 83220 + }, + { + "epoch": 0.33625973165479545, + "grad_norm": 451.9245910644531, + "learning_rate": 
3.3745807638717144e-05, + "loss": 82.1897, + "step": 83230 + }, + { + "epoch": 0.3363001329201631, + "grad_norm": 5185.31494140625, + "learning_rate": 3.374377906137383e-05, + "loss": 74.8927, + "step": 83240 + }, + { + "epoch": 0.33634053418553067, + "grad_norm": 1955.7833251953125, + "learning_rate": 3.3741750216088465e-05, + "loss": 99.7349, + "step": 83250 + }, + { + "epoch": 0.3363809354508983, + "grad_norm": 503.9779357910156, + "learning_rate": 3.37397211029006e-05, + "loss": 100.7958, + "step": 83260 + }, + { + "epoch": 0.33642133671626595, + "grad_norm": 544.5552368164062, + "learning_rate": 3.373769172184981e-05, + "loss": 99.5987, + "step": 83270 + }, + { + "epoch": 0.3364617379816336, + "grad_norm": 495.8514099121094, + "learning_rate": 3.3735662072975635e-05, + "loss": 81.8363, + "step": 83280 + }, + { + "epoch": 0.33650213924700123, + "grad_norm": 680.1526489257812, + "learning_rate": 3.373363215631766e-05, + "loss": 63.9807, + "step": 83290 + }, + { + "epoch": 0.33654254051236887, + "grad_norm": 1508.8636474609375, + "learning_rate": 3.373160197191546e-05, + "loss": 83.47, + "step": 83300 + }, + { + "epoch": 0.33658294177773646, + "grad_norm": 503.3882141113281, + "learning_rate": 3.3729571519808606e-05, + "loss": 79.5202, + "step": 83310 + }, + { + "epoch": 0.3366233430431041, + "grad_norm": 546.6798706054688, + "learning_rate": 3.372754080003669e-05, + "loss": 110.26, + "step": 83320 + }, + { + "epoch": 0.33666374430847174, + "grad_norm": 695.0971069335938, + "learning_rate": 3.372550981263929e-05, + "loss": 82.5039, + "step": 83330 + }, + { + "epoch": 0.3367041455738394, + "grad_norm": 2818.900146484375, + "learning_rate": 3.3723478557656016e-05, + "loss": 101.7525, + "step": 83340 + }, + { + "epoch": 0.336744546839207, + "grad_norm": 337.97900390625, + "learning_rate": 3.3721447035126464e-05, + "loss": 94.9201, + "step": 83350 + }, + { + "epoch": 0.33678494810457466, + "grad_norm": 590.9136962890625, + "learning_rate": 3.371941524509024e-05, + "loss": 79.7133, + "step": 83360 + }, + { + "epoch": 0.33682534936994224, + "grad_norm": 1162.3157958984375, + "learning_rate": 3.371738318758694e-05, + "loss": 116.3378, + "step": 83370 + }, + { + "epoch": 0.3368657506353099, + "grad_norm": 1359.267578125, + "learning_rate": 3.37153508626562e-05, + "loss": 115.918, + "step": 83380 + }, + { + "epoch": 0.3369061519006775, + "grad_norm": 1147.1441650390625, + "learning_rate": 3.3713318270337643e-05, + "loss": 113.0778, + "step": 83390 + }, + { + "epoch": 0.33694655316604516, + "grad_norm": 731.8208618164062, + "learning_rate": 3.3711285410670876e-05, + "loss": 68.6027, + "step": 83400 + }, + { + "epoch": 0.3369869544314128, + "grad_norm": 612.0326538085938, + "learning_rate": 3.370925228369554e-05, + "loss": 113.5967, + "step": 83410 + }, + { + "epoch": 0.33702735569678044, + "grad_norm": 515.0682983398438, + "learning_rate": 3.370721888945127e-05, + "loss": 72.8747, + "step": 83420 + }, + { + "epoch": 0.3370677569621481, + "grad_norm": 668.2006225585938, + "learning_rate": 3.370518522797772e-05, + "loss": 52.1764, + "step": 83430 + }, + { + "epoch": 0.33710815822751566, + "grad_norm": 748.3060913085938, + "learning_rate": 3.370315129931453e-05, + "loss": 119.4054, + "step": 83440 + }, + { + "epoch": 0.3371485594928833, + "grad_norm": 393.0621032714844, + "learning_rate": 3.370111710350134e-05, + "loss": 67.754, + "step": 83450 + }, + { + "epoch": 0.33718896075825094, + "grad_norm": 1181.29248046875, + "learning_rate": 3.369908264057783e-05, + "loss": 100.8247, + "step": 83460 + }, + 
{ + "epoch": 0.3372293620236186, + "grad_norm": 993.7855834960938, + "learning_rate": 3.3697047910583635e-05, + "loss": 83.7059, + "step": 83470 + }, + { + "epoch": 0.3372697632889862, + "grad_norm": 761.2182006835938, + "learning_rate": 3.369501291355845e-05, + "loss": 72.9, + "step": 83480 + }, + { + "epoch": 0.33731016455435386, + "grad_norm": 823.867919921875, + "learning_rate": 3.369297764954194e-05, + "loss": 77.3609, + "step": 83490 + }, + { + "epoch": 0.33735056581972145, + "grad_norm": 1464.65087890625, + "learning_rate": 3.369094211857378e-05, + "loss": 115.0996, + "step": 83500 + }, + { + "epoch": 0.3373909670850891, + "grad_norm": 1128.622314453125, + "learning_rate": 3.3688906320693645e-05, + "loss": 57.4639, + "step": 83510 + }, + { + "epoch": 0.33743136835045673, + "grad_norm": 795.282470703125, + "learning_rate": 3.3686870255941246e-05, + "loss": 86.5236, + "step": 83520 + }, + { + "epoch": 0.33747176961582437, + "grad_norm": 291.44891357421875, + "learning_rate": 3.368483392435626e-05, + "loss": 110.246, + "step": 83530 + }, + { + "epoch": 0.337512170881192, + "grad_norm": 908.11279296875, + "learning_rate": 3.368279732597839e-05, + "loss": 93.1726, + "step": 83540 + }, + { + "epoch": 0.33755257214655965, + "grad_norm": 593.4014282226562, + "learning_rate": 3.368076046084734e-05, + "loss": 96.0801, + "step": 83550 + }, + { + "epoch": 0.3375929734119273, + "grad_norm": 2778.798583984375, + "learning_rate": 3.3678723329002826e-05, + "loss": 107.4352, + "step": 83560 + }, + { + "epoch": 0.3376333746772949, + "grad_norm": 1735.656982421875, + "learning_rate": 3.367668593048456e-05, + "loss": 159.8387, + "step": 83570 + }, + { + "epoch": 0.3376737759426625, + "grad_norm": 1310.9683837890625, + "learning_rate": 3.367464826533225e-05, + "loss": 141.2295, + "step": 83580 + }, + { + "epoch": 0.33771417720803015, + "grad_norm": 892.2634887695312, + "learning_rate": 3.3672610333585645e-05, + "loss": 92.4649, + "step": 83590 + }, + { + "epoch": 0.3377545784733978, + "grad_norm": 712.882568359375, + "learning_rate": 3.3670572135284456e-05, + "loss": 90.7725, + "step": 83600 + }, + { + "epoch": 0.33779497973876543, + "grad_norm": 492.2097473144531, + "learning_rate": 3.366853367046843e-05, + "loss": 75.8117, + "step": 83610 + }, + { + "epoch": 0.3378353810041331, + "grad_norm": 412.7674255371094, + "learning_rate": 3.3666494939177295e-05, + "loss": 106.8386, + "step": 83620 + }, + { + "epoch": 0.33787578226950066, + "grad_norm": 742.4546508789062, + "learning_rate": 3.366445594145081e-05, + "loss": 88.8715, + "step": 83630 + }, + { + "epoch": 0.3379161835348683, + "grad_norm": 1200.4505615234375, + "learning_rate": 3.366241667732872e-05, + "loss": 146.6149, + "step": 83640 + }, + { + "epoch": 0.33795658480023594, + "grad_norm": 538.9617919921875, + "learning_rate": 3.366037714685078e-05, + "loss": 59.5824, + "step": 83650 + }, + { + "epoch": 0.3379969860656036, + "grad_norm": 754.9732055664062, + "learning_rate": 3.365833735005676e-05, + "loss": 64.2393, + "step": 83660 + }, + { + "epoch": 0.3380373873309712, + "grad_norm": 652.6065673828125, + "learning_rate": 3.365629728698642e-05, + "loss": 76.3833, + "step": 83670 + }, + { + "epoch": 0.33807778859633886, + "grad_norm": 1467.4876708984375, + "learning_rate": 3.365425695767953e-05, + "loss": 119.286, + "step": 83680 + }, + { + "epoch": 0.33811818986170644, + "grad_norm": 1276.3514404296875, + "learning_rate": 3.365221636217588e-05, + "loss": 105.5317, + "step": 83690 + }, + { + "epoch": 0.3381585911270741, + "grad_norm": 
2073.47265625, + "learning_rate": 3.3650175500515235e-05, + "loss": 73.0024, + "step": 83700 + }, + { + "epoch": 0.3381989923924417, + "grad_norm": 862.1356201171875, + "learning_rate": 3.364813437273739e-05, + "loss": 104.3614, + "step": 83710 + }, + { + "epoch": 0.33823939365780936, + "grad_norm": 576.5679321289062, + "learning_rate": 3.3646092978882144e-05, + "loss": 91.9552, + "step": 83720 + }, + { + "epoch": 0.338279794923177, + "grad_norm": 349.8805847167969, + "learning_rate": 3.364405131898929e-05, + "loss": 66.7025, + "step": 83730 + }, + { + "epoch": 0.33832019618854464, + "grad_norm": 1218.9844970703125, + "learning_rate": 3.364200939309863e-05, + "loss": 93.4518, + "step": 83740 + }, + { + "epoch": 0.3383605974539123, + "grad_norm": 351.40447998046875, + "learning_rate": 3.363996720124997e-05, + "loss": 159.4376, + "step": 83750 + }, + { + "epoch": 0.33840099871927987, + "grad_norm": 566.8226318359375, + "learning_rate": 3.363792474348313e-05, + "loss": 104.6407, + "step": 83760 + }, + { + "epoch": 0.3384413999846475, + "grad_norm": 390.2467956542969, + "learning_rate": 3.363588201983792e-05, + "loss": 165.1854, + "step": 83770 + }, + { + "epoch": 0.33848180125001515, + "grad_norm": 518.2762451171875, + "learning_rate": 3.363383903035419e-05, + "loss": 68.536, + "step": 83780 + }, + { + "epoch": 0.3385222025153828, + "grad_norm": 576.6489868164062, + "learning_rate": 3.363179577507173e-05, + "loss": 64.7262, + "step": 83790 + }, + { + "epoch": 0.3385626037807504, + "grad_norm": 605.21435546875, + "learning_rate": 3.36297522540304e-05, + "loss": 81.0703, + "step": 83800 + }, + { + "epoch": 0.33860300504611807, + "grad_norm": 1284.6297607421875, + "learning_rate": 3.362770846727003e-05, + "loss": 137.1083, + "step": 83810 + }, + { + "epoch": 0.33864340631148565, + "grad_norm": 308.6549072265625, + "learning_rate": 3.362566441483046e-05, + "loss": 104.8823, + "step": 83820 + }, + { + "epoch": 0.3386838075768533, + "grad_norm": 539.6609497070312, + "learning_rate": 3.362362009675156e-05, + "loss": 90.2006, + "step": 83830 + }, + { + "epoch": 0.33872420884222093, + "grad_norm": 793.8370361328125, + "learning_rate": 3.362157551307317e-05, + "loss": 96.2198, + "step": 83840 + }, + { + "epoch": 0.33876461010758857, + "grad_norm": 671.9019165039062, + "learning_rate": 3.361953066383515e-05, + "loss": 71.67, + "step": 83850 + }, + { + "epoch": 0.3388050113729562, + "grad_norm": 702.4300537109375, + "learning_rate": 3.3617485549077365e-05, + "loss": 95.1327, + "step": 83860 + }, + { + "epoch": 0.33884541263832385, + "grad_norm": 931.1136474609375, + "learning_rate": 3.3615440168839693e-05, + "loss": 90.8453, + "step": 83870 + }, + { + "epoch": 0.3388858139036915, + "grad_norm": 1213.544189453125, + "learning_rate": 3.3613394523162e-05, + "loss": 135.8231, + "step": 83880 + }, + { + "epoch": 0.3389262151690591, + "grad_norm": 2314.10498046875, + "learning_rate": 3.361134861208419e-05, + "loss": 80.7136, + "step": 83890 + }, + { + "epoch": 0.3389666164344267, + "grad_norm": 582.6361694335938, + "learning_rate": 3.360930243564611e-05, + "loss": 81.3902, + "step": 83900 + }, + { + "epoch": 0.33900701769979436, + "grad_norm": 590.9949340820312, + "learning_rate": 3.360725599388768e-05, + "loss": 134.0901, + "step": 83910 + }, + { + "epoch": 0.339047418965162, + "grad_norm": 539.693359375, + "learning_rate": 3.360520928684879e-05, + "loss": 85.4828, + "step": 83920 + }, + { + "epoch": 0.33908782023052964, + "grad_norm": 500.8472595214844, + "learning_rate": 3.3603162314569334e-05, + "loss": 
51.1034, + "step": 83930 + }, + { + "epoch": 0.3391282214958973, + "grad_norm": 1377.3482666015625, + "learning_rate": 3.360111507708923e-05, + "loss": 124.3442, + "step": 83940 + }, + { + "epoch": 0.33916862276126486, + "grad_norm": 1070.694091796875, + "learning_rate": 3.3599067574448385e-05, + "loss": 47.361, + "step": 83950 + }, + { + "epoch": 0.3392090240266325, + "grad_norm": 402.19281005859375, + "learning_rate": 3.3597019806686724e-05, + "loss": 91.6691, + "step": 83960 + }, + { + "epoch": 0.33924942529200014, + "grad_norm": 1146.0657958984375, + "learning_rate": 3.359497177384415e-05, + "loss": 145.7598, + "step": 83970 + }, + { + "epoch": 0.3392898265573678, + "grad_norm": 809.862548828125, + "learning_rate": 3.3592923475960604e-05, + "loss": 89.0152, + "step": 83980 + }, + { + "epoch": 0.3393302278227354, + "grad_norm": 751.2743530273438, + "learning_rate": 3.3590874913076024e-05, + "loss": 77.5074, + "step": 83990 + }, + { + "epoch": 0.33937062908810306, + "grad_norm": 537.554443359375, + "learning_rate": 3.3588826085230336e-05, + "loss": 85.5056, + "step": 84000 + }, + { + "epoch": 0.33941103035347064, + "grad_norm": 495.6501770019531, + "learning_rate": 3.3586776992463486e-05, + "loss": 67.3079, + "step": 84010 + }, + { + "epoch": 0.3394514316188383, + "grad_norm": 569.8097534179688, + "learning_rate": 3.358472763481542e-05, + "loss": 105.2055, + "step": 84020 + }, + { + "epoch": 0.3394918328842059, + "grad_norm": 1326.6912841796875, + "learning_rate": 3.35826780123261e-05, + "loss": 112.04, + "step": 84030 + }, + { + "epoch": 0.33953223414957356, + "grad_norm": 684.7462768554688, + "learning_rate": 3.358062812503548e-05, + "loss": 73.9432, + "step": 84040 + }, + { + "epoch": 0.3395726354149412, + "grad_norm": 957.3126220703125, + "learning_rate": 3.357857797298353e-05, + "loss": 137.715, + "step": 84050 + }, + { + "epoch": 0.33961303668030884, + "grad_norm": 976.27783203125, + "learning_rate": 3.35765275562102e-05, + "loss": 76.5222, + "step": 84060 + }, + { + "epoch": 0.3396534379456765, + "grad_norm": 631.074951171875, + "learning_rate": 3.357447687475548e-05, + "loss": 74.2974, + "step": 84070 + }, + { + "epoch": 0.33969383921104407, + "grad_norm": 1341.777099609375, + "learning_rate": 3.357242592865934e-05, + "loss": 78.6549, + "step": 84080 + }, + { + "epoch": 0.3397342404764117, + "grad_norm": 615.5068969726562, + "learning_rate": 3.357037471796178e-05, + "loss": 73.5693, + "step": 84090 + }, + { + "epoch": 0.33977464174177935, + "grad_norm": 538.767822265625, + "learning_rate": 3.356832324270277e-05, + "loss": 84.9979, + "step": 84100 + }, + { + "epoch": 0.339815043007147, + "grad_norm": 687.803955078125, + "learning_rate": 3.356627150292231e-05, + "loss": 106.0108, + "step": 84110 + }, + { + "epoch": 0.33985544427251463, + "grad_norm": 662.6283569335938, + "learning_rate": 3.356421949866041e-05, + "loss": 92.5312, + "step": 84120 + }, + { + "epoch": 0.33989584553788227, + "grad_norm": 1064.208984375, + "learning_rate": 3.356216722995706e-05, + "loss": 84.421, + "step": 84130 + }, + { + "epoch": 0.33993624680324985, + "grad_norm": 895.4779052734375, + "learning_rate": 3.356011469685229e-05, + "loss": 81.5686, + "step": 84140 + }, + { + "epoch": 0.3399766480686175, + "grad_norm": 500.9759521484375, + "learning_rate": 3.355806189938609e-05, + "loss": 82.4018, + "step": 84150 + }, + { + "epoch": 0.34001704933398513, + "grad_norm": 684.8465576171875, + "learning_rate": 3.35560088375985e-05, + "loss": 73.1852, + "step": 84160 + }, + { + "epoch": 0.3400574505993528, + 
"grad_norm": 967.1045532226562, + "learning_rate": 3.3553955511529534e-05, + "loss": 82.1116, + "step": 84170 + }, + { + "epoch": 0.3400978518647204, + "grad_norm": 259.2762145996094, + "learning_rate": 3.355190192121923e-05, + "loss": 75.4657, + "step": 84180 + }, + { + "epoch": 0.34013825313008805, + "grad_norm": 1107.017822265625, + "learning_rate": 3.3549848066707626e-05, + "loss": 90.488, + "step": 84190 + }, + { + "epoch": 0.3401786543954557, + "grad_norm": 512.5900268554688, + "learning_rate": 3.354779394803475e-05, + "loss": 91.7635, + "step": 84200 + }, + { + "epoch": 0.3402190556608233, + "grad_norm": 777.394775390625, + "learning_rate": 3.354573956524066e-05, + "loss": 80.794, + "step": 84210 + }, + { + "epoch": 0.3402594569261909, + "grad_norm": 1492.374755859375, + "learning_rate": 3.3543684918365405e-05, + "loss": 88.91, + "step": 84220 + }, + { + "epoch": 0.34029985819155856, + "grad_norm": 616.6952514648438, + "learning_rate": 3.354163000744903e-05, + "loss": 81.9586, + "step": 84230 + }, + { + "epoch": 0.3403402594569262, + "grad_norm": 807.5482177734375, + "learning_rate": 3.3539574832531617e-05, + "loss": 65.5615, + "step": 84240 + }, + { + "epoch": 0.34038066072229384, + "grad_norm": 583.454833984375, + "learning_rate": 3.3537519393653216e-05, + "loss": 77.7393, + "step": 84250 + }, + { + "epoch": 0.3404210619876615, + "grad_norm": 747.4718017578125, + "learning_rate": 3.353546369085391e-05, + "loss": 117.457, + "step": 84260 + }, + { + "epoch": 0.34046146325302906, + "grad_norm": 855.71826171875, + "learning_rate": 3.3533407724173765e-05, + "loss": 69.2124, + "step": 84270 + }, + { + "epoch": 0.3405018645183967, + "grad_norm": 1862.537109375, + "learning_rate": 3.353135149365288e-05, + "loss": 87.6468, + "step": 84280 + }, + { + "epoch": 0.34054226578376434, + "grad_norm": 831.04052734375, + "learning_rate": 3.352929499933132e-05, + "loss": 90.309, + "step": 84290 + }, + { + "epoch": 0.340582667049132, + "grad_norm": 5085.953125, + "learning_rate": 3.352723824124919e-05, + "loss": 146.9665, + "step": 84300 + }, + { + "epoch": 0.3406230683144996, + "grad_norm": 500.9397277832031, + "learning_rate": 3.352518121944659e-05, + "loss": 157.914, + "step": 84310 + }, + { + "epoch": 0.34066346957986726, + "grad_norm": 1098.39501953125, + "learning_rate": 3.3523123933963614e-05, + "loss": 109.3622, + "step": 84320 + }, + { + "epoch": 0.34070387084523485, + "grad_norm": 627.7136840820312, + "learning_rate": 3.352106638484038e-05, + "loss": 127.0168, + "step": 84330 + }, + { + "epoch": 0.3407442721106025, + "grad_norm": 765.4631958007812, + "learning_rate": 3.351900857211699e-05, + "loss": 70.015, + "step": 84340 + }, + { + "epoch": 0.3407846733759701, + "grad_norm": 1328.2342529296875, + "learning_rate": 3.351695049583357e-05, + "loss": 78.2862, + "step": 84350 + }, + { + "epoch": 0.34082507464133777, + "grad_norm": 1253.0789794921875, + "learning_rate": 3.351489215603024e-05, + "loss": 105.3766, + "step": 84360 + }, + { + "epoch": 0.3408654759067054, + "grad_norm": 762.9935302734375, + "learning_rate": 3.351283355274714e-05, + "loss": 103.2634, + "step": 84370 + }, + { + "epoch": 0.34090587717207305, + "grad_norm": 485.6178894042969, + "learning_rate": 3.351077468602438e-05, + "loss": 90.5623, + "step": 84380 + }, + { + "epoch": 0.3409462784374407, + "grad_norm": 692.3379516601562, + "learning_rate": 3.350871555590212e-05, + "loss": 93.4147, + "step": 84390 + }, + { + "epoch": 0.34098667970280827, + "grad_norm": 868.484375, + "learning_rate": 3.350665616242049e-05, + "loss": 
87.9156, + "step": 84400 + }, + { + "epoch": 0.3410270809681759, + "grad_norm": 712.4720458984375, + "learning_rate": 3.350459650561964e-05, + "loss": 103.0383, + "step": 84410 + }, + { + "epoch": 0.34106748223354355, + "grad_norm": 441.8598327636719, + "learning_rate": 3.3502536585539746e-05, + "loss": 52.438, + "step": 84420 + }, + { + "epoch": 0.3411078834989112, + "grad_norm": 909.822265625, + "learning_rate": 3.350047640222094e-05, + "loss": 73.4636, + "step": 84430 + }, + { + "epoch": 0.34114828476427883, + "grad_norm": 437.29510498046875, + "learning_rate": 3.349841595570339e-05, + "loss": 104.8032, + "step": 84440 + }, + { + "epoch": 0.34118868602964647, + "grad_norm": 660.9466552734375, + "learning_rate": 3.3496355246027276e-05, + "loss": 74.0558, + "step": 84450 + }, + { + "epoch": 0.34122908729501406, + "grad_norm": 740.646240234375, + "learning_rate": 3.349429427323277e-05, + "loss": 83.1433, + "step": 84460 + }, + { + "epoch": 0.3412694885603817, + "grad_norm": 3191.33837890625, + "learning_rate": 3.349223303736005e-05, + "loss": 120.7385, + "step": 84470 + }, + { + "epoch": 0.34130988982574934, + "grad_norm": 402.4794006347656, + "learning_rate": 3.34901715384493e-05, + "loss": 95.8126, + "step": 84480 + }, + { + "epoch": 0.341350291091117, + "grad_norm": 696.9688110351562, + "learning_rate": 3.3488109776540704e-05, + "loss": 86.6368, + "step": 84490 + }, + { + "epoch": 0.3413906923564846, + "grad_norm": 1213.3975830078125, + "learning_rate": 3.3486047751674465e-05, + "loss": 80.124, + "step": 84500 + }, + { + "epoch": 0.34143109362185226, + "grad_norm": 616.4169921875, + "learning_rate": 3.348398546389079e-05, + "loss": 83.0174, + "step": 84510 + }, + { + "epoch": 0.3414714948872199, + "grad_norm": 617.55224609375, + "learning_rate": 3.3481922913229875e-05, + "loss": 85.7227, + "step": 84520 + }, + { + "epoch": 0.3415118961525875, + "grad_norm": 1011.8010864257812, + "learning_rate": 3.347986009973193e-05, + "loss": 104.7956, + "step": 84530 + }, + { + "epoch": 0.3415522974179551, + "grad_norm": 408.4366149902344, + "learning_rate": 3.3477797023437176e-05, + "loss": 105.7305, + "step": 84540 + }, + { + "epoch": 0.34159269868332276, + "grad_norm": 365.054931640625, + "learning_rate": 3.3475733684385815e-05, + "loss": 84.2622, + "step": 84550 + }, + { + "epoch": 0.3416330999486904, + "grad_norm": 583.8577880859375, + "learning_rate": 3.3473670082618105e-05, + "loss": 75.6874, + "step": 84560 + }, + { + "epoch": 0.34167350121405804, + "grad_norm": 520.3046875, + "learning_rate": 3.347160621817425e-05, + "loss": 73.0806, + "step": 84570 + }, + { + "epoch": 0.3417139024794257, + "grad_norm": 568.4674682617188, + "learning_rate": 3.34695420910945e-05, + "loss": 91.75, + "step": 84580 + }, + { + "epoch": 0.34175430374479326, + "grad_norm": 303.7978210449219, + "learning_rate": 3.3467477701419095e-05, + "loss": 54.9498, + "step": 84590 + }, + { + "epoch": 0.3417947050101609, + "grad_norm": 957.1175537109375, + "learning_rate": 3.3465413049188276e-05, + "loss": 89.6905, + "step": 84600 + }, + { + "epoch": 0.34183510627552854, + "grad_norm": 633.1253051757812, + "learning_rate": 3.34633481344423e-05, + "loss": 112.9896, + "step": 84610 + }, + { + "epoch": 0.3418755075408962, + "grad_norm": 1093.57568359375, + "learning_rate": 3.346128295722142e-05, + "loss": 69.1264, + "step": 84620 + }, + { + "epoch": 0.3419159088062638, + "grad_norm": 419.0455322265625, + "learning_rate": 3.3459217517565896e-05, + "loss": 124.8967, + "step": 84630 + }, + { + "epoch": 0.34195631007163146, + 
"grad_norm": 1158.8779296875, + "learning_rate": 3.3457151815516e-05, + "loss": 104.9582, + "step": 84640 + }, + { + "epoch": 0.34199671133699905, + "grad_norm": 675.0980224609375, + "learning_rate": 3.3455085851112e-05, + "loss": 89.6018, + "step": 84650 + }, + { + "epoch": 0.3420371126023667, + "grad_norm": 490.6619873046875, + "learning_rate": 3.345301962439417e-05, + "loss": 112.2254, + "step": 84660 + }, + { + "epoch": 0.34207751386773433, + "grad_norm": 784.7500610351562, + "learning_rate": 3.34509531354028e-05, + "loss": 96.2077, + "step": 84670 + }, + { + "epoch": 0.34211791513310197, + "grad_norm": 845.2930908203125, + "learning_rate": 3.344888638417817e-05, + "loss": 90.1497, + "step": 84680 + }, + { + "epoch": 0.3421583163984696, + "grad_norm": 398.4805603027344, + "learning_rate": 3.3446819370760577e-05, + "loss": 75.8576, + "step": 84690 + }, + { + "epoch": 0.34219871766383725, + "grad_norm": 541.4842529296875, + "learning_rate": 3.3444752095190326e-05, + "loss": 87.6842, + "step": 84700 + }, + { + "epoch": 0.3422391189292049, + "grad_norm": 563.3409423828125, + "learning_rate": 3.34426845575077e-05, + "loss": 84.6292, + "step": 84710 + }, + { + "epoch": 0.3422795201945725, + "grad_norm": 583.0428466796875, + "learning_rate": 3.344061675775303e-05, + "loss": 86.9812, + "step": 84720 + }, + { + "epoch": 0.3423199214599401, + "grad_norm": 872.3020629882812, + "learning_rate": 3.34385486959666e-05, + "loss": 81.1865, + "step": 84730 + }, + { + "epoch": 0.34236032272530775, + "grad_norm": 971.595458984375, + "learning_rate": 3.343648037218876e-05, + "loss": 107.7514, + "step": 84740 + }, + { + "epoch": 0.3424007239906754, + "grad_norm": 1021.2527465820312, + "learning_rate": 3.343441178645981e-05, + "loss": 119.604, + "step": 84750 + }, + { + "epoch": 0.34244112525604303, + "grad_norm": 498.3939514160156, + "learning_rate": 3.3432342938820084e-05, + "loss": 78.3572, + "step": 84760 + }, + { + "epoch": 0.3424815265214107, + "grad_norm": 1169.1053466796875, + "learning_rate": 3.3430273829309925e-05, + "loss": 85.5517, + "step": 84770 + }, + { + "epoch": 0.34252192778677826, + "grad_norm": 705.2420654296875, + "learning_rate": 3.342820445796966e-05, + "loss": 64.7175, + "step": 84780 + }, + { + "epoch": 0.3425623290521459, + "grad_norm": 821.1361694335938, + "learning_rate": 3.342613482483963e-05, + "loss": 80.4281, + "step": 84790 + }, + { + "epoch": 0.34260273031751354, + "grad_norm": 606.3366088867188, + "learning_rate": 3.342406492996019e-05, + "loss": 92.4013, + "step": 84800 + }, + { + "epoch": 0.3426431315828812, + "grad_norm": 1029.2320556640625, + "learning_rate": 3.34219947733717e-05, + "loss": 78.2731, + "step": 84810 + }, + { + "epoch": 0.3426835328482488, + "grad_norm": 1098.935791015625, + "learning_rate": 3.3419924355114505e-05, + "loss": 88.1725, + "step": 84820 + }, + { + "epoch": 0.34272393411361646, + "grad_norm": 1072.2862548828125, + "learning_rate": 3.341785367522898e-05, + "loss": 97.6202, + "step": 84830 + }, + { + "epoch": 0.3427643353789841, + "grad_norm": 1765.024658203125, + "learning_rate": 3.341578273375548e-05, + "loss": 83.5294, + "step": 84840 + }, + { + "epoch": 0.3428047366443517, + "grad_norm": 393.2286682128906, + "learning_rate": 3.3413711530734404e-05, + "loss": 74.9004, + "step": 84850 + }, + { + "epoch": 0.3428451379097193, + "grad_norm": 603.1321411132812, + "learning_rate": 3.341164006620611e-05, + "loss": 116.7461, + "step": 84860 + }, + { + "epoch": 0.34288553917508696, + "grad_norm": 1024.42236328125, + "learning_rate": 
3.340956834021099e-05, + "loss": 88.2224, + "step": 84870 + }, + { + "epoch": 0.3429259404404546, + "grad_norm": 844.7884521484375, + "learning_rate": 3.340749635278942e-05, + "loss": 86.441, + "step": 84880 + }, + { + "epoch": 0.34296634170582224, + "grad_norm": 776.6093139648438, + "learning_rate": 3.3405424103981815e-05, + "loss": 91.3884, + "step": 84890 + }, + { + "epoch": 0.3430067429711899, + "grad_norm": 524.0580444335938, + "learning_rate": 3.340335159382857e-05, + "loss": 92.8227, + "step": 84900 + }, + { + "epoch": 0.34304714423655747, + "grad_norm": 744.7914428710938, + "learning_rate": 3.340127882237008e-05, + "loss": 78.8487, + "step": 84910 + }, + { + "epoch": 0.3430875455019251, + "grad_norm": 927.65478515625, + "learning_rate": 3.339920578964676e-05, + "loss": 102.2473, + "step": 84920 + }, + { + "epoch": 0.34312794676729275, + "grad_norm": 601.9940185546875, + "learning_rate": 3.339713249569902e-05, + "loss": 91.6009, + "step": 84930 + }, + { + "epoch": 0.3431683480326604, + "grad_norm": 635.5281372070312, + "learning_rate": 3.339505894056729e-05, + "loss": 74.6865, + "step": 84940 + }, + { + "epoch": 0.343208749298028, + "grad_norm": 3052.357666015625, + "learning_rate": 3.339298512429199e-05, + "loss": 158.5514, + "step": 84950 + }, + { + "epoch": 0.34324915056339567, + "grad_norm": 715.1381225585938, + "learning_rate": 3.339091104691355e-05, + "loss": 102.8049, + "step": 84960 + }, + { + "epoch": 0.34328955182876325, + "grad_norm": 383.21624755859375, + "learning_rate": 3.3388836708472404e-05, + "loss": 68.6991, + "step": 84970 + }, + { + "epoch": 0.3433299530941309, + "grad_norm": 720.8235473632812, + "learning_rate": 3.3386762109009e-05, + "loss": 65.4419, + "step": 84980 + }, + { + "epoch": 0.34337035435949853, + "grad_norm": 582.97900390625, + "learning_rate": 3.338468724856377e-05, + "loss": 60.3088, + "step": 84990 + }, + { + "epoch": 0.34341075562486617, + "grad_norm": 556.2716064453125, + "learning_rate": 3.3382612127177166e-05, + "loss": 115.2563, + "step": 85000 + }, + { + "epoch": 0.3434511568902338, + "grad_norm": 819.02587890625, + "learning_rate": 3.338053674488966e-05, + "loss": 86.0891, + "step": 85010 + }, + { + "epoch": 0.34349155815560145, + "grad_norm": 627.9694213867188, + "learning_rate": 3.3378461101741693e-05, + "loss": 81.1804, + "step": 85020 + }, + { + "epoch": 0.3435319594209691, + "grad_norm": 759.7229614257812, + "learning_rate": 3.3376385197773737e-05, + "loss": 91.9274, + "step": 85030 + }, + { + "epoch": 0.3435723606863367, + "grad_norm": 809.6134033203125, + "learning_rate": 3.337430903302627e-05, + "loss": 108.438, + "step": 85040 + }, + { + "epoch": 0.3436127619517043, + "grad_norm": 462.60504150390625, + "learning_rate": 3.337223260753977e-05, + "loss": 102.0488, + "step": 85050 + }, + { + "epoch": 0.34365316321707196, + "grad_norm": 545.3731689453125, + "learning_rate": 3.33701559213547e-05, + "loss": 149.6475, + "step": 85060 + }, + { + "epoch": 0.3436935644824396, + "grad_norm": 1361.9505615234375, + "learning_rate": 3.336807897451156e-05, + "loss": 89.6667, + "step": 85070 + }, + { + "epoch": 0.34373396574780724, + "grad_norm": 1223.3074951171875, + "learning_rate": 3.336600176705083e-05, + "loss": 103.0274, + "step": 85080 + }, + { + "epoch": 0.3437743670131749, + "grad_norm": 640.1355590820312, + "learning_rate": 3.336392429901303e-05, + "loss": 115.3512, + "step": 85090 + }, + { + "epoch": 0.34381476827854246, + "grad_norm": 1140.1168212890625, + "learning_rate": 3.336184657043864e-05, + "loss": 74.2441, + "step": 85100 + }, 
+ { + "epoch": 0.3438551695439101, + "grad_norm": 946.5442504882812, + "learning_rate": 3.335976858136816e-05, + "loss": 94.2939, + "step": 85110 + }, + { + "epoch": 0.34389557080927774, + "grad_norm": 555.2844848632812, + "learning_rate": 3.335769033184213e-05, + "loss": 71.1758, + "step": 85120 + }, + { + "epoch": 0.3439359720746454, + "grad_norm": 976.6113891601562, + "learning_rate": 3.3355611821901046e-05, + "loss": 97.331, + "step": 85130 + }, + { + "epoch": 0.343976373340013, + "grad_norm": 590.6163330078125, + "learning_rate": 3.335353305158543e-05, + "loss": 101.0907, + "step": 85140 + }, + { + "epoch": 0.34401677460538066, + "grad_norm": 598.8223266601562, + "learning_rate": 3.335145402093582e-05, + "loss": 56.8433, + "step": 85150 + }, + { + "epoch": 0.34405717587074824, + "grad_norm": 705.1436767578125, + "learning_rate": 3.3349374729992725e-05, + "loss": 70.9367, + "step": 85160 + }, + { + "epoch": 0.3440975771361159, + "grad_norm": 707.2876586914062, + "learning_rate": 3.3347295178796707e-05, + "loss": 97.816, + "step": 85170 + }, + { + "epoch": 0.3441379784014835, + "grad_norm": 710.8496704101562, + "learning_rate": 3.334521536738829e-05, + "loss": 102.5164, + "step": 85180 + }, + { + "epoch": 0.34417837966685116, + "grad_norm": 3177.18310546875, + "learning_rate": 3.334313529580804e-05, + "loss": 128.5299, + "step": 85190 + }, + { + "epoch": 0.3442187809322188, + "grad_norm": 562.9314575195312, + "learning_rate": 3.334105496409649e-05, + "loss": 79.6824, + "step": 85200 + }, + { + "epoch": 0.34425918219758644, + "grad_norm": 606.8613891601562, + "learning_rate": 3.333897437229421e-05, + "loss": 114.9136, + "step": 85210 + }, + { + "epoch": 0.3442995834629541, + "grad_norm": 1853.1583251953125, + "learning_rate": 3.333689352044175e-05, + "loss": 99.146, + "step": 85220 + }, + { + "epoch": 0.34433998472832167, + "grad_norm": 893.4524536132812, + "learning_rate": 3.3334812408579696e-05, + "loss": 85.95, + "step": 85230 + }, + { + "epoch": 0.3443803859936893, + "grad_norm": 1208.007080078125, + "learning_rate": 3.3332731036748604e-05, + "loss": 87.6602, + "step": 85240 + }, + { + "epoch": 0.34442078725905695, + "grad_norm": 1075.04736328125, + "learning_rate": 3.333064940498905e-05, + "loss": 84.4554, + "step": 85250 + }, + { + "epoch": 0.3444611885244246, + "grad_norm": 831.989013671875, + "learning_rate": 3.332856751334163e-05, + "loss": 136.3989, + "step": 85260 + }, + { + "epoch": 0.34450158978979223, + "grad_norm": 771.6121215820312, + "learning_rate": 3.3326485361846924e-05, + "loss": 74.2253, + "step": 85270 + }, + { + "epoch": 0.34454199105515987, + "grad_norm": 830.8035888671875, + "learning_rate": 3.3324402950545524e-05, + "loss": 110.6995, + "step": 85280 + }, + { + "epoch": 0.34458239232052745, + "grad_norm": 719.1708984375, + "learning_rate": 3.3322320279478025e-05, + "loss": 69.0071, + "step": 85290 + }, + { + "epoch": 0.3446227935858951, + "grad_norm": 969.331298828125, + "learning_rate": 3.332023734868504e-05, + "loss": 86.0726, + "step": 85300 + }, + { + "epoch": 0.34466319485126273, + "grad_norm": 1741.87841796875, + "learning_rate": 3.3318154158207164e-05, + "loss": 159.3101, + "step": 85310 + }, + { + "epoch": 0.3447035961166304, + "grad_norm": 1854.074951171875, + "learning_rate": 3.3316070708085014e-05, + "loss": 88.9789, + "step": 85320 + }, + { + "epoch": 0.344743997381998, + "grad_norm": 1672.3138427734375, + "learning_rate": 3.3313986998359213e-05, + "loss": 102.0677, + "step": 85330 + }, + { + "epoch": 0.34478439864736565, + "grad_norm": 
623.7470092773438, + "learning_rate": 3.3311903029070384e-05, + "loss": 70.7283, + "step": 85340 + }, + { + "epoch": 0.3448247999127333, + "grad_norm": 561.6229858398438, + "learning_rate": 3.330981880025915e-05, + "loss": 49.4234, + "step": 85350 + }, + { + "epoch": 0.3448652011781009, + "grad_norm": 1611.0814208984375, + "learning_rate": 3.3307734311966144e-05, + "loss": 108.521, + "step": 85360 + }, + { + "epoch": 0.3449056024434685, + "grad_norm": 458.3751220703125, + "learning_rate": 3.330564956423201e-05, + "loss": 83.4613, + "step": 85370 + }, + { + "epoch": 0.34494600370883616, + "grad_norm": 1349.204833984375, + "learning_rate": 3.330356455709739e-05, + "loss": 84.4523, + "step": 85380 + }, + { + "epoch": 0.3449864049742038, + "grad_norm": 477.1666564941406, + "learning_rate": 3.3301479290602925e-05, + "loss": 87.3975, + "step": 85390 + }, + { + "epoch": 0.34502680623957144, + "grad_norm": 692.7836303710938, + "learning_rate": 3.329939376478927e-05, + "loss": 82.3372, + "step": 85400 + }, + { + "epoch": 0.3450672075049391, + "grad_norm": 427.59588623046875, + "learning_rate": 3.329730797969709e-05, + "loss": 94.2344, + "step": 85410 + }, + { + "epoch": 0.34510760877030666, + "grad_norm": 504.99578857421875, + "learning_rate": 3.329522193536705e-05, + "loss": 86.4315, + "step": 85420 + }, + { + "epoch": 0.3451480100356743, + "grad_norm": 749.046875, + "learning_rate": 3.3293135631839806e-05, + "loss": 66.7848, + "step": 85430 + }, + { + "epoch": 0.34518841130104194, + "grad_norm": 866.1326904296875, + "learning_rate": 3.329104906915604e-05, + "loss": 77.0775, + "step": 85440 + }, + { + "epoch": 0.3452288125664096, + "grad_norm": 1671.7645263671875, + "learning_rate": 3.328896224735644e-05, + "loss": 140.2245, + "step": 85450 + }, + { + "epoch": 0.3452692138317772, + "grad_norm": 671.6039428710938, + "learning_rate": 3.328687516648167e-05, + "loss": 114.7906, + "step": 85460 + }, + { + "epoch": 0.34530961509714486, + "grad_norm": 904.0907592773438, + "learning_rate": 3.328478782657243e-05, + "loss": 80.1498, + "step": 85470 + }, + { + "epoch": 0.34535001636251245, + "grad_norm": 520.9303588867188, + "learning_rate": 3.328270022766941e-05, + "loss": 107.3932, + "step": 85480 + }, + { + "epoch": 0.3453904176278801, + "grad_norm": 1022.9935913085938, + "learning_rate": 3.3280612369813305e-05, + "loss": 82.4861, + "step": 85490 + }, + { + "epoch": 0.3454308188932477, + "grad_norm": 1036.1605224609375, + "learning_rate": 3.3278524253044834e-05, + "loss": 90.6727, + "step": 85500 + }, + { + "epoch": 0.34547122015861537, + "grad_norm": 746.9507446289062, + "learning_rate": 3.327643587740469e-05, + "loss": 87.7794, + "step": 85510 + }, + { + "epoch": 0.345511621423983, + "grad_norm": 653.7132568359375, + "learning_rate": 3.3274347242933606e-05, + "loss": 89.5333, + "step": 85520 + }, + { + "epoch": 0.34555202268935065, + "grad_norm": 768.3308715820312, + "learning_rate": 3.327225834967227e-05, + "loss": 119.7375, + "step": 85530 + }, + { + "epoch": 0.3455924239547183, + "grad_norm": 895.6587524414062, + "learning_rate": 3.327016919766144e-05, + "loss": 96.1434, + "step": 85540 + }, + { + "epoch": 0.34563282522008587, + "grad_norm": 1001.6537475585938, + "learning_rate": 3.3268079786941825e-05, + "loss": 113.9048, + "step": 85550 + }, + { + "epoch": 0.3456732264854535, + "grad_norm": 5583.56103515625, + "learning_rate": 3.326599011755416e-05, + "loss": 117.5154, + "step": 85560 + }, + { + "epoch": 0.34571362775082115, + "grad_norm": 1083.78857421875, + "learning_rate": 3.326390018953919e-05, 
+ "loss": 80.8917, + "step": 85570 + }, + { + "epoch": 0.3457540290161888, + "grad_norm": 598.2891235351562, + "learning_rate": 3.326181000293766e-05, + "loss": 81.718, + "step": 85580 + }, + { + "epoch": 0.34579443028155643, + "grad_norm": 952.2680053710938, + "learning_rate": 3.325971955779031e-05, + "loss": 94.1512, + "step": 85590 + }, + { + "epoch": 0.34583483154692407, + "grad_norm": 603.2572021484375, + "learning_rate": 3.325762885413791e-05, + "loss": 86.26, + "step": 85600 + }, + { + "epoch": 0.34587523281229166, + "grad_norm": 636.6422119140625, + "learning_rate": 3.32555378920212e-05, + "loss": 74.1937, + "step": 85610 + }, + { + "epoch": 0.3459156340776593, + "grad_norm": 975.27880859375, + "learning_rate": 3.325344667148095e-05, + "loss": 87.4401, + "step": 85620 + }, + { + "epoch": 0.34595603534302694, + "grad_norm": 1367.5758056640625, + "learning_rate": 3.325135519255795e-05, + "loss": 146.9385, + "step": 85630 + }, + { + "epoch": 0.3459964366083946, + "grad_norm": 495.0063171386719, + "learning_rate": 3.3249263455292954e-05, + "loss": 59.8794, + "step": 85640 + }, + { + "epoch": 0.3460368378737622, + "grad_norm": 598.726318359375, + "learning_rate": 3.324717145972674e-05, + "loss": 96.7968, + "step": 85650 + }, + { + "epoch": 0.34607723913912986, + "grad_norm": 774.0640258789062, + "learning_rate": 3.32450792059001e-05, + "loss": 70.5245, + "step": 85660 + }, + { + "epoch": 0.3461176404044975, + "grad_norm": 513.7930297851562, + "learning_rate": 3.3242986693853824e-05, + "loss": 70.8973, + "step": 85670 + }, + { + "epoch": 0.3461580416698651, + "grad_norm": 1833.3221435546875, + "learning_rate": 3.32408939236287e-05, + "loss": 99.8368, + "step": 85680 + }, + { + "epoch": 0.3461984429352327, + "grad_norm": 813.6258544921875, + "learning_rate": 3.323880089526554e-05, + "loss": 80.3407, + "step": 85690 + }, + { + "epoch": 0.34623884420060036, + "grad_norm": 775.0750732421875, + "learning_rate": 3.323670760880513e-05, + "loss": 146.5104, + "step": 85700 + }, + { + "epoch": 0.346279245465968, + "grad_norm": 620.9314575195312, + "learning_rate": 3.3234614064288297e-05, + "loss": 69.6769, + "step": 85710 + }, + { + "epoch": 0.34631964673133564, + "grad_norm": 1123.7935791015625, + "learning_rate": 3.323252026175585e-05, + "loss": 82.0667, + "step": 85720 + }, + { + "epoch": 0.3463600479967033, + "grad_norm": 523.3013916015625, + "learning_rate": 3.323042620124861e-05, + "loss": 92.5944, + "step": 85730 + }, + { + "epoch": 0.34640044926207086, + "grad_norm": 1041.9119873046875, + "learning_rate": 3.32283318828074e-05, + "loss": 120.0448, + "step": 85740 + }, + { + "epoch": 0.3464408505274385, + "grad_norm": 1483.08642578125, + "learning_rate": 3.322623730647304e-05, + "loss": 93.7491, + "step": 85750 + }, + { + "epoch": 0.34648125179280614, + "grad_norm": 703.116455078125, + "learning_rate": 3.322414247228638e-05, + "loss": 74.1667, + "step": 85760 + }, + { + "epoch": 0.3465216530581738, + "grad_norm": 682.5, + "learning_rate": 3.322204738028826e-05, + "loss": 71.3626, + "step": 85770 + }, + { + "epoch": 0.3465620543235414, + "grad_norm": 406.64697265625, + "learning_rate": 3.321995203051951e-05, + "loss": 71.1493, + "step": 85780 + }, + { + "epoch": 0.34660245558890906, + "grad_norm": 818.5914916992188, + "learning_rate": 3.321785642302099e-05, + "loss": 128.1707, + "step": 85790 + }, + { + "epoch": 0.34664285685427665, + "grad_norm": 969.037109375, + "learning_rate": 3.3215760557833556e-05, + "loss": 75.3972, + "step": 85800 + }, + { + "epoch": 0.3466832581196443, + "grad_norm": 
444.7679748535156, + "learning_rate": 3.3213664434998065e-05, + "loss": 122.1979, + "step": 85810 + }, + { + "epoch": 0.34672365938501193, + "grad_norm": 744.8341064453125, + "learning_rate": 3.3211568054555384e-05, + "loss": 85.5789, + "step": 85820 + }, + { + "epoch": 0.34676406065037957, + "grad_norm": 517.8134155273438, + "learning_rate": 3.320947141654639e-05, + "loss": 72.9007, + "step": 85830 + }, + { + "epoch": 0.3468044619157472, + "grad_norm": 1763.0599365234375, + "learning_rate": 3.320737452101194e-05, + "loss": 93.9577, + "step": 85840 + }, + { + "epoch": 0.34684486318111485, + "grad_norm": 1835.9246826171875, + "learning_rate": 3.3205277367992924e-05, + "loss": 104.0352, + "step": 85850 + }, + { + "epoch": 0.3468852644464825, + "grad_norm": 624.373291015625, + "learning_rate": 3.3203179957530235e-05, + "loss": 93.1028, + "step": 85860 + }, + { + "epoch": 0.3469256657118501, + "grad_norm": 703.8759765625, + "learning_rate": 3.320108228966475e-05, + "loss": 92.0235, + "step": 85870 + }, + { + "epoch": 0.3469660669772177, + "grad_norm": 1600.4736328125, + "learning_rate": 3.319898436443737e-05, + "loss": 70.1648, + "step": 85880 + }, + { + "epoch": 0.34700646824258535, + "grad_norm": 435.03125, + "learning_rate": 3.319688618188899e-05, + "loss": 89.7342, + "step": 85890 + }, + { + "epoch": 0.347046869507953, + "grad_norm": 1552.344482421875, + "learning_rate": 3.319478774206053e-05, + "loss": 131.7178, + "step": 85900 + }, + { + "epoch": 0.34708727077332063, + "grad_norm": 483.6307373046875, + "learning_rate": 3.319268904499288e-05, + "loss": 94.0856, + "step": 85910 + }, + { + "epoch": 0.3471276720386883, + "grad_norm": 1821.349853515625, + "learning_rate": 3.3190590090726966e-05, + "loss": 109.9803, + "step": 85920 + }, + { + "epoch": 0.34716807330405586, + "grad_norm": 413.9924621582031, + "learning_rate": 3.318849087930371e-05, + "loss": 53.6993, + "step": 85930 + }, + { + "epoch": 0.3472084745694235, + "grad_norm": 2630.708740234375, + "learning_rate": 3.3186391410764033e-05, + "loss": 88.0865, + "step": 85940 + }, + { + "epoch": 0.34724887583479114, + "grad_norm": 505.8538513183594, + "learning_rate": 3.3184291685148866e-05, + "loss": 86.5297, + "step": 85950 + }, + { + "epoch": 0.3472892771001588, + "grad_norm": 388.1467590332031, + "learning_rate": 3.3182191702499146e-05, + "loss": 119.6043, + "step": 85960 + }, + { + "epoch": 0.3473296783655264, + "grad_norm": 721.4622802734375, + "learning_rate": 3.318009146285582e-05, + "loss": 70.7333, + "step": 85970 + }, + { + "epoch": 0.34737007963089406, + "grad_norm": 984.4783935546875, + "learning_rate": 3.317799096625981e-05, + "loss": 74.2794, + "step": 85980 + }, + { + "epoch": 0.3474104808962617, + "grad_norm": 958.5164794921875, + "learning_rate": 3.317589021275209e-05, + "loss": 96.0893, + "step": 85990 + }, + { + "epoch": 0.3474508821616293, + "grad_norm": 472.7339782714844, + "learning_rate": 3.317378920237361e-05, + "loss": 71.0125, + "step": 86000 + }, + { + "epoch": 0.3474912834269969, + "grad_norm": 800.665283203125, + "learning_rate": 3.317168793516533e-05, + "loss": 70.7101, + "step": 86010 + }, + { + "epoch": 0.34753168469236456, + "grad_norm": 379.1642150878906, + "learning_rate": 3.31695864111682e-05, + "loss": 55.8482, + "step": 86020 + }, + { + "epoch": 0.3475720859577322, + "grad_norm": 677.8098754882812, + "learning_rate": 3.316748463042321e-05, + "loss": 88.6488, + "step": 86030 + }, + { + "epoch": 0.34761248722309984, + "grad_norm": 766.5693359375, + "learning_rate": 3.316538259297133e-05, + "loss": 
121.0973, + "step": 86040 + }, + { + "epoch": 0.3476528884884675, + "grad_norm": 1478.9981689453125, + "learning_rate": 3.316328029885353e-05, + "loss": 99.8293, + "step": 86050 + }, + { + "epoch": 0.34769328975383507, + "grad_norm": 457.76171875, + "learning_rate": 3.3161177748110816e-05, + "loss": 61.4794, + "step": 86060 + }, + { + "epoch": 0.3477336910192027, + "grad_norm": 391.7570495605469, + "learning_rate": 3.315907494078416e-05, + "loss": 61.3823, + "step": 86070 + }, + { + "epoch": 0.34777409228457035, + "grad_norm": 966.30712890625, + "learning_rate": 3.315697187691456e-05, + "loss": 152.197, + "step": 86080 + }, + { + "epoch": 0.347814493549938, + "grad_norm": 731.5145263671875, + "learning_rate": 3.315486855654302e-05, + "loss": 65.9464, + "step": 86090 + }, + { + "epoch": 0.3478548948153056, + "grad_norm": 505.6097717285156, + "learning_rate": 3.315276497971055e-05, + "loss": 69.5034, + "step": 86100 + }, + { + "epoch": 0.34789529608067327, + "grad_norm": 1299.887451171875, + "learning_rate": 3.315066114645815e-05, + "loss": 69.67, + "step": 86110 + }, + { + "epoch": 0.34793569734604085, + "grad_norm": 485.54681396484375, + "learning_rate": 3.314855705682685e-05, + "loss": 82.433, + "step": 86120 + }, + { + "epoch": 0.3479760986114085, + "grad_norm": 690.2813720703125, + "learning_rate": 3.314645271085765e-05, + "loss": 78.8854, + "step": 86130 + }, + { + "epoch": 0.34801649987677613, + "grad_norm": 1562.938720703125, + "learning_rate": 3.3144348108591594e-05, + "loss": 93.8399, + "step": 86140 + }, + { + "epoch": 0.34805690114214377, + "grad_norm": 396.84710693359375, + "learning_rate": 3.314224325006969e-05, + "loss": 108.6005, + "step": 86150 + }, + { + "epoch": 0.3480973024075114, + "grad_norm": 1014.1446533203125, + "learning_rate": 3.3140138135333004e-05, + "loss": 94.7308, + "step": 86160 + }, + { + "epoch": 0.34813770367287905, + "grad_norm": 465.6385498046875, + "learning_rate": 3.313803276442255e-05, + "loss": 80.5293, + "step": 86170 + }, + { + "epoch": 0.3481781049382467, + "grad_norm": 891.406494140625, + "learning_rate": 3.313592713737939e-05, + "loss": 108.2343, + "step": 86180 + }, + { + "epoch": 0.3482185062036143, + "grad_norm": 707.58154296875, + "learning_rate": 3.3133821254244564e-05, + "loss": 96.7283, + "step": 86190 + }, + { + "epoch": 0.3482589074689819, + "grad_norm": 599.442138671875, + "learning_rate": 3.3131715115059134e-05, + "loss": 77.2233, + "step": 86200 + }, + { + "epoch": 0.34829930873434956, + "grad_norm": 882.073974609375, + "learning_rate": 3.3129608719864154e-05, + "loss": 79.6481, + "step": 86210 + }, + { + "epoch": 0.3483397099997172, + "grad_norm": 352.6758117675781, + "learning_rate": 3.312750206870069e-05, + "loss": 76.5809, + "step": 86220 + }, + { + "epoch": 0.34838011126508484, + "grad_norm": 1356.5174560546875, + "learning_rate": 3.312539516160982e-05, + "loss": 140.9335, + "step": 86230 + }, + { + "epoch": 0.3484205125304525, + "grad_norm": 2334.94921875, + "learning_rate": 3.312328799863261e-05, + "loss": 144.6698, + "step": 86240 + }, + { + "epoch": 0.34846091379582006, + "grad_norm": 788.2113037109375, + "learning_rate": 3.312118057981015e-05, + "loss": 99.9811, + "step": 86250 + }, + { + "epoch": 0.3485013150611877, + "grad_norm": 557.9508666992188, + "learning_rate": 3.311907290518352e-05, + "loss": 69.6625, + "step": 86260 + }, + { + "epoch": 0.34854171632655534, + "grad_norm": 757.5223999023438, + "learning_rate": 3.31169649747938e-05, + "loss": 82.9069, + "step": 86270 + }, + { + "epoch": 0.348582117591923, + 
"grad_norm": 1100.958740234375, + "learning_rate": 3.3114856788682105e-05, + "loss": 99.9403, + "step": 86280 + }, + { + "epoch": 0.3486225188572906, + "grad_norm": 987.8717651367188, + "learning_rate": 3.311274834688951e-05, + "loss": 110.4741, + "step": 86290 + }, + { + "epoch": 0.34866292012265826, + "grad_norm": 458.2936706542969, + "learning_rate": 3.3110639649457153e-05, + "loss": 90.7558, + "step": 86300 + }, + { + "epoch": 0.3487033213880259, + "grad_norm": 713.0794677734375, + "learning_rate": 3.310853069642611e-05, + "loss": 81.0091, + "step": 86310 + }, + { + "epoch": 0.3487437226533935, + "grad_norm": 1093.604248046875, + "learning_rate": 3.310642148783752e-05, + "loss": 89.0664, + "step": 86320 + }, + { + "epoch": 0.3487841239187611, + "grad_norm": 368.9817810058594, + "learning_rate": 3.31043120237325e-05, + "loss": 66.2774, + "step": 86330 + }, + { + "epoch": 0.34882452518412876, + "grad_norm": 648.4984130859375, + "learning_rate": 3.310220230415217e-05, + "loss": 83.7845, + "step": 86340 + }, + { + "epoch": 0.3488649264494964, + "grad_norm": 497.5029296875, + "learning_rate": 3.3100092329137654e-05, + "loss": 72.8571, + "step": 86350 + }, + { + "epoch": 0.34890532771486404, + "grad_norm": 1994.8642578125, + "learning_rate": 3.30979820987301e-05, + "loss": 97.099, + "step": 86360 + }, + { + "epoch": 0.3489457289802317, + "grad_norm": 452.9716796875, + "learning_rate": 3.3095871612970636e-05, + "loss": 64.1192, + "step": 86370 + }, + { + "epoch": 0.34898613024559927, + "grad_norm": 664.4078369140625, + "learning_rate": 3.3093760871900414e-05, + "loss": 79.8381, + "step": 86380 + }, + { + "epoch": 0.3490265315109669, + "grad_norm": 732.9580688476562, + "learning_rate": 3.3091649875560584e-05, + "loss": 97.8003, + "step": 86390 + }, + { + "epoch": 0.34906693277633455, + "grad_norm": 924.2863159179688, + "learning_rate": 3.30895386239923e-05, + "loss": 89.1731, + "step": 86400 + }, + { + "epoch": 0.3491073340417022, + "grad_norm": 1572.65869140625, + "learning_rate": 3.308742711723672e-05, + "loss": 117.7768, + "step": 86410 + }, + { + "epoch": 0.34914773530706983, + "grad_norm": 1214.7684326171875, + "learning_rate": 3.308531535533501e-05, + "loss": 76.8139, + "step": 86420 + }, + { + "epoch": 0.34918813657243747, + "grad_norm": 427.4381408691406, + "learning_rate": 3.308320333832835e-05, + "loss": 94.0217, + "step": 86430 + }, + { + "epoch": 0.34922853783780505, + "grad_norm": 709.712890625, + "learning_rate": 3.308109106625789e-05, + "loss": 102.3376, + "step": 86440 + }, + { + "epoch": 0.3492689391031727, + "grad_norm": 526.7239990234375, + "learning_rate": 3.307897853916483e-05, + "loss": 90.4032, + "step": 86450 + }, + { + "epoch": 0.34930934036854033, + "grad_norm": 738.6033935546875, + "learning_rate": 3.307686575709036e-05, + "loss": 90.9033, + "step": 86460 + }, + { + "epoch": 0.349349741633908, + "grad_norm": 724.3355712890625, + "learning_rate": 3.3074752720075644e-05, + "loss": 86.0186, + "step": 86470 + }, + { + "epoch": 0.3493901428992756, + "grad_norm": 757.2182006835938, + "learning_rate": 3.30726394281619e-05, + "loss": 71.2576, + "step": 86480 + }, + { + "epoch": 0.34943054416464325, + "grad_norm": 678.388916015625, + "learning_rate": 3.307052588139032e-05, + "loss": 113.9437, + "step": 86490 + }, + { + "epoch": 0.3494709454300109, + "grad_norm": 653.3453369140625, + "learning_rate": 3.3068412079802114e-05, + "loss": 57.2362, + "step": 86500 + }, + { + "epoch": 0.3495113466953785, + "grad_norm": 398.6534423828125, + "learning_rate": 3.306629802343848e-05, + 
"loss": 71.8553, + "step": 86510 + }, + { + "epoch": 0.3495517479607461, + "grad_norm": 495.8721618652344, + "learning_rate": 3.306418371234064e-05, + "loss": 197.8962, + "step": 86520 + }, + { + "epoch": 0.34959214922611376, + "grad_norm": 753.1102294921875, + "learning_rate": 3.306206914654981e-05, + "loss": 107.3446, + "step": 86530 + }, + { + "epoch": 0.3496325504914814, + "grad_norm": 719.4027099609375, + "learning_rate": 3.305995432610722e-05, + "loss": 75.8335, + "step": 86540 + }, + { + "epoch": 0.34967295175684904, + "grad_norm": 2193.4169921875, + "learning_rate": 3.30578392510541e-05, + "loss": 87.1155, + "step": 86550 + }, + { + "epoch": 0.3497133530222167, + "grad_norm": 1018.2779541015625, + "learning_rate": 3.305572392143168e-05, + "loss": 169.7359, + "step": 86560 + }, + { + "epoch": 0.34975375428758426, + "grad_norm": 383.8271789550781, + "learning_rate": 3.3053608337281194e-05, + "loss": 92.7564, + "step": 86570 + }, + { + "epoch": 0.3497941555529519, + "grad_norm": 810.8055419921875, + "learning_rate": 3.30514924986439e-05, + "loss": 117.8991, + "step": 86580 + }, + { + "epoch": 0.34983455681831954, + "grad_norm": 1271.8101806640625, + "learning_rate": 3.3049376405561046e-05, + "loss": 104.6427, + "step": 86590 + }, + { + "epoch": 0.3498749580836872, + "grad_norm": 1454.68994140625, + "learning_rate": 3.304726005807386e-05, + "loss": 99.7888, + "step": 86600 + }, + { + "epoch": 0.3499153593490548, + "grad_norm": 624.6162109375, + "learning_rate": 3.304514345622364e-05, + "loss": 71.1065, + "step": 86610 + }, + { + "epoch": 0.34995576061442246, + "grad_norm": 785.2533569335938, + "learning_rate": 3.3043026600051624e-05, + "loss": 111.0803, + "step": 86620 + }, + { + "epoch": 0.3499961618797901, + "grad_norm": 698.0706787109375, + "learning_rate": 3.304090948959909e-05, + "loss": 74.9212, + "step": 86630 + }, + { + "epoch": 0.3500365631451577, + "grad_norm": 702.9820556640625, + "learning_rate": 3.3038792124907325e-05, + "loss": 96.7492, + "step": 86640 + }, + { + "epoch": 0.3500769644105253, + "grad_norm": 1148.6387939453125, + "learning_rate": 3.303667450601758e-05, + "loss": 99.1558, + "step": 86650 + }, + { + "epoch": 0.35011736567589297, + "grad_norm": 785.9705810546875, + "learning_rate": 3.303455663297116e-05, + "loss": 78.6736, + "step": 86660 + }, + { + "epoch": 0.3501577669412606, + "grad_norm": 723.2868041992188, + "learning_rate": 3.303243850580934e-05, + "loss": 85.1889, + "step": 86670 + }, + { + "epoch": 0.35019816820662825, + "grad_norm": 381.4661560058594, + "learning_rate": 3.303032012457343e-05, + "loss": 70.5739, + "step": 86680 + }, + { + "epoch": 0.3502385694719959, + "grad_norm": 855.794677734375, + "learning_rate": 3.3028201489304716e-05, + "loss": 93.2593, + "step": 86690 + }, + { + "epoch": 0.35027897073736347, + "grad_norm": 1084.3970947265625, + "learning_rate": 3.3026082600044506e-05, + "loss": 112.2485, + "step": 86700 + }, + { + "epoch": 0.3503193720027311, + "grad_norm": 1166.3978271484375, + "learning_rate": 3.3023963456834115e-05, + "loss": 112.0847, + "step": 86710 + }, + { + "epoch": 0.35035977326809875, + "grad_norm": 465.0201721191406, + "learning_rate": 3.302184405971485e-05, + "loss": 115.2947, + "step": 86720 + }, + { + "epoch": 0.3504001745334664, + "grad_norm": 805.5089111328125, + "learning_rate": 3.301972440872803e-05, + "loss": 143.8151, + "step": 86730 + }, + { + "epoch": 0.35044057579883403, + "grad_norm": 2086.866455078125, + "learning_rate": 3.3017604503914976e-05, + "loss": 67.6944, + "step": 86740 + }, + { + "epoch": 
0.35048097706420167, + "grad_norm": 561.1386108398438, + "learning_rate": 3.301548434531702e-05, + "loss": 66.0216, + "step": 86750 + }, + { + "epoch": 0.35052137832956926, + "grad_norm": 628.0346069335938, + "learning_rate": 3.30133639329755e-05, + "loss": 108.8022, + "step": 86760 + }, + { + "epoch": 0.3505617795949369, + "grad_norm": 695.5948486328125, + "learning_rate": 3.3011243266931745e-05, + "loss": 83.1611, + "step": 86770 + }, + { + "epoch": 0.35060218086030454, + "grad_norm": 1191.60986328125, + "learning_rate": 3.300912234722711e-05, + "loss": 94.6949, + "step": 86780 + }, + { + "epoch": 0.3506425821256722, + "grad_norm": 524.5386962890625, + "learning_rate": 3.300700117390294e-05, + "loss": 123.5844, + "step": 86790 + }, + { + "epoch": 0.3506829833910398, + "grad_norm": 548.4117431640625, + "learning_rate": 3.300487974700058e-05, + "loss": 57.2298, + "step": 86800 + }, + { + "epoch": 0.35072338465640746, + "grad_norm": 897.8585205078125, + "learning_rate": 3.3002758066561394e-05, + "loss": 89.7753, + "step": 86810 + }, + { + "epoch": 0.3507637859217751, + "grad_norm": 1565.1051025390625, + "learning_rate": 3.300063613262675e-05, + "loss": 113.6457, + "step": 86820 + }, + { + "epoch": 0.3508041871871427, + "grad_norm": 241.42478942871094, + "learning_rate": 3.2998513945238e-05, + "loss": 76.9554, + "step": 86830 + }, + { + "epoch": 0.3508445884525103, + "grad_norm": 659.2351684570312, + "learning_rate": 3.299639150443654e-05, + "loss": 71.0986, + "step": 86840 + }, + { + "epoch": 0.35088498971787796, + "grad_norm": 809.9782104492188, + "learning_rate": 3.299426881026374e-05, + "loss": 93.0248, + "step": 86850 + }, + { + "epoch": 0.3509253909832456, + "grad_norm": 557.2509155273438, + "learning_rate": 3.299214586276096e-05, + "loss": 89.7262, + "step": 86860 + }, + { + "epoch": 0.35096579224861324, + "grad_norm": 1598.8074951171875, + "learning_rate": 3.2990022661969626e-05, + "loss": 119.0493, + "step": 86870 + }, + { + "epoch": 0.3510061935139809, + "grad_norm": 840.4151000976562, + "learning_rate": 3.2987899207931105e-05, + "loss": 111.9219, + "step": 86880 + }, + { + "epoch": 0.35104659477934846, + "grad_norm": 636.1414184570312, + "learning_rate": 3.29857755006868e-05, + "loss": 109.5856, + "step": 86890 + }, + { + "epoch": 0.3510869960447161, + "grad_norm": 287.1276550292969, + "learning_rate": 3.298365154027812e-05, + "loss": 81.9283, + "step": 86900 + }, + { + "epoch": 0.35112739731008374, + "grad_norm": 433.8459777832031, + "learning_rate": 3.298152732674647e-05, + "loss": 79.4663, + "step": 86910 + }, + { + "epoch": 0.3511677985754514, + "grad_norm": 844.0192260742188, + "learning_rate": 3.2979402860133264e-05, + "loss": 83.3004, + "step": 86920 + }, + { + "epoch": 0.351208199840819, + "grad_norm": 954.455322265625, + "learning_rate": 3.297727814047991e-05, + "loss": 76.9229, + "step": 86930 + }, + { + "epoch": 0.35124860110618666, + "grad_norm": 620.6124877929688, + "learning_rate": 3.297515316782784e-05, + "loss": 65.9544, + "step": 86940 + }, + { + "epoch": 0.3512890023715543, + "grad_norm": 851.1443481445312, + "learning_rate": 3.297302794221849e-05, + "loss": 85.2386, + "step": 86950 + }, + { + "epoch": 0.3513294036369219, + "grad_norm": 697.5347290039062, + "learning_rate": 3.2970902463693264e-05, + "loss": 104.1863, + "step": 86960 + }, + { + "epoch": 0.35136980490228953, + "grad_norm": 459.5032958984375, + "learning_rate": 3.296877673229362e-05, + "loss": 102.5771, + "step": 86970 + }, + { + "epoch": 0.35141020616765717, + "grad_norm": 1060.83935546875, + 
"learning_rate": 3.2966650748061004e-05, + "loss": 101.2959, + "step": 86980 + }, + { + "epoch": 0.3514506074330248, + "grad_norm": 320.83062744140625, + "learning_rate": 3.2964524511036856e-05, + "loss": 61.1571, + "step": 86990 + }, + { + "epoch": 0.35149100869839245, + "grad_norm": 633.435302734375, + "learning_rate": 3.2962398021262623e-05, + "loss": 81.5038, + "step": 87000 + }, + { + "epoch": 0.3515314099637601, + "grad_norm": 864.2952270507812, + "learning_rate": 3.2960271278779764e-05, + "loss": 92.6942, + "step": 87010 + }, + { + "epoch": 0.3515718112291277, + "grad_norm": 496.82208251953125, + "learning_rate": 3.295814428362975e-05, + "loss": 74.549, + "step": 87020 + }, + { + "epoch": 0.3516122124944953, + "grad_norm": 679.3554077148438, + "learning_rate": 3.295601703585404e-05, + "loss": 48.3027, + "step": 87030 + }, + { + "epoch": 0.35165261375986295, + "grad_norm": 1151.3690185546875, + "learning_rate": 3.2953889535494114e-05, + "loss": 79.6353, + "step": 87040 + }, + { + "epoch": 0.3516930150252306, + "grad_norm": 1297.6444091796875, + "learning_rate": 3.295176178259143e-05, + "loss": 93.5183, + "step": 87050 + }, + { + "epoch": 0.35173341629059823, + "grad_norm": 1032.6943359375, + "learning_rate": 3.294963377718749e-05, + "loss": 89.0405, + "step": 87060 + }, + { + "epoch": 0.3517738175559659, + "grad_norm": 508.0064697265625, + "learning_rate": 3.294750551932377e-05, + "loss": 110.1034, + "step": 87070 + }, + { + "epoch": 0.35181421882133346, + "grad_norm": 1124.3106689453125, + "learning_rate": 3.294537700904177e-05, + "loss": 81.8424, + "step": 87080 + }, + { + "epoch": 0.3518546200867011, + "grad_norm": 452.4642639160156, + "learning_rate": 3.294324824638297e-05, + "loss": 109.2027, + "step": 87090 + }, + { + "epoch": 0.35189502135206874, + "grad_norm": 1184.383544921875, + "learning_rate": 3.294111923138889e-05, + "loss": 86.8364, + "step": 87100 + }, + { + "epoch": 0.3519354226174364, + "grad_norm": 912.8917236328125, + "learning_rate": 3.2938989964101026e-05, + "loss": 122.9421, + "step": 87110 + }, + { + "epoch": 0.351975823882804, + "grad_norm": 1006.783935546875, + "learning_rate": 3.293686044456089e-05, + "loss": 96.9731, + "step": 87120 + }, + { + "epoch": 0.35201622514817166, + "grad_norm": 538.7522583007812, + "learning_rate": 3.293473067281e-05, + "loss": 65.9581, + "step": 87130 + }, + { + "epoch": 0.3520566264135393, + "grad_norm": 609.7351684570312, + "learning_rate": 3.293260064888988e-05, + "loss": 93.3117, + "step": 87140 + }, + { + "epoch": 0.3520970276789069, + "grad_norm": 301.0176086425781, + "learning_rate": 3.293047037284205e-05, + "loss": 55.4157, + "step": 87150 + }, + { + "epoch": 0.3521374289442745, + "grad_norm": 767.8717651367188, + "learning_rate": 3.292833984470804e-05, + "loss": 87.9203, + "step": 87160 + }, + { + "epoch": 0.35217783020964216, + "grad_norm": 629.380126953125, + "learning_rate": 3.2926209064529384e-05, + "loss": 146.7373, + "step": 87170 + }, + { + "epoch": 0.3522182314750098, + "grad_norm": 581.038330078125, + "learning_rate": 3.292407803234763e-05, + "loss": 92.5485, + "step": 87180 + }, + { + "epoch": 0.35225863274037744, + "grad_norm": 604.4237060546875, + "learning_rate": 3.292194674820433e-05, + "loss": 106.1463, + "step": 87190 + }, + { + "epoch": 0.3522990340057451, + "grad_norm": 788.7254638671875, + "learning_rate": 3.2919815212141025e-05, + "loss": 78.6774, + "step": 87200 + }, + { + "epoch": 0.35233943527111267, + "grad_norm": 660.8157348632812, + "learning_rate": 3.2917683424199255e-05, + "loss": 134.0804, + 
"step": 87210 + }, + { + "epoch": 0.3523798365364803, + "grad_norm": 836.4684448242188, + "learning_rate": 3.291555138442062e-05, + "loss": 91.0431, + "step": 87220 + }, + { + "epoch": 0.35242023780184795, + "grad_norm": 530.5277709960938, + "learning_rate": 3.291341909284664e-05, + "loss": 70.3136, + "step": 87230 + }, + { + "epoch": 0.3524606390672156, + "grad_norm": 789.9409790039062, + "learning_rate": 3.291128654951892e-05, + "loss": 87.8443, + "step": 87240 + }, + { + "epoch": 0.3525010403325832, + "grad_norm": 758.8594360351562, + "learning_rate": 3.290915375447902e-05, + "loss": 115.3221, + "step": 87250 + }, + { + "epoch": 0.35254144159795087, + "grad_norm": 924.7924194335938, + "learning_rate": 3.290702070776851e-05, + "loss": 74.3461, + "step": 87260 + }, + { + "epoch": 0.3525818428633185, + "grad_norm": 855.8338012695312, + "learning_rate": 3.2904887409429e-05, + "loss": 66.8454, + "step": 87270 + }, + { + "epoch": 0.3526222441286861, + "grad_norm": 630.6656494140625, + "learning_rate": 3.2902753859502056e-05, + "loss": 53.6111, + "step": 87280 + }, + { + "epoch": 0.35266264539405373, + "grad_norm": 847.9459838867188, + "learning_rate": 3.290062005802929e-05, + "loss": 114.0361, + "step": 87290 + }, + { + "epoch": 0.35270304665942137, + "grad_norm": 457.1285400390625, + "learning_rate": 3.289848600505229e-05, + "loss": 72.1069, + "step": 87300 + }, + { + "epoch": 0.352743447924789, + "grad_norm": 702.7105712890625, + "learning_rate": 3.289635170061267e-05, + "loss": 114.9443, + "step": 87310 + }, + { + "epoch": 0.35278384919015665, + "grad_norm": 969.5294189453125, + "learning_rate": 3.289421714475203e-05, + "loss": 93.857, + "step": 87320 + }, + { + "epoch": 0.3528242504555243, + "grad_norm": 1040.03076171875, + "learning_rate": 3.289208233751199e-05, + "loss": 77.8261, + "step": 87330 + }, + { + "epoch": 0.3528646517208919, + "grad_norm": 853.9056396484375, + "learning_rate": 3.288994727893416e-05, + "loss": 102.2801, + "step": 87340 + }, + { + "epoch": 0.3529050529862595, + "grad_norm": 1493.41357421875, + "learning_rate": 3.2887811969060184e-05, + "loss": 61.2288, + "step": 87350 + }, + { + "epoch": 0.35294545425162716, + "grad_norm": 577.939453125, + "learning_rate": 3.288567640793167e-05, + "loss": 140.3046, + "step": 87360 + }, + { + "epoch": 0.3529858555169948, + "grad_norm": 994.3568725585938, + "learning_rate": 3.288354059559026e-05, + "loss": 82.9637, + "step": 87370 + }, + { + "epoch": 0.35302625678236244, + "grad_norm": 282.9548645019531, + "learning_rate": 3.28814045320776e-05, + "loss": 66.826, + "step": 87380 + }, + { + "epoch": 0.3530666580477301, + "grad_norm": 767.0397338867188, + "learning_rate": 3.287926821743532e-05, + "loss": 87.2148, + "step": 87390 + }, + { + "epoch": 0.35310705931309766, + "grad_norm": 1333.095703125, + "learning_rate": 3.287713165170508e-05, + "loss": 93.2881, + "step": 87400 + }, + { + "epoch": 0.3531474605784653, + "grad_norm": 899.1914672851562, + "learning_rate": 3.2874994834928524e-05, + "loss": 163.2087, + "step": 87410 + }, + { + "epoch": 0.35318786184383294, + "grad_norm": 797.1392822265625, + "learning_rate": 3.2872857767147316e-05, + "loss": 96.2209, + "step": 87420 + }, + { + "epoch": 0.3532282631092006, + "grad_norm": 255.86068725585938, + "learning_rate": 3.2870720448403127e-05, + "loss": 92.0963, + "step": 87430 + }, + { + "epoch": 0.3532686643745682, + "grad_norm": 808.1256713867188, + "learning_rate": 3.286858287873761e-05, + "loss": 113.1729, + "step": 87440 + }, + { + "epoch": 0.35330906563993586, + "grad_norm": 
460.62274169921875, + "learning_rate": 3.286644505819244e-05, + "loss": 41.1772, + "step": 87450 + }, + { + "epoch": 0.3533494669053035, + "grad_norm": 619.1489868164062, + "learning_rate": 3.286430698680931e-05, + "loss": 64.6923, + "step": 87460 + }, + { + "epoch": 0.3533898681706711, + "grad_norm": 2652.8544921875, + "learning_rate": 3.286216866462988e-05, + "loss": 92.6461, + "step": 87470 + }, + { + "epoch": 0.3534302694360387, + "grad_norm": 395.4430847167969, + "learning_rate": 3.286003009169586e-05, + "loss": 74.8611, + "step": 87480 + }, + { + "epoch": 0.35347067070140636, + "grad_norm": 682.4451904296875, + "learning_rate": 3.285789126804893e-05, + "loss": 112.9788, + "step": 87490 + }, + { + "epoch": 0.353511071966774, + "grad_norm": 842.6771850585938, + "learning_rate": 3.285575219373079e-05, + "loss": 59.2459, + "step": 87500 + }, + { + "epoch": 0.35355147323214164, + "grad_norm": 1115.3431396484375, + "learning_rate": 3.285361286878314e-05, + "loss": 94.2603, + "step": 87510 + }, + { + "epoch": 0.3535918744975093, + "grad_norm": 558.6900024414062, + "learning_rate": 3.2851473293247694e-05, + "loss": 76.985, + "step": 87520 + }, + { + "epoch": 0.35363227576287687, + "grad_norm": 522.1242065429688, + "learning_rate": 3.2849333467166156e-05, + "loss": 103.9679, + "step": 87530 + }, + { + "epoch": 0.3536726770282445, + "grad_norm": 563.0612182617188, + "learning_rate": 3.284719339058025e-05, + "loss": 74.1899, + "step": 87540 + }, + { + "epoch": 0.35371307829361215, + "grad_norm": 722.4180297851562, + "learning_rate": 3.284505306353169e-05, + "loss": 81.2012, + "step": 87550 + }, + { + "epoch": 0.3537534795589798, + "grad_norm": 808.2442016601562, + "learning_rate": 3.284291248606221e-05, + "loss": 107.4187, + "step": 87560 + }, + { + "epoch": 0.35379388082434743, + "grad_norm": 414.97821044921875, + "learning_rate": 3.284077165821354e-05, + "loss": 93.6818, + "step": 87570 + }, + { + "epoch": 0.35383428208971507, + "grad_norm": 915.7906494140625, + "learning_rate": 3.2838630580027416e-05, + "loss": 67.4432, + "step": 87580 + }, + { + "epoch": 0.3538746833550827, + "grad_norm": 581.0415649414062, + "learning_rate": 3.2836489251545576e-05, + "loss": 62.9087, + "step": 87590 + }, + { + "epoch": 0.3539150846204503, + "grad_norm": 306.6755065917969, + "learning_rate": 3.2834347672809776e-05, + "loss": 99.4511, + "step": 87600 + }, + { + "epoch": 0.35395548588581793, + "grad_norm": 748.4332885742188, + "learning_rate": 3.283220584386175e-05, + "loss": 109.956, + "step": 87610 + }, + { + "epoch": 0.3539958871511856, + "grad_norm": 794.6282958984375, + "learning_rate": 3.283006376474327e-05, + "loss": 85.7806, + "step": 87620 + }, + { + "epoch": 0.3540362884165532, + "grad_norm": 358.8987731933594, + "learning_rate": 3.2827921435496097e-05, + "loss": 70.3216, + "step": 87630 + }, + { + "epoch": 0.35407668968192085, + "grad_norm": 1492.3966064453125, + "learning_rate": 3.2825778856161984e-05, + "loss": 129.1665, + "step": 87640 + }, + { + "epoch": 0.3541170909472885, + "grad_norm": 492.3809509277344, + "learning_rate": 3.2823636026782715e-05, + "loss": 76.7388, + "step": 87650 + }, + { + "epoch": 0.3541574922126561, + "grad_norm": 773.2371215820312, + "learning_rate": 3.282149294740005e-05, + "loss": 74.791, + "step": 87660 + }, + { + "epoch": 0.3541978934780237, + "grad_norm": 636.8792114257812, + "learning_rate": 3.2819349618055784e-05, + "loss": 79.0147, + "step": 87670 + }, + { + "epoch": 0.35423829474339136, + "grad_norm": 1017.0528564453125, + "learning_rate": 3.28172060387917e-05, 
+ "loss": 78.0035, + "step": 87680 + }, + { + "epoch": 0.354278696008759, + "grad_norm": 742.8696899414062, + "learning_rate": 3.2815062209649585e-05, + "loss": 97.4056, + "step": 87690 + }, + { + "epoch": 0.35431909727412664, + "grad_norm": 1507.964599609375, + "learning_rate": 3.281291813067123e-05, + "loss": 130.426, + "step": 87700 + }, + { + "epoch": 0.3543594985394943, + "grad_norm": 1280.0906982421875, + "learning_rate": 3.2810773801898445e-05, + "loss": 88.9227, + "step": 87710 + }, + { + "epoch": 0.35439989980486186, + "grad_norm": 614.4711303710938, + "learning_rate": 3.2808629223373026e-05, + "loss": 92.9796, + "step": 87720 + }, + { + "epoch": 0.3544403010702295, + "grad_norm": 1042.930908203125, + "learning_rate": 3.280648439513679e-05, + "loss": 105.3859, + "step": 87730 + }, + { + "epoch": 0.35448070233559714, + "grad_norm": 619.9752807617188, + "learning_rate": 3.2804339317231545e-05, + "loss": 75.4527, + "step": 87740 + }, + { + "epoch": 0.3545211036009648, + "grad_norm": 2066.908935546875, + "learning_rate": 3.2802193989699116e-05, + "loss": 113.2203, + "step": 87750 + }, + { + "epoch": 0.3545615048663324, + "grad_norm": 592.1727294921875, + "learning_rate": 3.2800048412581315e-05, + "loss": 57.2573, + "step": 87760 + }, + { + "epoch": 0.35460190613170006, + "grad_norm": 568.56396484375, + "learning_rate": 3.279790258591999e-05, + "loss": 74.9995, + "step": 87770 + }, + { + "epoch": 0.3546423073970677, + "grad_norm": 1174.885009765625, + "learning_rate": 3.279575650975696e-05, + "loss": 82.9036, + "step": 87780 + }, + { + "epoch": 0.3546827086624353, + "grad_norm": 799.6396484375, + "learning_rate": 3.279361018413407e-05, + "loss": 75.552, + "step": 87790 + }, + { + "epoch": 0.3547231099278029, + "grad_norm": 483.81732177734375, + "learning_rate": 3.2791463609093164e-05, + "loss": 112.6379, + "step": 87800 + }, + { + "epoch": 0.35476351119317057, + "grad_norm": 638.4161987304688, + "learning_rate": 3.278931678467609e-05, + "loss": 119.82, + "step": 87810 + }, + { + "epoch": 0.3548039124585382, + "grad_norm": 708.8178100585938, + "learning_rate": 3.27871697109247e-05, + "loss": 75.986, + "step": 87820 + }, + { + "epoch": 0.35484431372390585, + "grad_norm": 679.2796020507812, + "learning_rate": 3.2785022387880854e-05, + "loss": 79.5893, + "step": 87830 + }, + { + "epoch": 0.3548847149892735, + "grad_norm": 901.177001953125, + "learning_rate": 3.278287481558641e-05, + "loss": 90.761, + "step": 87840 + }, + { + "epoch": 0.35492511625464107, + "grad_norm": 704.9710083007812, + "learning_rate": 3.278072699408324e-05, + "loss": 92.1933, + "step": 87850 + }, + { + "epoch": 0.3549655175200087, + "grad_norm": 1206.699951171875, + "learning_rate": 3.2778578923413226e-05, + "loss": 74.0387, + "step": 87860 + }, + { + "epoch": 0.35500591878537635, + "grad_norm": 701.18359375, + "learning_rate": 3.2776430603618225e-05, + "loss": 103.8285, + "step": 87870 + }, + { + "epoch": 0.355046320050744, + "grad_norm": 656.9613037109375, + "learning_rate": 3.277428203474014e-05, + "loss": 107.3558, + "step": 87880 + }, + { + "epoch": 0.35508672131611163, + "grad_norm": 285.7418212890625, + "learning_rate": 3.277213321682085e-05, + "loss": 64.3524, + "step": 87890 + }, + { + "epoch": 0.35512712258147927, + "grad_norm": 833.6400756835938, + "learning_rate": 3.276998414990225e-05, + "loss": 143.3411, + "step": 87900 + }, + { + "epoch": 0.3551675238468469, + "grad_norm": 600.82958984375, + "learning_rate": 3.276783483402623e-05, + "loss": 122.1238, + "step": 87910 + }, + { + "epoch": 
0.3552079251122145, + "grad_norm": 621.6184692382812, + "learning_rate": 3.27656852692347e-05, + "loss": 46.5502, + "step": 87920 + }, + { + "epoch": 0.35524832637758214, + "grad_norm": 565.7012939453125, + "learning_rate": 3.276353545556956e-05, + "loss": 87.7749, + "step": 87930 + }, + { + "epoch": 0.3552887276429498, + "grad_norm": 689.2240600585938, + "learning_rate": 3.276138539307273e-05, + "loss": 65.5364, + "step": 87940 + }, + { + "epoch": 0.3553291289083174, + "grad_norm": 635.4413452148438, + "learning_rate": 3.275923508178611e-05, + "loss": 120.7625, + "step": 87950 + }, + { + "epoch": 0.35536953017368506, + "grad_norm": 1726.8812255859375, + "learning_rate": 3.275708452175164e-05, + "loss": 113.0987, + "step": 87960 + }, + { + "epoch": 0.3554099314390527, + "grad_norm": 492.5638427734375, + "learning_rate": 3.2754933713011245e-05, + "loss": 93.8371, + "step": 87970 + }, + { + "epoch": 0.3554503327044203, + "grad_norm": 465.644287109375, + "learning_rate": 3.275278265560684e-05, + "loss": 74.9434, + "step": 87980 + }, + { + "epoch": 0.3554907339697879, + "grad_norm": 401.0062255859375, + "learning_rate": 3.275063134958038e-05, + "loss": 98.9481, + "step": 87990 + }, + { + "epoch": 0.35553113523515556, + "grad_norm": 806.2699584960938, + "learning_rate": 3.27484797949738e-05, + "loss": 72.3224, + "step": 88000 + }, + { + "epoch": 0.3555715365005232, + "grad_norm": 482.5807800292969, + "learning_rate": 3.274632799182904e-05, + "loss": 64.9665, + "step": 88010 + }, + { + "epoch": 0.35561193776589084, + "grad_norm": 1322.98291015625, + "learning_rate": 3.274417594018805e-05, + "loss": 88.3097, + "step": 88020 + }, + { + "epoch": 0.3556523390312585, + "grad_norm": 3311.193359375, + "learning_rate": 3.2742023640092785e-05, + "loss": 106.4048, + "step": 88030 + }, + { + "epoch": 0.35569274029662606, + "grad_norm": 1332.1236572265625, + "learning_rate": 3.2739871091585216e-05, + "loss": 103.2393, + "step": 88040 + }, + { + "epoch": 0.3557331415619937, + "grad_norm": 665.483642578125, + "learning_rate": 3.27377182947073e-05, + "loss": 65.9337, + "step": 88050 + }, + { + "epoch": 0.35577354282736134, + "grad_norm": 664.5662841796875, + "learning_rate": 3.2735565249501005e-05, + "loss": 103.7267, + "step": 88060 + }, + { + "epoch": 0.355813944092729, + "grad_norm": 708.9069213867188, + "learning_rate": 3.2733411956008314e-05, + "loss": 85.3532, + "step": 88070 + }, + { + "epoch": 0.3558543453580966, + "grad_norm": 1130.1409912109375, + "learning_rate": 3.27312584142712e-05, + "loss": 112.1936, + "step": 88080 + }, + { + "epoch": 0.35589474662346426, + "grad_norm": 1411.5921630859375, + "learning_rate": 3.2729104624331643e-05, + "loss": 85.1943, + "step": 88090 + }, + { + "epoch": 0.3559351478888319, + "grad_norm": 719.8018188476562, + "learning_rate": 3.272695058623165e-05, + "loss": 115.2951, + "step": 88100 + }, + { + "epoch": 0.3559755491541995, + "grad_norm": 494.1856689453125, + "learning_rate": 3.272479630001319e-05, + "loss": 87.3665, + "step": 88110 + }, + { + "epoch": 0.35601595041956713, + "grad_norm": 1042.535888671875, + "learning_rate": 3.272264176571828e-05, + "loss": 89.0865, + "step": 88120 + }, + { + "epoch": 0.35605635168493477, + "grad_norm": 727.0180053710938, + "learning_rate": 3.272048698338892e-05, + "loss": 120.8848, + "step": 88130 + }, + { + "epoch": 0.3560967529503024, + "grad_norm": 719.1257934570312, + "learning_rate": 3.271833195306711e-05, + "loss": 68.7868, + "step": 88140 + }, + { + "epoch": 0.35613715421567005, + "grad_norm": 929.3975830078125, + 
"learning_rate": 3.271617667479489e-05, + "loss": 146.9501, + "step": 88150 + }, + { + "epoch": 0.3561775554810377, + "grad_norm": 1331.013916015625, + "learning_rate": 3.271402114861424e-05, + "loss": 143.7038, + "step": 88160 + }, + { + "epoch": 0.3562179567464053, + "grad_norm": 838.1109619140625, + "learning_rate": 3.271186537456721e-05, + "loss": 87.956, + "step": 88170 + }, + { + "epoch": 0.3562583580117729, + "grad_norm": 757.2185668945312, + "learning_rate": 3.270970935269582e-05, + "loss": 69.6579, + "step": 88180 + }, + { + "epoch": 0.35629875927714055, + "grad_norm": 713.9384765625, + "learning_rate": 3.27075530830421e-05, + "loss": 126.8766, + "step": 88190 + }, + { + "epoch": 0.3563391605425082, + "grad_norm": 1228.2550048828125, + "learning_rate": 3.270539656564809e-05, + "loss": 81.162, + "step": 88200 + }, + { + "epoch": 0.35637956180787583, + "grad_norm": 571.8278198242188, + "learning_rate": 3.270323980055583e-05, + "loss": 79.4412, + "step": 88210 + }, + { + "epoch": 0.3564199630732435, + "grad_norm": 1500.001708984375, + "learning_rate": 3.270108278780738e-05, + "loss": 115.8107, + "step": 88220 + }, + { + "epoch": 0.35646036433861106, + "grad_norm": 591.5989379882812, + "learning_rate": 3.2698925527444775e-05, + "loss": 84.7953, + "step": 88230 + }, + { + "epoch": 0.3565007656039787, + "grad_norm": 835.56787109375, + "learning_rate": 3.269676801951008e-05, + "loss": 132.6144, + "step": 88240 + }, + { + "epoch": 0.35654116686934634, + "grad_norm": 1637.7548828125, + "learning_rate": 3.2694610264045355e-05, + "loss": 104.4766, + "step": 88250 + }, + { + "epoch": 0.356581568134714, + "grad_norm": 515.0355834960938, + "learning_rate": 3.269245226109267e-05, + "loss": 66.2912, + "step": 88260 + }, + { + "epoch": 0.3566219694000816, + "grad_norm": 410.22967529296875, + "learning_rate": 3.269029401069409e-05, + "loss": 61.585, + "step": 88270 + }, + { + "epoch": 0.35666237066544926, + "grad_norm": 377.3796691894531, + "learning_rate": 3.2688135512891696e-05, + "loss": 67.4672, + "step": 88280 + }, + { + "epoch": 0.3567027719308169, + "grad_norm": 507.6622619628906, + "learning_rate": 3.268597676772757e-05, + "loss": 73.2574, + "step": 88290 + }, + { + "epoch": 0.3567431731961845, + "grad_norm": 716.213623046875, + "learning_rate": 3.268381777524379e-05, + "loss": 82.9428, + "step": 88300 + }, + { + "epoch": 0.3567835744615521, + "grad_norm": 1588.6416015625, + "learning_rate": 3.268165853548247e-05, + "loss": 150.2788, + "step": 88310 + }, + { + "epoch": 0.35682397572691976, + "grad_norm": 717.2210083007812, + "learning_rate": 3.2679499048485665e-05, + "loss": 89.0653, + "step": 88320 + }, + { + "epoch": 0.3568643769922874, + "grad_norm": 691.7781372070312, + "learning_rate": 3.267733931429551e-05, + "loss": 68.1205, + "step": 88330 + }, + { + "epoch": 0.35690477825765504, + "grad_norm": 1380.028076171875, + "learning_rate": 3.2675179332954094e-05, + "loss": 118.0067, + "step": 88340 + }, + { + "epoch": 0.3569451795230227, + "grad_norm": 458.1353454589844, + "learning_rate": 3.267301910450353e-05, + "loss": 79.0056, + "step": 88350 + }, + { + "epoch": 0.35698558078839027, + "grad_norm": 464.8153991699219, + "learning_rate": 3.267085862898594e-05, + "loss": 82.7701, + "step": 88360 + }, + { + "epoch": 0.3570259820537579, + "grad_norm": 517.0345458984375, + "learning_rate": 3.266869790644344e-05, + "loss": 78.9179, + "step": 88370 + }, + { + "epoch": 0.35706638331912555, + "grad_norm": 711.9871215820312, + "learning_rate": 3.266653693691814e-05, + "loss": 84.6692, + "step": 
88380 + }, + { + "epoch": 0.3571067845844932, + "grad_norm": 836.9574584960938, + "learning_rate": 3.266437572045219e-05, + "loss": 96.8038, + "step": 88390 + }, + { + "epoch": 0.3571471858498608, + "grad_norm": 1065.0244140625, + "learning_rate": 3.266221425708771e-05, + "loss": 69.6669, + "step": 88400 + }, + { + "epoch": 0.35718758711522847, + "grad_norm": 395.3609619140625, + "learning_rate": 3.266005254686686e-05, + "loss": 82.3327, + "step": 88410 + }, + { + "epoch": 0.3572279883805961, + "grad_norm": 644.152099609375, + "learning_rate": 3.265789058983175e-05, + "loss": 69.4609, + "step": 88420 + }, + { + "epoch": 0.3572683896459637, + "grad_norm": 857.5526733398438, + "learning_rate": 3.265572838602455e-05, + "loss": 87.8593, + "step": 88430 + }, + { + "epoch": 0.35730879091133133, + "grad_norm": 1352.630859375, + "learning_rate": 3.265356593548741e-05, + "loss": 81.7298, + "step": 88440 + }, + { + "epoch": 0.35734919217669897, + "grad_norm": 427.7598876953125, + "learning_rate": 3.265140323826249e-05, + "loss": 77.2852, + "step": 88450 + }, + { + "epoch": 0.3573895934420666, + "grad_norm": 960.296142578125, + "learning_rate": 3.264924029439195e-05, + "loss": 101.2278, + "step": 88460 + }, + { + "epoch": 0.35742999470743425, + "grad_norm": 963.3108520507812, + "learning_rate": 3.264707710391796e-05, + "loss": 95.2298, + "step": 88470 + }, + { + "epoch": 0.3574703959728019, + "grad_norm": 401.11328125, + "learning_rate": 3.264491366688269e-05, + "loss": 64.6161, + "step": 88480 + }, + { + "epoch": 0.3575107972381695, + "grad_norm": 944.6473999023438, + "learning_rate": 3.264274998332831e-05, + "loss": 79.5143, + "step": 88490 + }, + { + "epoch": 0.3575511985035371, + "grad_norm": 1133.834716796875, + "learning_rate": 3.264058605329702e-05, + "loss": 72.4217, + "step": 88500 + }, + { + "epoch": 0.35759159976890476, + "grad_norm": 519.32958984375, + "learning_rate": 3.2638421876831e-05, + "loss": 104.2263, + "step": 88510 + }, + { + "epoch": 0.3576320010342724, + "grad_norm": 449.1706848144531, + "learning_rate": 3.2636257453972424e-05, + "loss": 103.2052, + "step": 88520 + }, + { + "epoch": 0.35767240229964004, + "grad_norm": 805.3878784179688, + "learning_rate": 3.2634092784763515e-05, + "loss": 94.1543, + "step": 88530 + }, + { + "epoch": 0.3577128035650077, + "grad_norm": 1003.130126953125, + "learning_rate": 3.2631927869246456e-05, + "loss": 93.8464, + "step": 88540 + }, + { + "epoch": 0.35775320483037526, + "grad_norm": 518.1934814453125, + "learning_rate": 3.2629762707463466e-05, + "loss": 76.0356, + "step": 88550 + }, + { + "epoch": 0.3577936060957429, + "grad_norm": 356.888916015625, + "learning_rate": 3.2627597299456746e-05, + "loss": 87.7413, + "step": 88560 + }, + { + "epoch": 0.35783400736111054, + "grad_norm": 550.6648559570312, + "learning_rate": 3.262543164526852e-05, + "loss": 82.5179, + "step": 88570 + }, + { + "epoch": 0.3578744086264782, + "grad_norm": 559.25732421875, + "learning_rate": 3.2623265744941e-05, + "loss": 105.0516, + "step": 88580 + }, + { + "epoch": 0.3579148098918458, + "grad_norm": 1134.0194091796875, + "learning_rate": 3.262109959851642e-05, + "loss": 80.088, + "step": 88590 + }, + { + "epoch": 0.35795521115721346, + "grad_norm": 1071.7696533203125, + "learning_rate": 3.2618933206036994e-05, + "loss": 87.0773, + "step": 88600 + }, + { + "epoch": 0.3579956124225811, + "grad_norm": 2167.77783203125, + "learning_rate": 3.2616766567544976e-05, + "loss": 127.5384, + "step": 88610 + }, + { + "epoch": 0.3580360136879487, + "grad_norm": 580.5249633789062, + 
"learning_rate": 3.26145996830826e-05, + "loss": 96.787, + "step": 88620 + }, + { + "epoch": 0.3580764149533163, + "grad_norm": 445.2737121582031, + "learning_rate": 3.261243255269211e-05, + "loss": 89.0721, + "step": 88630 + }, + { + "epoch": 0.35811681621868396, + "grad_norm": 1056.9725341796875, + "learning_rate": 3.2610265176415746e-05, + "loss": 102.3534, + "step": 88640 + }, + { + "epoch": 0.3581572174840516, + "grad_norm": 628.934326171875, + "learning_rate": 3.260809755429578e-05, + "loss": 84.6767, + "step": 88650 + }, + { + "epoch": 0.35819761874941924, + "grad_norm": 698.8455810546875, + "learning_rate": 3.260592968637445e-05, + "loss": 124.512, + "step": 88660 + }, + { + "epoch": 0.3582380200147869, + "grad_norm": 942.1621704101562, + "learning_rate": 3.260376157269404e-05, + "loss": 94.2384, + "step": 88670 + }, + { + "epoch": 0.35827842128015447, + "grad_norm": 390.2577819824219, + "learning_rate": 3.2601593213296805e-05, + "loss": 122.796, + "step": 88680 + }, + { + "epoch": 0.3583188225455221, + "grad_norm": 374.1717529296875, + "learning_rate": 3.259942460822503e-05, + "loss": 78.6592, + "step": 88690 + }, + { + "epoch": 0.35835922381088975, + "grad_norm": 1210.132080078125, + "learning_rate": 3.2597255757520976e-05, + "loss": 96.9263, + "step": 88700 + }, + { + "epoch": 0.3583996250762574, + "grad_norm": 472.6987609863281, + "learning_rate": 3.2595086661226943e-05, + "loss": 80.7076, + "step": 88710 + }, + { + "epoch": 0.35844002634162503, + "grad_norm": 610.9305419921875, + "learning_rate": 3.25929173193852e-05, + "loss": 82.1192, + "step": 88720 + }, + { + "epoch": 0.35848042760699267, + "grad_norm": 633.282470703125, + "learning_rate": 3.259074773203806e-05, + "loss": 94.1855, + "step": 88730 + }, + { + "epoch": 0.3585208288723603, + "grad_norm": 343.98602294921875, + "learning_rate": 3.2588577899227814e-05, + "loss": 99.942, + "step": 88740 + }, + { + "epoch": 0.3585612301377279, + "grad_norm": 744.5244140625, + "learning_rate": 3.258640782099675e-05, + "loss": 82.9756, + "step": 88750 + }, + { + "epoch": 0.35860163140309553, + "grad_norm": 640.1964721679688, + "learning_rate": 3.258423749738719e-05, + "loss": 84.4535, + "step": 88760 + }, + { + "epoch": 0.3586420326684632, + "grad_norm": 879.0712890625, + "learning_rate": 3.258206692844145e-05, + "loss": 92.9891, + "step": 88770 + }, + { + "epoch": 0.3586824339338308, + "grad_norm": 1063.33984375, + "learning_rate": 3.2579896114201826e-05, + "loss": 92.2605, + "step": 88780 + }, + { + "epoch": 0.35872283519919845, + "grad_norm": 280.90313720703125, + "learning_rate": 3.257772505471065e-05, + "loss": 101.0131, + "step": 88790 + }, + { + "epoch": 0.3587632364645661, + "grad_norm": 1049.8856201171875, + "learning_rate": 3.257555375001026e-05, + "loss": 83.8249, + "step": 88800 + }, + { + "epoch": 0.3588036377299337, + "grad_norm": 1204.9791259765625, + "learning_rate": 3.257338220014297e-05, + "loss": 104.7229, + "step": 88810 + }, + { + "epoch": 0.3588440389953013, + "grad_norm": 7539.5673828125, + "learning_rate": 3.257121040515112e-05, + "loss": 124.0376, + "step": 88820 + }, + { + "epoch": 0.35888444026066896, + "grad_norm": 1165.003662109375, + "learning_rate": 3.2569038365077045e-05, + "loss": 68.5107, + "step": 88830 + }, + { + "epoch": 0.3589248415260366, + "grad_norm": 711.9754638671875, + "learning_rate": 3.2566866079963104e-05, + "loss": 112.671, + "step": 88840 + }, + { + "epoch": 0.35896524279140424, + "grad_norm": 823.610107421875, + "learning_rate": 3.256469354985163e-05, + "loss": 116.0265, + "step": 
88850 + }, + { + "epoch": 0.3590056440567719, + "grad_norm": 717.2149658203125, + "learning_rate": 3.2562520774785e-05, + "loss": 97.1488, + "step": 88860 + }, + { + "epoch": 0.35904604532213946, + "grad_norm": 864.1978149414062, + "learning_rate": 3.256034775480555e-05, + "loss": 82.7011, + "step": 88870 + }, + { + "epoch": 0.3590864465875071, + "grad_norm": 779.4966430664062, + "learning_rate": 3.2558174489955656e-05, + "loss": 105.2323, + "step": 88880 + }, + { + "epoch": 0.35912684785287474, + "grad_norm": 606.7836303710938, + "learning_rate": 3.2556000980277686e-05, + "loss": 77.2484, + "step": 88890 + }, + { + "epoch": 0.3591672491182424, + "grad_norm": 567.4918823242188, + "learning_rate": 3.255382722581401e-05, + "loss": 70.0886, + "step": 88900 + }, + { + "epoch": 0.35920765038361, + "grad_norm": 2557.928955078125, + "learning_rate": 3.2551653226607016e-05, + "loss": 116.5941, + "step": 88910 + }, + { + "epoch": 0.35924805164897766, + "grad_norm": 776.94873046875, + "learning_rate": 3.2549478982699074e-05, + "loss": 73.1439, + "step": 88920 + }, + { + "epoch": 0.3592884529143453, + "grad_norm": 705.6689453125, + "learning_rate": 3.254730449413258e-05, + "loss": 89.6669, + "step": 88930 + }, + { + "epoch": 0.3593288541797129, + "grad_norm": 1078.5087890625, + "learning_rate": 3.2545129760949924e-05, + "loss": 81.4536, + "step": 88940 + }, + { + "epoch": 0.3593692554450805, + "grad_norm": 683.2706909179688, + "learning_rate": 3.254295478319351e-05, + "loss": 69.2943, + "step": 88950 + }, + { + "epoch": 0.35940965671044817, + "grad_norm": 1015.894287109375, + "learning_rate": 3.254077956090573e-05, + "loss": 72.2055, + "step": 88960 + }, + { + "epoch": 0.3594500579758158, + "grad_norm": 324.81903076171875, + "learning_rate": 3.2538604094128994e-05, + "loss": 113.9915, + "step": 88970 + }, + { + "epoch": 0.35949045924118345, + "grad_norm": 713.9981689453125, + "learning_rate": 3.253642838290572e-05, + "loss": 105.8523, + "step": 88980 + }, + { + "epoch": 0.3595308605065511, + "grad_norm": 1153.80859375, + "learning_rate": 3.2534252427278316e-05, + "loss": 74.3865, + "step": 88990 + }, + { + "epoch": 0.35957126177191867, + "grad_norm": 799.9464721679688, + "learning_rate": 3.253207622728921e-05, + "loss": 84.7613, + "step": 89000 + }, + { + "epoch": 0.3596116630372863, + "grad_norm": 327.6941833496094, + "learning_rate": 3.252989978298083e-05, + "loss": 100.0621, + "step": 89010 + }, + { + "epoch": 0.35965206430265395, + "grad_norm": 980.4714965820312, + "learning_rate": 3.25277230943956e-05, + "loss": 94.5128, + "step": 89020 + }, + { + "epoch": 0.3596924655680216, + "grad_norm": 1082.35498046875, + "learning_rate": 3.2525546161575964e-05, + "loss": 92.8927, + "step": 89030 + }, + { + "epoch": 0.35973286683338923, + "grad_norm": 870.029541015625, + "learning_rate": 3.2523368984564345e-05, + "loss": 77.0458, + "step": 89040 + }, + { + "epoch": 0.35977326809875687, + "grad_norm": 560.4774169921875, + "learning_rate": 3.252119156340321e-05, + "loss": 78.6218, + "step": 89050 + }, + { + "epoch": 0.3598136693641245, + "grad_norm": 572.9273071289062, + "learning_rate": 3.2519013898134994e-05, + "loss": 97.3764, + "step": 89060 + }, + { + "epoch": 0.3598540706294921, + "grad_norm": 386.744384765625, + "learning_rate": 3.2516835988802155e-05, + "loss": 77.373, + "step": 89070 + }, + { + "epoch": 0.35989447189485974, + "grad_norm": 565.7501220703125, + "learning_rate": 3.251465783544716e-05, + "loss": 69.8459, + "step": 89080 + }, + { + "epoch": 0.3599348731602274, + "grad_norm": 
881.9639282226562, + "learning_rate": 3.2512479438112464e-05, + "loss": 69.1056, + "step": 89090 + }, + { + "epoch": 0.359975274425595, + "grad_norm": 987.2113647460938, + "learning_rate": 3.2510300796840546e-05, + "loss": 92.3107, + "step": 89100 + }, + { + "epoch": 0.36001567569096266, + "grad_norm": 832.7267456054688, + "learning_rate": 3.2508121911673866e-05, + "loss": 78.1073, + "step": 89110 + }, + { + "epoch": 0.3600560769563303, + "grad_norm": 517.5066528320312, + "learning_rate": 3.250594278265491e-05, + "loss": 77.4118, + "step": 89120 + }, + { + "epoch": 0.3600964782216979, + "grad_norm": 2448.97802734375, + "learning_rate": 3.250376340982616e-05, + "loss": 93.2356, + "step": 89130 + }, + { + "epoch": 0.3601368794870655, + "grad_norm": 493.5299377441406, + "learning_rate": 3.250158379323011e-05, + "loss": 75.9196, + "step": 89140 + }, + { + "epoch": 0.36017728075243316, + "grad_norm": 921.6806030273438, + "learning_rate": 3.249940393290925e-05, + "loss": 88.1876, + "step": 89150 + }, + { + "epoch": 0.3602176820178008, + "grad_norm": 943.0089721679688, + "learning_rate": 3.249722382890607e-05, + "loss": 63.7129, + "step": 89160 + }, + { + "epoch": 0.36025808328316844, + "grad_norm": 878.5640258789062, + "learning_rate": 3.249504348126308e-05, + "loss": 83.0043, + "step": 89170 + }, + { + "epoch": 0.3602984845485361, + "grad_norm": 800.5379638671875, + "learning_rate": 3.249286289002278e-05, + "loss": 64.5711, + "step": 89180 + }, + { + "epoch": 0.36033888581390366, + "grad_norm": 955.5904541015625, + "learning_rate": 3.2490682055227695e-05, + "loss": 99.918, + "step": 89190 + }, + { + "epoch": 0.3603792870792713, + "grad_norm": 641.8076171875, + "learning_rate": 3.248850097692032e-05, + "loss": 71.2816, + "step": 89200 + }, + { + "epoch": 0.36041968834463894, + "grad_norm": 451.8675842285156, + "learning_rate": 3.2486319655143196e-05, + "loss": 91.7122, + "step": 89210 + }, + { + "epoch": 0.3604600896100066, + "grad_norm": 266.1933288574219, + "learning_rate": 3.248413808993884e-05, + "loss": 72.2979, + "step": 89220 + }, + { + "epoch": 0.3605004908753742, + "grad_norm": 874.8329467773438, + "learning_rate": 3.248195628134979e-05, + "loss": 68.0958, + "step": 89230 + }, + { + "epoch": 0.36054089214074186, + "grad_norm": 609.6244506835938, + "learning_rate": 3.2479774229418565e-05, + "loss": 66.7637, + "step": 89240 + }, + { + "epoch": 0.3605812934061095, + "grad_norm": 1143.501220703125, + "learning_rate": 3.247759193418773e-05, + "loss": 77.0549, + "step": 89250 + }, + { + "epoch": 0.3606216946714771, + "grad_norm": 593.2518310546875, + "learning_rate": 3.2475409395699805e-05, + "loss": 73.7827, + "step": 89260 + }, + { + "epoch": 0.36066209593684473, + "grad_norm": 487.49761962890625, + "learning_rate": 3.2473226613997355e-05, + "loss": 142.6961, + "step": 89270 + }, + { + "epoch": 0.36070249720221237, + "grad_norm": 892.8362426757812, + "learning_rate": 3.247104358912293e-05, + "loss": 116.5464, + "step": 89280 + }, + { + "epoch": 0.36074289846758, + "grad_norm": 657.222900390625, + "learning_rate": 3.2468860321119095e-05, + "loss": 150.7397, + "step": 89290 + }, + { + "epoch": 0.36078329973294765, + "grad_norm": 9581.21484375, + "learning_rate": 3.246667681002841e-05, + "loss": 128.4402, + "step": 89300 + }, + { + "epoch": 0.3608237009983153, + "grad_norm": 1147.2603759765625, + "learning_rate": 3.2464493055893436e-05, + "loss": 107.1294, + "step": 89310 + }, + { + "epoch": 0.3608641022636829, + "grad_norm": 294.98358154296875, + "learning_rate": 3.246230905875675e-05, + 
"loss": 76.079, + "step": 89320 + }, + { + "epoch": 0.3609045035290505, + "grad_norm": 658.5462036132812, + "learning_rate": 3.246012481866093e-05, + "loss": 78.2021, + "step": 89330 + }, + { + "epoch": 0.36094490479441815, + "grad_norm": 1024.15673828125, + "learning_rate": 3.245794033564857e-05, + "loss": 80.0115, + "step": 89340 + }, + { + "epoch": 0.3609853060597858, + "grad_norm": 912.2161254882812, + "learning_rate": 3.245575560976225e-05, + "loss": 161.8321, + "step": 89350 + }, + { + "epoch": 0.36102570732515343, + "grad_norm": 695.5064086914062, + "learning_rate": 3.2453570641044565e-05, + "loss": 67.5372, + "step": 89360 + }, + { + "epoch": 0.3610661085905211, + "grad_norm": 744.06494140625, + "learning_rate": 3.24513854295381e-05, + "loss": 54.8685, + "step": 89370 + }, + { + "epoch": 0.3611065098558887, + "grad_norm": 498.30572509765625, + "learning_rate": 3.244919997528546e-05, + "loss": 89.5356, + "step": 89380 + }, + { + "epoch": 0.3611469111212563, + "grad_norm": 657.6673583984375, + "learning_rate": 3.2447014278329275e-05, + "loss": 83.6491, + "step": 89390 + }, + { + "epoch": 0.36118731238662394, + "grad_norm": 954.83203125, + "learning_rate": 3.244482833871213e-05, + "loss": 67.8483, + "step": 89400 + }, + { + "epoch": 0.3612277136519916, + "grad_norm": 498.165283203125, + "learning_rate": 3.2442642156476653e-05, + "loss": 47.7002, + "step": 89410 + }, + { + "epoch": 0.3612681149173592, + "grad_norm": 623.276611328125, + "learning_rate": 3.244045573166545e-05, + "loss": 100.7567, + "step": 89420 + }, + { + "epoch": 0.36130851618272686, + "grad_norm": 2812.62744140625, + "learning_rate": 3.243826906432117e-05, + "loss": 94.6014, + "step": 89430 + }, + { + "epoch": 0.3613489174480945, + "grad_norm": 705.4237670898438, + "learning_rate": 3.2436082154486426e-05, + "loss": 102.7016, + "step": 89440 + }, + { + "epoch": 0.3613893187134621, + "grad_norm": 602.6668090820312, + "learning_rate": 3.243389500220386e-05, + "loss": 73.7306, + "step": 89450 + }, + { + "epoch": 0.3614297199788297, + "grad_norm": 1418.2730712890625, + "learning_rate": 3.243170760751611e-05, + "loss": 108.4565, + "step": 89460 + }, + { + "epoch": 0.36147012124419736, + "grad_norm": 435.5602111816406, + "learning_rate": 3.242951997046581e-05, + "loss": 95.2928, + "step": 89470 + }, + { + "epoch": 0.361510522509565, + "grad_norm": 1419.375732421875, + "learning_rate": 3.242733209109563e-05, + "loss": 80.0574, + "step": 89480 + }, + { + "epoch": 0.36155092377493264, + "grad_norm": 988.6339111328125, + "learning_rate": 3.242514396944821e-05, + "loss": 63.8403, + "step": 89490 + }, + { + "epoch": 0.3615913250403003, + "grad_norm": 783.754638671875, + "learning_rate": 3.242295560556621e-05, + "loss": 98.6021, + "step": 89500 + }, + { + "epoch": 0.36163172630566787, + "grad_norm": 1324.8360595703125, + "learning_rate": 3.24207669994923e-05, + "loss": 87.4897, + "step": 89510 + }, + { + "epoch": 0.3616721275710355, + "grad_norm": 2711.906494140625, + "learning_rate": 3.2418578151269135e-05, + "loss": 122.1951, + "step": 89520 + }, + { + "epoch": 0.36171252883640315, + "grad_norm": 482.2725830078125, + "learning_rate": 3.24163890609394e-05, + "loss": 100.3463, + "step": 89530 + }, + { + "epoch": 0.3617529301017708, + "grad_norm": 863.1302490234375, + "learning_rate": 3.2414199728545767e-05, + "loss": 65.9106, + "step": 89540 + }, + { + "epoch": 0.3617933313671384, + "grad_norm": 504.1653137207031, + "learning_rate": 3.2412010154130914e-05, + "loss": 71.6231, + "step": 89550 + }, + { + "epoch": 0.36183373263250607, 
+ "grad_norm": 1887.3756103515625, + "learning_rate": 3.240982033773754e-05, + "loss": 136.0895, + "step": 89560 + }, + { + "epoch": 0.3618741338978737, + "grad_norm": 312.2517395019531, + "learning_rate": 3.2407630279408326e-05, + "loss": 43.9679, + "step": 89570 + }, + { + "epoch": 0.3619145351632413, + "grad_norm": 987.0220947265625, + "learning_rate": 3.240543997918598e-05, + "loss": 74.6431, + "step": 89580 + }, + { + "epoch": 0.36195493642860893, + "grad_norm": 767.616455078125, + "learning_rate": 3.240324943711318e-05, + "loss": 82.1988, + "step": 89590 + }, + { + "epoch": 0.36199533769397657, + "grad_norm": 344.7796630859375, + "learning_rate": 3.240105865323266e-05, + "loss": 48.3087, + "step": 89600 + }, + { + "epoch": 0.3620357389593442, + "grad_norm": 637.0189208984375, + "learning_rate": 3.23988676275871e-05, + "loss": 131.0972, + "step": 89610 + }, + { + "epoch": 0.36207614022471185, + "grad_norm": 1073.5078125, + "learning_rate": 3.239667636021925e-05, + "loss": 97.9604, + "step": 89620 + }, + { + "epoch": 0.3621165414900795, + "grad_norm": 836.5335693359375, + "learning_rate": 3.239448485117181e-05, + "loss": 84.9496, + "step": 89630 + }, + { + "epoch": 0.3621569427554471, + "grad_norm": 420.5333557128906, + "learning_rate": 3.23922931004875e-05, + "loss": 128.2756, + "step": 89640 + }, + { + "epoch": 0.3621973440208147, + "grad_norm": 1284.200927734375, + "learning_rate": 3.239010110820906e-05, + "loss": 62.6318, + "step": 89650 + }, + { + "epoch": 0.36223774528618236, + "grad_norm": 652.5184326171875, + "learning_rate": 3.2387908874379213e-05, + "loss": 83.192, + "step": 89660 + }, + { + "epoch": 0.36227814655155, + "grad_norm": 1031.2569580078125, + "learning_rate": 3.2385716399040706e-05, + "loss": 78.374, + "step": 89670 + }, + { + "epoch": 0.36231854781691764, + "grad_norm": 1436.958251953125, + "learning_rate": 3.238352368223629e-05, + "loss": 94.9829, + "step": 89680 + }, + { + "epoch": 0.3623589490822853, + "grad_norm": 859.0652465820312, + "learning_rate": 3.23813307240087e-05, + "loss": 104.7603, + "step": 89690 + }, + { + "epoch": 0.3623993503476529, + "grad_norm": 1290.4884033203125, + "learning_rate": 3.23791375244007e-05, + "loss": 76.3908, + "step": 89700 + }, + { + "epoch": 0.3624397516130205, + "grad_norm": 715.0908203125, + "learning_rate": 3.237694408345503e-05, + "loss": 105.7416, + "step": 89710 + }, + { + "epoch": 0.36248015287838814, + "grad_norm": 542.5261840820312, + "learning_rate": 3.2374750401214466e-05, + "loss": 78.4241, + "step": 89720 + }, + { + "epoch": 0.3625205541437558, + "grad_norm": 577.8153076171875, + "learning_rate": 3.2372556477721766e-05, + "loss": 64.0406, + "step": 89730 + }, + { + "epoch": 0.3625609554091234, + "grad_norm": 1459.3253173828125, + "learning_rate": 3.2370362313019725e-05, + "loss": 98.0162, + "step": 89740 + }, + { + "epoch": 0.36260135667449106, + "grad_norm": 874.1311645507812, + "learning_rate": 3.2368167907151086e-05, + "loss": 70.1586, + "step": 89750 + }, + { + "epoch": 0.3626417579398587, + "grad_norm": 1122.0955810546875, + "learning_rate": 3.236597326015865e-05, + "loss": 91.195, + "step": 89760 + }, + { + "epoch": 0.3626821592052263, + "grad_norm": 916.176025390625, + "learning_rate": 3.23637783720852e-05, + "loss": 70.8858, + "step": 89770 + }, + { + "epoch": 0.3627225604705939, + "grad_norm": 818.7608642578125, + "learning_rate": 3.236158324297353e-05, + "loss": 82.0691, + "step": 89780 + }, + { + "epoch": 0.36276296173596156, + "grad_norm": 715.8935546875, + "learning_rate": 3.235938787286642e-05, + 
"loss": 67.6552, + "step": 89790 + }, + { + "epoch": 0.3628033630013292, + "grad_norm": 1078.6180419921875, + "learning_rate": 3.235719226180669e-05, + "loss": 84.8811, + "step": 89800 + }, + { + "epoch": 0.36284376426669684, + "grad_norm": 1044.2490234375, + "learning_rate": 3.2354996409837136e-05, + "loss": 103.4573, + "step": 89810 + }, + { + "epoch": 0.3628841655320645, + "grad_norm": 660.0457153320312, + "learning_rate": 3.2352800317000555e-05, + "loss": 76.4551, + "step": 89820 + }, + { + "epoch": 0.36292456679743207, + "grad_norm": 609.685791015625, + "learning_rate": 3.235060398333978e-05, + "loss": 68.8241, + "step": 89830 + }, + { + "epoch": 0.3629649680627997, + "grad_norm": 1783.923095703125, + "learning_rate": 3.234840740889762e-05, + "loss": 100.306, + "step": 89840 + }, + { + "epoch": 0.36300536932816735, + "grad_norm": 484.3624572753906, + "learning_rate": 3.23462105937169e-05, + "loss": 95.2711, + "step": 89850 + }, + { + "epoch": 0.363045770593535, + "grad_norm": 870.2490234375, + "learning_rate": 3.234401353784045e-05, + "loss": 91.9865, + "step": 89860 + }, + { + "epoch": 0.36308617185890263, + "grad_norm": 664.4890747070312, + "learning_rate": 3.23418162413111e-05, + "loss": 72.0798, + "step": 89870 + }, + { + "epoch": 0.36312657312427027, + "grad_norm": 825.5774536132812, + "learning_rate": 3.233961870417169e-05, + "loss": 117.6078, + "step": 89880 + }, + { + "epoch": 0.3631669743896379, + "grad_norm": 585.6904296875, + "learning_rate": 3.233742092646506e-05, + "loss": 52.681, + "step": 89890 + }, + { + "epoch": 0.3632073756550055, + "grad_norm": 601.2078247070312, + "learning_rate": 3.2335222908234054e-05, + "loss": 119.4539, + "step": 89900 + }, + { + "epoch": 0.36324777692037313, + "grad_norm": 969.7693481445312, + "learning_rate": 3.233302464952153e-05, + "loss": 99.7812, + "step": 89910 + }, + { + "epoch": 0.3632881781857408, + "grad_norm": 447.8693542480469, + "learning_rate": 3.233082615037034e-05, + "loss": 90.6206, + "step": 89920 + }, + { + "epoch": 0.3633285794511084, + "grad_norm": 592.6596069335938, + "learning_rate": 3.232862741082335e-05, + "loss": 65.5405, + "step": 89930 + }, + { + "epoch": 0.36336898071647605, + "grad_norm": 1372.2392578125, + "learning_rate": 3.232642843092341e-05, + "loss": 59.6827, + "step": 89940 + }, + { + "epoch": 0.3634093819818437, + "grad_norm": 718.0592041015625, + "learning_rate": 3.232422921071341e-05, + "loss": 94.7248, + "step": 89950 + }, + { + "epoch": 0.3634497832472113, + "grad_norm": 532.1602783203125, + "learning_rate": 3.2322029750236224e-05, + "loss": 70.8681, + "step": 89960 + }, + { + "epoch": 0.3634901845125789, + "grad_norm": 989.7373046875, + "learning_rate": 3.2319830049534714e-05, + "loss": 78.4934, + "step": 89970 + }, + { + "epoch": 0.36353058577794656, + "grad_norm": 583.197509765625, + "learning_rate": 3.2317630108651775e-05, + "loss": 103.809, + "step": 89980 + }, + { + "epoch": 0.3635709870433142, + "grad_norm": 727.3381958007812, + "learning_rate": 3.231542992763029e-05, + "loss": 110.8814, + "step": 89990 + }, + { + "epoch": 0.36361138830868184, + "grad_norm": 1612.8070068359375, + "learning_rate": 3.2313229506513167e-05, + "loss": 105.7398, + "step": 90000 + }, + { + "epoch": 0.3636517895740495, + "grad_norm": 687.9180297851562, + "learning_rate": 3.231102884534329e-05, + "loss": 102.5131, + "step": 90010 + }, + { + "epoch": 0.3636921908394171, + "grad_norm": 989.65380859375, + "learning_rate": 3.2308827944163576e-05, + "loss": 106.3476, + "step": 90020 + }, + { + "epoch": 0.3637325921047847, + 
"grad_norm": 685.1614379882812, + "learning_rate": 3.230662680301692e-05, + "loss": 150.4364, + "step": 90030 + }, + { + "epoch": 0.36377299337015234, + "grad_norm": 797.5096435546875, + "learning_rate": 3.2304425421946234e-05, + "loss": 80.4998, + "step": 90040 + }, + { + "epoch": 0.36381339463552, + "grad_norm": 1322.1912841796875, + "learning_rate": 3.230222380099445e-05, + "loss": 98.4494, + "step": 90050 + }, + { + "epoch": 0.3638537959008876, + "grad_norm": 1370.9744873046875, + "learning_rate": 3.230002194020447e-05, + "loss": 104.4646, + "step": 90060 + }, + { + "epoch": 0.36389419716625526, + "grad_norm": 586.5908813476562, + "learning_rate": 3.229781983961923e-05, + "loss": 79.4248, + "step": 90070 + }, + { + "epoch": 0.3639345984316229, + "grad_norm": 780.2183837890625, + "learning_rate": 3.229561749928166e-05, + "loss": 82.4254, + "step": 90080 + }, + { + "epoch": 0.3639749996969905, + "grad_norm": 593.0147705078125, + "learning_rate": 3.22934149192347e-05, + "loss": 76.4439, + "step": 90090 + }, + { + "epoch": 0.3640154009623581, + "grad_norm": 1012.6089477539062, + "learning_rate": 3.229121209952129e-05, + "loss": 78.1409, + "step": 90100 + }, + { + "epoch": 0.36405580222772577, + "grad_norm": 673.1834106445312, + "learning_rate": 3.2289009040184375e-05, + "loss": 93.0144, + "step": 90110 + }, + { + "epoch": 0.3640962034930934, + "grad_norm": 486.15423583984375, + "learning_rate": 3.2286805741266895e-05, + "loss": 150.6456, + "step": 90120 + }, + { + "epoch": 0.36413660475846105, + "grad_norm": 800.614013671875, + "learning_rate": 3.228460220281181e-05, + "loss": 103.6271, + "step": 90130 + }, + { + "epoch": 0.3641770060238287, + "grad_norm": 467.4158935546875, + "learning_rate": 3.2282398424862086e-05, + "loss": 103.0582, + "step": 90140 + }, + { + "epoch": 0.36421740728919627, + "grad_norm": 846.1546630859375, + "learning_rate": 3.228019440746068e-05, + "loss": 109.7466, + "step": 90150 + }, + { + "epoch": 0.3642578085545639, + "grad_norm": 365.59503173828125, + "learning_rate": 3.2277990150650554e-05, + "loss": 74.4027, + "step": 90160 + }, + { + "epoch": 0.36429820981993155, + "grad_norm": 870.727783203125, + "learning_rate": 3.22757856544747e-05, + "loss": 84.0158, + "step": 90170 + }, + { + "epoch": 0.3643386110852992, + "grad_norm": 950.7010498046875, + "learning_rate": 3.227358091897608e-05, + "loss": 109.9572, + "step": 90180 + }, + { + "epoch": 0.36437901235066683, + "grad_norm": 757.5420532226562, + "learning_rate": 3.227137594419768e-05, + "loss": 68.2657, + "step": 90190 + }, + { + "epoch": 0.36441941361603447, + "grad_norm": 814.689697265625, + "learning_rate": 3.2269170730182486e-05, + "loss": 84.939, + "step": 90200 + }, + { + "epoch": 0.3644598148814021, + "grad_norm": 1111.14306640625, + "learning_rate": 3.22669652769735e-05, + "loss": 51.8974, + "step": 90210 + }, + { + "epoch": 0.3645002161467697, + "grad_norm": 845.1309204101562, + "learning_rate": 3.22647595846137e-05, + "loss": 96.9501, + "step": 90220 + }, + { + "epoch": 0.36454061741213734, + "grad_norm": 607.8756103515625, + "learning_rate": 3.2262553653146106e-05, + "loss": 57.5571, + "step": 90230 + }, + { + "epoch": 0.364581018677505, + "grad_norm": 698.8919677734375, + "learning_rate": 3.2260347482613714e-05, + "loss": 77.4011, + "step": 90240 + }, + { + "epoch": 0.3646214199428726, + "grad_norm": 918.161376953125, + "learning_rate": 3.2258141073059533e-05, + "loss": 111.1229, + "step": 90250 + }, + { + "epoch": 0.36466182120824026, + "grad_norm": 485.7393798828125, + "learning_rate": 
3.225593442452658e-05, + "loss": 93.1469, + "step": 90260 + }, + { + "epoch": 0.3647022224736079, + "grad_norm": 767.5725708007812, + "learning_rate": 3.225372753705788e-05, + "loss": 103.719, + "step": 90270 + }, + { + "epoch": 0.3647426237389755, + "grad_norm": 962.8639526367188, + "learning_rate": 3.225152041069645e-05, + "loss": 117.0298, + "step": 90280 + }, + { + "epoch": 0.3647830250043431, + "grad_norm": 594.6404418945312, + "learning_rate": 3.224931304548532e-05, + "loss": 88.5563, + "step": 90290 + }, + { + "epoch": 0.36482342626971076, + "grad_norm": 1656.471923828125, + "learning_rate": 3.224710544146753e-05, + "loss": 97.5573, + "step": 90300 + }, + { + "epoch": 0.3648638275350784, + "grad_norm": 682.7074584960938, + "learning_rate": 3.224489759868612e-05, + "loss": 64.9782, + "step": 90310 + }, + { + "epoch": 0.36490422880044604, + "grad_norm": 1259.0740966796875, + "learning_rate": 3.224268951718411e-05, + "loss": 107.5049, + "step": 90320 + }, + { + "epoch": 0.3649446300658137, + "grad_norm": 2308.173828125, + "learning_rate": 3.224048119700458e-05, + "loss": 129.5445, + "step": 90330 + }, + { + "epoch": 0.3649850313311813, + "grad_norm": 1461.1622314453125, + "learning_rate": 3.223827263819056e-05, + "loss": 129.2254, + "step": 90340 + }, + { + "epoch": 0.3650254325965489, + "grad_norm": 1078.935791015625, + "learning_rate": 3.223606384078512e-05, + "loss": 113.4982, + "step": 90350 + }, + { + "epoch": 0.36506583386191654, + "grad_norm": 736.0798950195312, + "learning_rate": 3.223385480483131e-05, + "loss": 121.4105, + "step": 90360 + }, + { + "epoch": 0.3651062351272842, + "grad_norm": 433.25640869140625, + "learning_rate": 3.223164553037221e-05, + "loss": 66.2823, + "step": 90370 + }, + { + "epoch": 0.3651466363926518, + "grad_norm": 1381.4921875, + "learning_rate": 3.2229436017450876e-05, + "loss": 79.2064, + "step": 90380 + }, + { + "epoch": 0.36518703765801946, + "grad_norm": 2187.48291015625, + "learning_rate": 3.222722626611039e-05, + "loss": 119.9125, + "step": 90390 + }, + { + "epoch": 0.3652274389233871, + "grad_norm": 2059.15576171875, + "learning_rate": 3.222501627639384e-05, + "loss": 99.8287, + "step": 90400 + }, + { + "epoch": 0.3652678401887547, + "grad_norm": 674.8538208007812, + "learning_rate": 3.222280604834429e-05, + "loss": 75.4639, + "step": 90410 + }, + { + "epoch": 0.36530824145412233, + "grad_norm": 614.7540283203125, + "learning_rate": 3.222059558200486e-05, + "loss": 141.176, + "step": 90420 + }, + { + "epoch": 0.36534864271948997, + "grad_norm": 965.9248657226562, + "learning_rate": 3.221838487741862e-05, + "loss": 86.3737, + "step": 90430 + }, + { + "epoch": 0.3653890439848576, + "grad_norm": 856.3554077148438, + "learning_rate": 3.2216173934628674e-05, + "loss": 73.9712, + "step": 90440 + }, + { + "epoch": 0.36542944525022525, + "grad_norm": 439.26202392578125, + "learning_rate": 3.221396275367813e-05, + "loss": 98.9497, + "step": 90450 + }, + { + "epoch": 0.3654698465155929, + "grad_norm": 556.65380859375, + "learning_rate": 3.2211751334610094e-05, + "loss": 91.9757, + "step": 90460 + }, + { + "epoch": 0.3655102477809605, + "grad_norm": 463.5917663574219, + "learning_rate": 3.220953967746768e-05, + "loss": 99.9918, + "step": 90470 + }, + { + "epoch": 0.3655506490463281, + "grad_norm": 467.8904113769531, + "learning_rate": 3.2207327782294e-05, + "loss": 118.0824, + "step": 90480 + }, + { + "epoch": 0.36559105031169575, + "grad_norm": 451.4460754394531, + "learning_rate": 3.2205115649132185e-05, + "loss": 88.5353, + "step": 90490 + }, + { + 
"epoch": 0.3656314515770634, + "grad_norm": 1093.22900390625, + "learning_rate": 3.220290327802536e-05, + "loss": 85.3289, + "step": 90500 + }, + { + "epoch": 0.36567185284243103, + "grad_norm": 394.1527404785156, + "learning_rate": 3.2200690669016645e-05, + "loss": 119.818, + "step": 90510 + }, + { + "epoch": 0.3657122541077987, + "grad_norm": 433.1138916015625, + "learning_rate": 3.219847782214918e-05, + "loss": 66.5584, + "step": 90520 + }, + { + "epoch": 0.3657526553731663, + "grad_norm": 425.083740234375, + "learning_rate": 3.219626473746613e-05, + "loss": 95.7318, + "step": 90530 + }, + { + "epoch": 0.3657930566385339, + "grad_norm": 525.1397094726562, + "learning_rate": 3.21940514150106e-05, + "loss": 70.0633, + "step": 90540 + }, + { + "epoch": 0.36583345790390154, + "grad_norm": 1292.973388671875, + "learning_rate": 3.2191837854825766e-05, + "loss": 96.959, + "step": 90550 + }, + { + "epoch": 0.3658738591692692, + "grad_norm": 934.8228149414062, + "learning_rate": 3.218962405695478e-05, + "loss": 84.6857, + "step": 90560 + }, + { + "epoch": 0.3659142604346368, + "grad_norm": 396.1979064941406, + "learning_rate": 3.2187410021440786e-05, + "loss": 78.1695, + "step": 90570 + }, + { + "epoch": 0.36595466170000446, + "grad_norm": 924.4595947265625, + "learning_rate": 3.218519574832697e-05, + "loss": 103.5428, + "step": 90580 + }, + { + "epoch": 0.3659950629653721, + "grad_norm": 1243.9974365234375, + "learning_rate": 3.218298123765649e-05, + "loss": 97.395, + "step": 90590 + }, + { + "epoch": 0.3660354642307397, + "grad_norm": 1798.9185791015625, + "learning_rate": 3.218076648947251e-05, + "loss": 65.4942, + "step": 90600 + }, + { + "epoch": 0.3660758654961073, + "grad_norm": 665.0444946289062, + "learning_rate": 3.217855150381822e-05, + "loss": 87.5551, + "step": 90610 + }, + { + "epoch": 0.36611626676147496, + "grad_norm": 988.7350463867188, + "learning_rate": 3.217633628073681e-05, + "loss": 87.0365, + "step": 90620 + }, + { + "epoch": 0.3661566680268426, + "grad_norm": 900.7857055664062, + "learning_rate": 3.217412082027144e-05, + "loss": 86.6207, + "step": 90630 + }, + { + "epoch": 0.36619706929221024, + "grad_norm": 989.8163452148438, + "learning_rate": 3.217190512246532e-05, + "loss": 116.068, + "step": 90640 + }, + { + "epoch": 0.3662374705575779, + "grad_norm": 754.2236328125, + "learning_rate": 3.216968918736164e-05, + "loss": 103.5243, + "step": 90650 + }, + { + "epoch": 0.3662778718229455, + "grad_norm": 463.61431884765625, + "learning_rate": 3.2167473015003616e-05, + "loss": 55.2255, + "step": 90660 + }, + { + "epoch": 0.3663182730883131, + "grad_norm": 688.5117797851562, + "learning_rate": 3.216525660543444e-05, + "loss": 80.8866, + "step": 90670 + }, + { + "epoch": 0.36635867435368075, + "grad_norm": 352.57513427734375, + "learning_rate": 3.216303995869731e-05, + "loss": 115.0387, + "step": 90680 + }, + { + "epoch": 0.3663990756190484, + "grad_norm": 459.9197692871094, + "learning_rate": 3.2160823074835464e-05, + "loss": 92.0809, + "step": 90690 + }, + { + "epoch": 0.366439476884416, + "grad_norm": 781.8767700195312, + "learning_rate": 3.215860595389211e-05, + "loss": 89.969, + "step": 90700 + }, + { + "epoch": 0.36647987814978367, + "grad_norm": 417.2863464355469, + "learning_rate": 3.215638859591048e-05, + "loss": 123.7721, + "step": 90710 + }, + { + "epoch": 0.3665202794151513, + "grad_norm": 650.0309448242188, + "learning_rate": 3.215417100093378e-05, + "loss": 94.3897, + "step": 90720 + }, + { + "epoch": 0.3665606806805189, + "grad_norm": 868.7900390625, + 
"learning_rate": 3.215195316900527e-05, + "loss": 104.5579, + "step": 90730 + }, + { + "epoch": 0.36660108194588653, + "grad_norm": 750.8766479492188, + "learning_rate": 3.2149735100168176e-05, + "loss": 75.294, + "step": 90740 + }, + { + "epoch": 0.36664148321125417, + "grad_norm": 403.7306213378906, + "learning_rate": 3.214751679446574e-05, + "loss": 107.0253, + "step": 90750 + }, + { + "epoch": 0.3666818844766218, + "grad_norm": 1695.947021484375, + "learning_rate": 3.214529825194121e-05, + "loss": 125.2526, + "step": 90760 + }, + { + "epoch": 0.36672228574198945, + "grad_norm": 756.2650146484375, + "learning_rate": 3.214307947263783e-05, + "loss": 78.2523, + "step": 90770 + }, + { + "epoch": 0.3667626870073571, + "grad_norm": 655.8005981445312, + "learning_rate": 3.2140860456598877e-05, + "loss": 64.7341, + "step": 90780 + }, + { + "epoch": 0.3668030882727247, + "grad_norm": 2416.950439453125, + "learning_rate": 3.213864120386759e-05, + "loss": 77.318, + "step": 90790 + }, + { + "epoch": 0.3668434895380923, + "grad_norm": 518.7205200195312, + "learning_rate": 3.213642171448725e-05, + "loss": 78.753, + "step": 90800 + }, + { + "epoch": 0.36688389080345996, + "grad_norm": 737.206298828125, + "learning_rate": 3.213420198850111e-05, + "loss": 72.0301, + "step": 90810 + }, + { + "epoch": 0.3669242920688276, + "grad_norm": 873.0435791015625, + "learning_rate": 3.213198202595247e-05, + "loss": 75.7544, + "step": 90820 + }, + { + "epoch": 0.36696469333419524, + "grad_norm": 967.7666015625, + "learning_rate": 3.212976182688458e-05, + "loss": 71.267, + "step": 90830 + }, + { + "epoch": 0.3670050945995629, + "grad_norm": 817.9600219726562, + "learning_rate": 3.212754139134075e-05, + "loss": 93.7848, + "step": 90840 + }, + { + "epoch": 0.3670454958649305, + "grad_norm": 727.7638549804688, + "learning_rate": 3.212532071936425e-05, + "loss": 101.0185, + "step": 90850 + }, + { + "epoch": 0.3670858971302981, + "grad_norm": 516.7991943359375, + "learning_rate": 3.2123099810998385e-05, + "loss": 86.6833, + "step": 90860 + }, + { + "epoch": 0.36712629839566574, + "grad_norm": 467.3835144042969, + "learning_rate": 3.212087866628644e-05, + "loss": 94.0811, + "step": 90870 + }, + { + "epoch": 0.3671666996610334, + "grad_norm": 907.18798828125, + "learning_rate": 3.211865728527173e-05, + "loss": 80.5591, + "step": 90880 + }, + { + "epoch": 0.367207100926401, + "grad_norm": 420.8382873535156, + "learning_rate": 3.211643566799756e-05, + "loss": 62.1145, + "step": 90890 + }, + { + "epoch": 0.36724750219176866, + "grad_norm": 889.0955200195312, + "learning_rate": 3.2114213814507235e-05, + "loss": 44.5033, + "step": 90900 + }, + { + "epoch": 0.3672879034571363, + "grad_norm": 831.3045654296875, + "learning_rate": 3.211199172484407e-05, + "loss": 89.691, + "step": 90910 + }, + { + "epoch": 0.3673283047225039, + "grad_norm": 1132.6920166015625, + "learning_rate": 3.21097693990514e-05, + "loss": 70.107, + "step": 90920 + }, + { + "epoch": 0.3673687059878715, + "grad_norm": 1008.6563720703125, + "learning_rate": 3.210754683717253e-05, + "loss": 94.5218, + "step": 90930 + }, + { + "epoch": 0.36740910725323916, + "grad_norm": 1470.785888671875, + "learning_rate": 3.2105324039250814e-05, + "loss": 110.0201, + "step": 90940 + }, + { + "epoch": 0.3674495085186068, + "grad_norm": 914.1454467773438, + "learning_rate": 3.210310100532956e-05, + "loss": 95.8609, + "step": 90950 + }, + { + "epoch": 0.36748990978397444, + "grad_norm": 533.9212036132812, + "learning_rate": 3.210087773545214e-05, + "loss": 61.9411, + "step": 90960 
+ }, + { + "epoch": 0.3675303110493421, + "grad_norm": 525.5610961914062, + "learning_rate": 3.209865422966186e-05, + "loss": 79.3202, + "step": 90970 + }, + { + "epoch": 0.3675707123147097, + "grad_norm": 773.8341674804688, + "learning_rate": 3.20964304880021e-05, + "loss": 74.2218, + "step": 90980 + }, + { + "epoch": 0.3676111135800773, + "grad_norm": 717.37890625, + "learning_rate": 3.209420651051619e-05, + "loss": 132.3644, + "step": 90990 + }, + { + "epoch": 0.36765151484544495, + "grad_norm": 428.2706604003906, + "learning_rate": 3.2091982297247505e-05, + "loss": 66.7343, + "step": 91000 + }, + { + "epoch": 0.3676919161108126, + "grad_norm": 863.875732421875, + "learning_rate": 3.2089757848239395e-05, + "loss": 114.6113, + "step": 91010 + }, + { + "epoch": 0.36773231737618023, + "grad_norm": 796.22021484375, + "learning_rate": 3.208753316353523e-05, + "loss": 124.125, + "step": 91020 + }, + { + "epoch": 0.36777271864154787, + "grad_norm": 410.2618713378906, + "learning_rate": 3.2085308243178386e-05, + "loss": 78.5684, + "step": 91030 + }, + { + "epoch": 0.3678131199069155, + "grad_norm": 390.0378112792969, + "learning_rate": 3.208308308721224e-05, + "loss": 126.5721, + "step": 91040 + }, + { + "epoch": 0.3678535211722831, + "grad_norm": 585.85595703125, + "learning_rate": 3.2080857695680156e-05, + "loss": 125.1761, + "step": 91050 + }, + { + "epoch": 0.36789392243765073, + "grad_norm": 744.8727416992188, + "learning_rate": 3.2078632068625534e-05, + "loss": 114.8646, + "step": 91060 + }, + { + "epoch": 0.3679343237030184, + "grad_norm": 394.9700927734375, + "learning_rate": 3.207640620609177e-05, + "loss": 68.6612, + "step": 91070 + }, + { + "epoch": 0.367974724968386, + "grad_norm": 540.9749145507812, + "learning_rate": 3.2074180108122255e-05, + "loss": 77.7369, + "step": 91080 + }, + { + "epoch": 0.36801512623375365, + "grad_norm": 664.342041015625, + "learning_rate": 3.207195377476037e-05, + "loss": 99.0511, + "step": 91090 + }, + { + "epoch": 0.3680555274991213, + "grad_norm": 623.8460083007812, + "learning_rate": 3.2069727206049536e-05, + "loss": 60.7185, + "step": 91100 + }, + { + "epoch": 0.3680959287644889, + "grad_norm": 770.0367431640625, + "learning_rate": 3.2067500402033154e-05, + "loss": 82.6185, + "step": 91110 + }, + { + "epoch": 0.3681363300298565, + "grad_norm": 388.62127685546875, + "learning_rate": 3.206527336275464e-05, + "loss": 119.7676, + "step": 91120 + }, + { + "epoch": 0.36817673129522416, + "grad_norm": 936.1165771484375, + "learning_rate": 3.206304608825741e-05, + "loss": 86.7788, + "step": 91130 + }, + { + "epoch": 0.3682171325605918, + "grad_norm": 674.9578247070312, + "learning_rate": 3.206081857858489e-05, + "loss": 89.1166, + "step": 91140 + }, + { + "epoch": 0.36825753382595944, + "grad_norm": 1009.33447265625, + "learning_rate": 3.20585908337805e-05, + "loss": 59.8594, + "step": 91150 + }, + { + "epoch": 0.3682979350913271, + "grad_norm": 473.0268249511719, + "learning_rate": 3.205636285388767e-05, + "loss": 71.7486, + "step": 91160 + }, + { + "epoch": 0.3683383363566947, + "grad_norm": 324.4518737792969, + "learning_rate": 3.205413463894984e-05, + "loss": 119.2562, + "step": 91170 + }, + { + "epoch": 0.3683787376220623, + "grad_norm": 599.016845703125, + "learning_rate": 3.2051906189010456e-05, + "loss": 102.7651, + "step": 91180 + }, + { + "epoch": 0.36841913888742994, + "grad_norm": 633.0952758789062, + "learning_rate": 3.204967750411295e-05, + "loss": 51.0471, + "step": 91190 + }, + { + "epoch": 0.3684595401527976, + "grad_norm": 
922.2232666015625, + "learning_rate": 3.2047448584300775e-05, + "loss": 67.7489, + "step": 91200 + }, + { + "epoch": 0.3684999414181652, + "grad_norm": 572.1436157226562, + "learning_rate": 3.204521942961739e-05, + "loss": 65.7194, + "step": 91210 + }, + { + "epoch": 0.36854034268353286, + "grad_norm": 889.4647216796875, + "learning_rate": 3.2042990040106255e-05, + "loss": 120.3005, + "step": 91220 + }, + { + "epoch": 0.3685807439489005, + "grad_norm": 1936.33203125, + "learning_rate": 3.204076041581082e-05, + "loss": 126.1136, + "step": 91230 + }, + { + "epoch": 0.3686211452142681, + "grad_norm": 829.4356689453125, + "learning_rate": 3.2038530556774574e-05, + "loss": 94.5599, + "step": 91240 + }, + { + "epoch": 0.3686615464796357, + "grad_norm": 725.9364013671875, + "learning_rate": 3.203630046304097e-05, + "loss": 119.5168, + "step": 91250 + }, + { + "epoch": 0.36870194774500337, + "grad_norm": 275.42529296875, + "learning_rate": 3.203407013465349e-05, + "loss": 81.9102, + "step": 91260 + }, + { + "epoch": 0.368742349010371, + "grad_norm": 666.9178466796875, + "learning_rate": 3.2031839571655625e-05, + "loss": 109.208, + "step": 91270 + }, + { + "epoch": 0.36878275027573865, + "grad_norm": 688.0106201171875, + "learning_rate": 3.2029608774090845e-05, + "loss": 94.8495, + "step": 91280 + }, + { + "epoch": 0.3688231515411063, + "grad_norm": 950.3851318359375, + "learning_rate": 3.202737774200265e-05, + "loss": 94.6971, + "step": 91290 + }, + { + "epoch": 0.36886355280647387, + "grad_norm": 849.0550537109375, + "learning_rate": 3.202514647543454e-05, + "loss": 69.2847, + "step": 91300 + }, + { + "epoch": 0.3689039540718415, + "grad_norm": 581.130126953125, + "learning_rate": 3.2022914974429995e-05, + "loss": 87.3503, + "step": 91310 + }, + { + "epoch": 0.36894435533720915, + "grad_norm": 819.6801147460938, + "learning_rate": 3.2020683239032545e-05, + "loss": 73.0898, + "step": 91320 + }, + { + "epoch": 0.3689847566025768, + "grad_norm": 1002.6309814453125, + "learning_rate": 3.2018451269285677e-05, + "loss": 98.4634, + "step": 91330 + }, + { + "epoch": 0.36902515786794443, + "grad_norm": 391.92791748046875, + "learning_rate": 3.201621906523293e-05, + "loss": 68.445, + "step": 91340 + }, + { + "epoch": 0.36906555913331207, + "grad_norm": 1584.977783203125, + "learning_rate": 3.201398662691779e-05, + "loss": 127.3985, + "step": 91350 + }, + { + "epoch": 0.3691059603986797, + "grad_norm": 575.3108520507812, + "learning_rate": 3.2011753954383805e-05, + "loss": 99.502, + "step": 91360 + }, + { + "epoch": 0.3691463616640473, + "grad_norm": 695.5021362304688, + "learning_rate": 3.200952104767448e-05, + "loss": 62.7909, + "step": 91370 + }, + { + "epoch": 0.36918676292941494, + "grad_norm": 452.2756652832031, + "learning_rate": 3.200728790683338e-05, + "loss": 105.3119, + "step": 91380 + }, + { + "epoch": 0.3692271641947826, + "grad_norm": 566.0073852539062, + "learning_rate": 3.2005054531904006e-05, + "loss": 108.538, + "step": 91390 + }, + { + "epoch": 0.3692675654601502, + "grad_norm": 649.7351684570312, + "learning_rate": 3.200282092292991e-05, + "loss": 82.696, + "step": 91400 + }, + { + "epoch": 0.36930796672551786, + "grad_norm": 1896.89697265625, + "learning_rate": 3.200058707995465e-05, + "loss": 78.5928, + "step": 91410 + }, + { + "epoch": 0.3693483679908855, + "grad_norm": 923.6322631835938, + "learning_rate": 3.1998353003021766e-05, + "loss": 54.3606, + "step": 91420 + }, + { + "epoch": 0.3693887692562531, + "grad_norm": 979.223388671875, + "learning_rate": 3.199611869217481e-05, + 
"loss": 99.7926, + "step": 91430 + }, + { + "epoch": 0.3694291705216207, + "grad_norm": 658.6464233398438, + "learning_rate": 3.1993884147457345e-05, + "loss": 80.6549, + "step": 91440 + }, + { + "epoch": 0.36946957178698836, + "grad_norm": 430.50927734375, + "learning_rate": 3.199164936891293e-05, + "loss": 98.1813, + "step": 91450 + }, + { + "epoch": 0.369509973052356, + "grad_norm": 445.77679443359375, + "learning_rate": 3.198941435658514e-05, + "loss": 71.3693, + "step": 91460 + }, + { + "epoch": 0.36955037431772364, + "grad_norm": 874.7481689453125, + "learning_rate": 3.1987179110517546e-05, + "loss": 84.7144, + "step": 91470 + }, + { + "epoch": 0.3695907755830913, + "grad_norm": 539.3798217773438, + "learning_rate": 3.198494363075372e-05, + "loss": 97.3363, + "step": 91480 + }, + { + "epoch": 0.3696311768484589, + "grad_norm": 994.9490356445312, + "learning_rate": 3.198270791733725e-05, + "loss": 92.0692, + "step": 91490 + }, + { + "epoch": 0.3696715781138265, + "grad_norm": 660.1752319335938, + "learning_rate": 3.198047197031172e-05, + "loss": 84.7387, + "step": 91500 + }, + { + "epoch": 0.36971197937919414, + "grad_norm": 1072.267822265625, + "learning_rate": 3.197823578972072e-05, + "loss": 112.792, + "step": 91510 + }, + { + "epoch": 0.3697523806445618, + "grad_norm": 390.4635314941406, + "learning_rate": 3.1975999375607854e-05, + "loss": 88.5618, + "step": 91520 + }, + { + "epoch": 0.3697927819099294, + "grad_norm": 841.2089233398438, + "learning_rate": 3.1973762728016705e-05, + "loss": 118.9467, + "step": 91530 + }, + { + "epoch": 0.36983318317529706, + "grad_norm": 1226.7083740234375, + "learning_rate": 3.1971525846990886e-05, + "loss": 131.15, + "step": 91540 + }, + { + "epoch": 0.3698735844406647, + "grad_norm": 719.4632568359375, + "learning_rate": 3.1969288732574015e-05, + "loss": 86.1784, + "step": 91550 + }, + { + "epoch": 0.3699139857060323, + "grad_norm": 519.4171142578125, + "learning_rate": 3.196705138480969e-05, + "loss": 63.3932, + "step": 91560 + }, + { + "epoch": 0.36995438697139993, + "grad_norm": 1111.3648681640625, + "learning_rate": 3.196481380374154e-05, + "loss": 93.1909, + "step": 91570 + }, + { + "epoch": 0.36999478823676757, + "grad_norm": 558.6243286132812, + "learning_rate": 3.1962575989413185e-05, + "loss": 96.017, + "step": 91580 + }, + { + "epoch": 0.3700351895021352, + "grad_norm": 807.3783569335938, + "learning_rate": 3.196033794186826e-05, + "loss": 68.0391, + "step": 91590 + }, + { + "epoch": 0.37007559076750285, + "grad_norm": 914.8203125, + "learning_rate": 3.195809966115038e-05, + "loss": 72.0608, + "step": 91600 + }, + { + "epoch": 0.3701159920328705, + "grad_norm": 1581.810302734375, + "learning_rate": 3.1955861147303194e-05, + "loss": 67.1842, + "step": 91610 + }, + { + "epoch": 0.3701563932982381, + "grad_norm": 535.8658447265625, + "learning_rate": 3.195362240037034e-05, + "loss": 68.8309, + "step": 91620 + }, + { + "epoch": 0.3701967945636057, + "grad_norm": 546.390380859375, + "learning_rate": 3.1951383420395456e-05, + "loss": 62.8079, + "step": 91630 + }, + { + "epoch": 0.37023719582897335, + "grad_norm": 894.2689819335938, + "learning_rate": 3.194914420742221e-05, + "loss": 92.6154, + "step": 91640 + }, + { + "epoch": 0.370277597094341, + "grad_norm": 661.2052001953125, + "learning_rate": 3.194690476149425e-05, + "loss": 52.3533, + "step": 91650 + }, + { + "epoch": 0.37031799835970863, + "grad_norm": 1220.2864990234375, + "learning_rate": 3.194466508265522e-05, + "loss": 97.7406, + "step": 91660 + }, + { + "epoch": 
0.3703583996250763, + "grad_norm": 277.8544921875, + "learning_rate": 3.1942425170948795e-05, + "loss": 57.4309, + "step": 91670 + }, + { + "epoch": 0.3703988008904439, + "grad_norm": 793.091552734375, + "learning_rate": 3.194018502641864e-05, + "loss": 64.0643, + "step": 91680 + }, + { + "epoch": 0.3704392021558115, + "grad_norm": 1195.7659912109375, + "learning_rate": 3.193794464910844e-05, + "loss": 64.8676, + "step": 91690 + }, + { + "epoch": 0.37047960342117914, + "grad_norm": 1068.720703125, + "learning_rate": 3.193570403906186e-05, + "loss": 98.8643, + "step": 91700 + }, + { + "epoch": 0.3705200046865468, + "grad_norm": 383.7998352050781, + "learning_rate": 3.193346319632258e-05, + "loss": 83.7354, + "step": 91710 + }, + { + "epoch": 0.3705604059519144, + "grad_norm": 972.02685546875, + "learning_rate": 3.193122212093429e-05, + "loss": 92.8159, + "step": 91720 + }, + { + "epoch": 0.37060080721728206, + "grad_norm": 366.7422180175781, + "learning_rate": 3.192898081294069e-05, + "loss": 103.0159, + "step": 91730 + }, + { + "epoch": 0.3706412084826497, + "grad_norm": 592.0652465820312, + "learning_rate": 3.192673927238547e-05, + "loss": 90.3065, + "step": 91740 + }, + { + "epoch": 0.3706816097480173, + "grad_norm": 1388.6058349609375, + "learning_rate": 3.1924497499312317e-05, + "loss": 119.6214, + "step": 91750 + }, + { + "epoch": 0.3707220110133849, + "grad_norm": 580.85791015625, + "learning_rate": 3.1922255493764956e-05, + "loss": 74.59, + "step": 91760 + }, + { + "epoch": 0.37076241227875256, + "grad_norm": 927.8380126953125, + "learning_rate": 3.192001325578708e-05, + "loss": 74.9823, + "step": 91770 + }, + { + "epoch": 0.3708028135441202, + "grad_norm": 590.2960815429688, + "learning_rate": 3.1917770785422406e-05, + "loss": 59.7204, + "step": 91780 + }, + { + "epoch": 0.37084321480948784, + "grad_norm": 970.12255859375, + "learning_rate": 3.1915528082714664e-05, + "loss": 74.3535, + "step": 91790 + }, + { + "epoch": 0.3708836160748555, + "grad_norm": 503.59814453125, + "learning_rate": 3.191328514770757e-05, + "loss": 85.6103, + "step": 91800 + }, + { + "epoch": 0.3709240173402231, + "grad_norm": 721.799072265625, + "learning_rate": 3.1911041980444836e-05, + "loss": 128.503, + "step": 91810 + }, + { + "epoch": 0.3709644186055907, + "grad_norm": 494.068115234375, + "learning_rate": 3.190879858097021e-05, + "loss": 65.7471, + "step": 91820 + }, + { + "epoch": 0.37100481987095835, + "grad_norm": 421.3589172363281, + "learning_rate": 3.190655494932742e-05, + "loss": 136.22, + "step": 91830 + }, + { + "epoch": 0.371045221136326, + "grad_norm": 932.4896850585938, + "learning_rate": 3.190431108556022e-05, + "loss": 69.2311, + "step": 91840 + }, + { + "epoch": 0.3710856224016936, + "grad_norm": 1012.1212768554688, + "learning_rate": 3.190206698971235e-05, + "loss": 71.0231, + "step": 91850 + }, + { + "epoch": 0.37112602366706127, + "grad_norm": 1676.547119140625, + "learning_rate": 3.189982266182755e-05, + "loss": 76.3915, + "step": 91860 + }, + { + "epoch": 0.3711664249324289, + "grad_norm": 573.4489135742188, + "learning_rate": 3.189757810194958e-05, + "loss": 81.2634, + "step": 91870 + }, + { + "epoch": 0.3712068261977965, + "grad_norm": 1570.279541015625, + "learning_rate": 3.18953333101222e-05, + "loss": 87.9239, + "step": 91880 + }, + { + "epoch": 0.37124722746316413, + "grad_norm": 788.8910522460938, + "learning_rate": 3.189308828638917e-05, + "loss": 82.7087, + "step": 91890 + }, + { + "epoch": 0.37128762872853177, + "grad_norm": 884.41259765625, + "learning_rate": 
3.189084303079427e-05, + "loss": 68.5413, + "step": 91900 + }, + { + "epoch": 0.3713280299938994, + "grad_norm": 784.4596557617188, + "learning_rate": 3.1888597543381255e-05, + "loss": 87.2221, + "step": 91910 + }, + { + "epoch": 0.37136843125926705, + "grad_norm": 1396.572998046875, + "learning_rate": 3.1886351824193916e-05, + "loss": 61.0245, + "step": 91920 + }, + { + "epoch": 0.3714088325246347, + "grad_norm": 1364.83642578125, + "learning_rate": 3.188410587327602e-05, + "loss": 78.3537, + "step": 91930 + }, + { + "epoch": 0.3714492337900023, + "grad_norm": 772.4072875976562, + "learning_rate": 3.188185969067137e-05, + "loss": 87.6932, + "step": 91940 + }, + { + "epoch": 0.3714896350553699, + "grad_norm": 914.11669921875, + "learning_rate": 3.187961327642374e-05, + "loss": 80.6635, + "step": 91950 + }, + { + "epoch": 0.37153003632073756, + "grad_norm": 533.16845703125, + "learning_rate": 3.1877366630576945e-05, + "loss": 70.0793, + "step": 91960 + }, + { + "epoch": 0.3715704375861052, + "grad_norm": 660.4202270507812, + "learning_rate": 3.1875119753174766e-05, + "loss": 112.9557, + "step": 91970 + }, + { + "epoch": 0.37161083885147284, + "grad_norm": 1112.6015625, + "learning_rate": 3.187287264426101e-05, + "loss": 123.1163, + "step": 91980 + }, + { + "epoch": 0.3716512401168405, + "grad_norm": 522.26904296875, + "learning_rate": 3.18706253038795e-05, + "loss": 53.0709, + "step": 91990 + }, + { + "epoch": 0.3716916413822081, + "grad_norm": 417.6781311035156, + "learning_rate": 3.1868377732074034e-05, + "loss": 68.6318, + "step": 92000 + }, + { + "epoch": 0.3717320426475757, + "grad_norm": 895.1248779296875, + "learning_rate": 3.1866129928888425e-05, + "loss": 89.5749, + "step": 92010 + }, + { + "epoch": 0.37177244391294334, + "grad_norm": 501.8919372558594, + "learning_rate": 3.186388189436652e-05, + "loss": 63.5448, + "step": 92020 + }, + { + "epoch": 0.371812845178311, + "grad_norm": 422.7189636230469, + "learning_rate": 3.186163362855212e-05, + "loss": 75.2911, + "step": 92030 + }, + { + "epoch": 0.3718532464436786, + "grad_norm": 539.2811889648438, + "learning_rate": 3.185938513148906e-05, + "loss": 81.1963, + "step": 92040 + }, + { + "epoch": 0.37189364770904626, + "grad_norm": 613.2704467773438, + "learning_rate": 3.185713640322119e-05, + "loss": 71.9722, + "step": 92050 + }, + { + "epoch": 0.3719340489744139, + "grad_norm": 816.633056640625, + "learning_rate": 3.185488744379234e-05, + "loss": 95.9454, + "step": 92060 + }, + { + "epoch": 0.3719744502397815, + "grad_norm": 1042.883544921875, + "learning_rate": 3.185263825324635e-05, + "loss": 90.1927, + "step": 92070 + }, + { + "epoch": 0.3720148515051491, + "grad_norm": 909.7013549804688, + "learning_rate": 3.185038883162708e-05, + "loss": 103.4661, + "step": 92080 + }, + { + "epoch": 0.37205525277051676, + "grad_norm": 943.1971435546875, + "learning_rate": 3.184813917897838e-05, + "loss": 76.2466, + "step": 92090 + }, + { + "epoch": 0.3720956540358844, + "grad_norm": 519.3920288085938, + "learning_rate": 3.18458892953441e-05, + "loss": 69.6098, + "step": 92100 + }, + { + "epoch": 0.37213605530125204, + "grad_norm": 520.001953125, + "learning_rate": 3.1843639180768115e-05, + "loss": 102.4923, + "step": 92110 + }, + { + "epoch": 0.3721764565666197, + "grad_norm": 441.6025390625, + "learning_rate": 3.184138883529429e-05, + "loss": 82.1018, + "step": 92120 + }, + { + "epoch": 0.3722168578319873, + "grad_norm": 816.9365234375, + "learning_rate": 3.183913825896649e-05, + "loss": 116.9387, + "step": 92130 + }, + { + "epoch": 
0.3722572590973549, + "grad_norm": 737.5056762695312, + "learning_rate": 3.1836887451828595e-05, + "loss": 104.9913, + "step": 92140 + }, + { + "epoch": 0.37229766036272255, + "grad_norm": 404.3524169921875, + "learning_rate": 3.183463641392448e-05, + "loss": 97.4966, + "step": 92150 + }, + { + "epoch": 0.3723380616280902, + "grad_norm": 524.3243408203125, + "learning_rate": 3.183238514529804e-05, + "loss": 69.7313, + "step": 92160 + }, + { + "epoch": 0.37237846289345783, + "grad_norm": 928.3333740234375, + "learning_rate": 3.183013364599316e-05, + "loss": 64.6497, + "step": 92170 + }, + { + "epoch": 0.37241886415882547, + "grad_norm": 744.5205078125, + "learning_rate": 3.1827881916053734e-05, + "loss": 89.614, + "step": 92180 + }, + { + "epoch": 0.3724592654241931, + "grad_norm": 976.8173217773438, + "learning_rate": 3.182562995552366e-05, + "loss": 95.2544, + "step": 92190 + }, + { + "epoch": 0.3724996666895607, + "grad_norm": 271.5019226074219, + "learning_rate": 3.182337776444684e-05, + "loss": 84.3125, + "step": 92200 + }, + { + "epoch": 0.37254006795492833, + "grad_norm": 1192.80517578125, + "learning_rate": 3.182112534286719e-05, + "loss": 82.1861, + "step": 92210 + }, + { + "epoch": 0.372580469220296, + "grad_norm": 487.5242919921875, + "learning_rate": 3.181887269082861e-05, + "loss": 128.2184, + "step": 92220 + }, + { + "epoch": 0.3726208704856636, + "grad_norm": 853.7452392578125, + "learning_rate": 3.1816619808375016e-05, + "loss": 85.5512, + "step": 92230 + }, + { + "epoch": 0.37266127175103125, + "grad_norm": 726.2578125, + "learning_rate": 3.1814366695550346e-05, + "loss": 74.2288, + "step": 92240 + }, + { + "epoch": 0.3727016730163989, + "grad_norm": 1436.91162109375, + "learning_rate": 3.181211335239851e-05, + "loss": 78.0795, + "step": 92250 + }, + { + "epoch": 0.3727420742817665, + "grad_norm": 1487.58642578125, + "learning_rate": 3.1809859778963445e-05, + "loss": 74.0526, + "step": 92260 + }, + { + "epoch": 0.3727824755471341, + "grad_norm": 593.8880615234375, + "learning_rate": 3.180760597528908e-05, + "loss": 77.304, + "step": 92270 + }, + { + "epoch": 0.37282287681250176, + "grad_norm": 969.373779296875, + "learning_rate": 3.180535194141936e-05, + "loss": 114.6659, + "step": 92280 + }, + { + "epoch": 0.3728632780778694, + "grad_norm": 801.3983154296875, + "learning_rate": 3.180309767739823e-05, + "loss": 95.6099, + "step": 92290 + }, + { + "epoch": 0.37290367934323704, + "grad_norm": 1782.012939453125, + "learning_rate": 3.1800843183269625e-05, + "loss": 77.4656, + "step": 92300 + }, + { + "epoch": 0.3729440806086047, + "grad_norm": 690.3163452148438, + "learning_rate": 3.179858845907752e-05, + "loss": 94.3876, + "step": 92310 + }, + { + "epoch": 0.3729844818739723, + "grad_norm": 958.7941284179688, + "learning_rate": 3.179633350486584e-05, + "loss": 103.0407, + "step": 92320 + }, + { + "epoch": 0.3730248831393399, + "grad_norm": 567.6779174804688, + "learning_rate": 3.179407832067858e-05, + "loss": 105.6042, + "step": 92330 + }, + { + "epoch": 0.37306528440470754, + "grad_norm": 813.747802734375, + "learning_rate": 3.179182290655969e-05, + "loss": 104.8024, + "step": 92340 + }, + { + "epoch": 0.3731056856700752, + "grad_norm": 557.308837890625, + "learning_rate": 3.178956726255314e-05, + "loss": 74.1157, + "step": 92350 + }, + { + "epoch": 0.3731460869354428, + "grad_norm": 1661.5557861328125, + "learning_rate": 3.1787311388702894e-05, + "loss": 79.2154, + "step": 92360 + }, + { + "epoch": 0.37318648820081046, + "grad_norm": 1259.5732421875, + "learning_rate": 
3.178505528505296e-05, + "loss": 83.6677, + "step": 92370 + }, + { + "epoch": 0.3732268894661781, + "grad_norm": 348.1986083984375, + "learning_rate": 3.17827989516473e-05, + "loss": 110.1771, + "step": 92380 + }, + { + "epoch": 0.3732672907315457, + "grad_norm": 836.5503540039062, + "learning_rate": 3.1780542388529906e-05, + "loss": 106.0598, + "step": 92390 + }, + { + "epoch": 0.3733076919969133, + "grad_norm": 729.2714233398438, + "learning_rate": 3.177828559574477e-05, + "loss": 76.8469, + "step": 92400 + }, + { + "epoch": 0.37334809326228097, + "grad_norm": 536.6849365234375, + "learning_rate": 3.1776028573335906e-05, + "loss": 80.1603, + "step": 92410 + }, + { + "epoch": 0.3733884945276486, + "grad_norm": 756.0802001953125, + "learning_rate": 3.177377132134729e-05, + "loss": 105.2451, + "step": 92420 + }, + { + "epoch": 0.37342889579301625, + "grad_norm": 616.0418090820312, + "learning_rate": 3.177151383982295e-05, + "loss": 82.54, + "step": 92430 + }, + { + "epoch": 0.3734692970583839, + "grad_norm": 652.7342529296875, + "learning_rate": 3.176925612880688e-05, + "loss": 74.7339, + "step": 92440 + }, + { + "epoch": 0.3735096983237515, + "grad_norm": 571.6209716796875, + "learning_rate": 3.176699818834311e-05, + "loss": 68.9911, + "step": 92450 + }, + { + "epoch": 0.3735500995891191, + "grad_norm": 428.6630554199219, + "learning_rate": 3.1764740018475646e-05, + "loss": 89.0961, + "step": 92460 + }, + { + "epoch": 0.37359050085448675, + "grad_norm": 327.80474853515625, + "learning_rate": 3.1762481619248516e-05, + "loss": 69.5158, + "step": 92470 + }, + { + "epoch": 0.3736309021198544, + "grad_norm": 619.4699096679688, + "learning_rate": 3.1760222990705756e-05, + "loss": 103.5509, + "step": 92480 + }, + { + "epoch": 0.37367130338522203, + "grad_norm": 772.5634765625, + "learning_rate": 3.175796413289139e-05, + "loss": 77.9768, + "step": 92490 + }, + { + "epoch": 0.37371170465058967, + "grad_norm": 687.5281372070312, + "learning_rate": 3.1755705045849465e-05, + "loss": 96.0569, + "step": 92500 + }, + { + "epoch": 0.3737521059159573, + "grad_norm": 1378.552734375, + "learning_rate": 3.175344572962402e-05, + "loss": 117.4802, + "step": 92510 + }, + { + "epoch": 0.3737925071813249, + "grad_norm": 969.1366577148438, + "learning_rate": 3.175118618425909e-05, + "loss": 89.7203, + "step": 92520 + }, + { + "epoch": 0.37383290844669254, + "grad_norm": 1095.555908203125, + "learning_rate": 3.174892640979875e-05, + "loss": 100.3307, + "step": 92530 + }, + { + "epoch": 0.3738733097120602, + "grad_norm": 527.6351928710938, + "learning_rate": 3.174666640628702e-05, + "loss": 126.7232, + "step": 92540 + }, + { + "epoch": 0.3739137109774278, + "grad_norm": 660.1981811523438, + "learning_rate": 3.174440617376799e-05, + "loss": 76.4957, + "step": 92550 + }, + { + "epoch": 0.37395411224279546, + "grad_norm": 375.7703552246094, + "learning_rate": 3.1742145712285725e-05, + "loss": 98.1806, + "step": 92560 + }, + { + "epoch": 0.3739945135081631, + "grad_norm": 802.1222534179688, + "learning_rate": 3.173988502188428e-05, + "loss": 121.9643, + "step": 92570 + }, + { + "epoch": 0.3740349147735307, + "grad_norm": 785.3814086914062, + "learning_rate": 3.173762410260772e-05, + "loss": 99.7961, + "step": 92580 + }, + { + "epoch": 0.3740753160388983, + "grad_norm": 473.09381103515625, + "learning_rate": 3.173536295450014e-05, + "loss": 66.078, + "step": 92590 + }, + { + "epoch": 0.37411571730426596, + "grad_norm": 422.4671630859375, + "learning_rate": 3.173310157760563e-05, + "loss": 79.2691, + "step": 92600 + }, + { 
+ "epoch": 0.3741561185696336, + "grad_norm": 995.4271240234375, + "learning_rate": 3.173083997196825e-05, + "loss": 95.5314, + "step": 92610 + }, + { + "epoch": 0.37419651983500124, + "grad_norm": 826.6963500976562, + "learning_rate": 3.172857813763211e-05, + "loss": 42.2892, + "step": 92620 + }, + { + "epoch": 0.3742369211003689, + "grad_norm": 851.070556640625, + "learning_rate": 3.172631607464131e-05, + "loss": 104.7198, + "step": 92630 + }, + { + "epoch": 0.3742773223657365, + "grad_norm": 585.6162109375, + "learning_rate": 3.1724053783039935e-05, + "loss": 54.8319, + "step": 92640 + }, + { + "epoch": 0.3743177236311041, + "grad_norm": 221.10385131835938, + "learning_rate": 3.172179126287209e-05, + "loss": 80.4364, + "step": 92650 + }, + { + "epoch": 0.37435812489647174, + "grad_norm": 407.0604248046875, + "learning_rate": 3.1719528514181894e-05, + "loss": 91.6954, + "step": 92660 + }, + { + "epoch": 0.3743985261618394, + "grad_norm": 461.81878662109375, + "learning_rate": 3.1717265537013454e-05, + "loss": 95.4252, + "step": 92670 + }, + { + "epoch": 0.374438927427207, + "grad_norm": 954.3992309570312, + "learning_rate": 3.1715002331410886e-05, + "loss": 72.2519, + "step": 92680 + }, + { + "epoch": 0.37447932869257466, + "grad_norm": 862.631591796875, + "learning_rate": 3.1712738897418325e-05, + "loss": 46.1957, + "step": 92690 + }, + { + "epoch": 0.3745197299579423, + "grad_norm": 525.878662109375, + "learning_rate": 3.1710475235079885e-05, + "loss": 59.447, + "step": 92700 + }, + { + "epoch": 0.3745601312233099, + "grad_norm": 906.461669921875, + "learning_rate": 3.17082113444397e-05, + "loss": 96.8806, + "step": 92710 + }, + { + "epoch": 0.37460053248867753, + "grad_norm": 1010.6650390625, + "learning_rate": 3.1705947225541915e-05, + "loss": 86.1426, + "step": 92720 + }, + { + "epoch": 0.37464093375404517, + "grad_norm": 342.9067687988281, + "learning_rate": 3.170368287843065e-05, + "loss": 84.3589, + "step": 92730 + }, + { + "epoch": 0.3746813350194128, + "grad_norm": 635.914794921875, + "learning_rate": 3.1701418303150067e-05, + "loss": 93.9852, + "step": 92740 + }, + { + "epoch": 0.37472173628478045, + "grad_norm": 758.6233520507812, + "learning_rate": 3.169915349974432e-05, + "loss": 94.871, + "step": 92750 + }, + { + "epoch": 0.3747621375501481, + "grad_norm": 589.8165893554688, + "learning_rate": 3.169688846825754e-05, + "loss": 65.1837, + "step": 92760 + }, + { + "epoch": 0.37480253881551573, + "grad_norm": 966.0321044921875, + "learning_rate": 3.16946232087339e-05, + "loss": 74.8339, + "step": 92770 + }, + { + "epoch": 0.3748429400808833, + "grad_norm": 930.7907104492188, + "learning_rate": 3.169235772121756e-05, + "loss": 101.4213, + "step": 92780 + }, + { + "epoch": 0.37488334134625095, + "grad_norm": 498.9374084472656, + "learning_rate": 3.169009200575268e-05, + "loss": 87.4845, + "step": 92790 + }, + { + "epoch": 0.3749237426116186, + "grad_norm": 527.0216674804688, + "learning_rate": 3.1687826062383444e-05, + "loss": 80.7212, + "step": 92800 + }, + { + "epoch": 0.37496414387698623, + "grad_norm": 110.05821228027344, + "learning_rate": 3.168555989115402e-05, + "loss": 73.9802, + "step": 92810 + }, + { + "epoch": 0.3750045451423539, + "grad_norm": 542.911376953125, + "learning_rate": 3.1683293492108595e-05, + "loss": 63.8032, + "step": 92820 + }, + { + "epoch": 0.3750449464077215, + "grad_norm": 805.3507690429688, + "learning_rate": 3.168102686529135e-05, + "loss": 63.8516, + "step": 92830 + }, + { + "epoch": 0.3750853476730891, + "grad_norm": 811.6924438476562, + 
"learning_rate": 3.167876001074646e-05, + "loss": 96.3836, + "step": 92840 + }, + { + "epoch": 0.37512574893845674, + "grad_norm": 592.9849853515625, + "learning_rate": 3.167649292851814e-05, + "loss": 94.8184, + "step": 92850 + }, + { + "epoch": 0.3751661502038244, + "grad_norm": 375.1654357910156, + "learning_rate": 3.167422561865058e-05, + "loss": 80.1432, + "step": 92860 + }, + { + "epoch": 0.375206551469192, + "grad_norm": 1117.7447509765625, + "learning_rate": 3.167195808118798e-05, + "loss": 100.6345, + "step": 92870 + }, + { + "epoch": 0.37524695273455966, + "grad_norm": 653.4371948242188, + "learning_rate": 3.166969031617455e-05, + "loss": 72.4591, + "step": 92880 + }, + { + "epoch": 0.3752873539999273, + "grad_norm": 969.9082641601562, + "learning_rate": 3.166742232365449e-05, + "loss": 86.3821, + "step": 92890 + }, + { + "epoch": 0.3753277552652949, + "grad_norm": 672.2846069335938, + "learning_rate": 3.166515410367203e-05, + "loss": 100.1701, + "step": 92900 + }, + { + "epoch": 0.3753681565306625, + "grad_norm": 902.8344116210938, + "learning_rate": 3.166288565627139e-05, + "loss": 94.0669, + "step": 92910 + }, + { + "epoch": 0.37540855779603016, + "grad_norm": 304.6333923339844, + "learning_rate": 3.166061698149679e-05, + "loss": 72.7057, + "step": 92920 + }, + { + "epoch": 0.3754489590613978, + "grad_norm": 631.9651489257812, + "learning_rate": 3.165834807939245e-05, + "loss": 81.0111, + "step": 92930 + }, + { + "epoch": 0.37548936032676544, + "grad_norm": 337.1204833984375, + "learning_rate": 3.165607895000262e-05, + "loss": 82.5685, + "step": 92940 + }, + { + "epoch": 0.3755297615921331, + "grad_norm": 817.29150390625, + "learning_rate": 3.1653809593371526e-05, + "loss": 90.6416, + "step": 92950 + }, + { + "epoch": 0.3755701628575007, + "grad_norm": 1063.0335693359375, + "learning_rate": 3.165154000954341e-05, + "loss": 91.0895, + "step": 92960 + }, + { + "epoch": 0.3756105641228683, + "grad_norm": 601.6986694335938, + "learning_rate": 3.164927019856253e-05, + "loss": 73.2763, + "step": 92970 + }, + { + "epoch": 0.37565096538823595, + "grad_norm": 623.3773803710938, + "learning_rate": 3.1647000160473126e-05, + "loss": 97.9237, + "step": 92980 + }, + { + "epoch": 0.3756913666536036, + "grad_norm": 1010.8591918945312, + "learning_rate": 3.164472989531946e-05, + "loss": 76.2869, + "step": 92990 + }, + { + "epoch": 0.3757317679189712, + "grad_norm": 1097.248779296875, + "learning_rate": 3.1642459403145794e-05, + "loss": 110.0098, + "step": 93000 + }, + { + "epoch": 0.37577216918433887, + "grad_norm": 824.2134399414062, + "learning_rate": 3.164018868399638e-05, + "loss": 86.9359, + "step": 93010 + }, + { + "epoch": 0.3758125704497065, + "grad_norm": 771.2564697265625, + "learning_rate": 3.16379177379155e-05, + "loss": 134.2927, + "step": 93020 + }, + { + "epoch": 0.3758529717150741, + "grad_norm": 468.6771240234375, + "learning_rate": 3.163564656494742e-05, + "loss": 105.2201, + "step": 93030 + }, + { + "epoch": 0.37589337298044173, + "grad_norm": 766.0885620117188, + "learning_rate": 3.163337516513642e-05, + "loss": 101.4774, + "step": 93040 + }, + { + "epoch": 0.37593377424580937, + "grad_norm": 711.7877807617188, + "learning_rate": 3.1631103538526774e-05, + "loss": 60.6867, + "step": 93050 + }, + { + "epoch": 0.375974175511177, + "grad_norm": 1107.321533203125, + "learning_rate": 3.162883168516279e-05, + "loss": 95.2145, + "step": 93060 + }, + { + "epoch": 0.37601457677654465, + "grad_norm": 1141.1590576171875, + "learning_rate": 3.1626559605088734e-05, + "loss": 132.4672, + 
"step": 93070 + }, + { + "epoch": 0.3760549780419123, + "grad_norm": 525.6072387695312, + "learning_rate": 3.1624287298348923e-05, + "loss": 83.2237, + "step": 93080 + }, + { + "epoch": 0.37609537930727993, + "grad_norm": 594.4111328125, + "learning_rate": 3.1622014764987637e-05, + "loss": 79.904, + "step": 93090 + }, + { + "epoch": 0.3761357805726475, + "grad_norm": 303.2572021484375, + "learning_rate": 3.1619742005049204e-05, + "loss": 80.5305, + "step": 93100 + }, + { + "epoch": 0.37617618183801516, + "grad_norm": 1182.0450439453125, + "learning_rate": 3.16174690185779e-05, + "loss": 107.2621, + "step": 93110 + }, + { + "epoch": 0.3762165831033828, + "grad_norm": 1456.569580078125, + "learning_rate": 3.161519580561807e-05, + "loss": 78.4724, + "step": 93120 + }, + { + "epoch": 0.37625698436875044, + "grad_norm": 692.3211059570312, + "learning_rate": 3.1612922366214014e-05, + "loss": 152.3834, + "step": 93130 + }, + { + "epoch": 0.3762973856341181, + "grad_norm": 700.4097290039062, + "learning_rate": 3.1610648700410057e-05, + "loss": 101.1486, + "step": 93140 + }, + { + "epoch": 0.3763377868994857, + "grad_norm": 1407.5277099609375, + "learning_rate": 3.160837480825052e-05, + "loss": 103.6474, + "step": 93150 + }, + { + "epoch": 0.3763781881648533, + "grad_norm": 497.6858215332031, + "learning_rate": 3.160610068977975e-05, + "loss": 48.5388, + "step": 93160 + }, + { + "epoch": 0.37641858943022094, + "grad_norm": 598.8803100585938, + "learning_rate": 3.1603826345042064e-05, + "loss": 98.3172, + "step": 93170 + }, + { + "epoch": 0.3764589906955886, + "grad_norm": 706.9510498046875, + "learning_rate": 3.160155177408181e-05, + "loss": 69.3694, + "step": 93180 + }, + { + "epoch": 0.3764993919609562, + "grad_norm": 1630.2930908203125, + "learning_rate": 3.159927697694334e-05, + "loss": 107.2761, + "step": 93190 + }, + { + "epoch": 0.37653979322632386, + "grad_norm": 603.3856811523438, + "learning_rate": 3.1597001953670985e-05, + "loss": 51.558, + "step": 93200 + }, + { + "epoch": 0.3765801944916915, + "grad_norm": 650.1831665039062, + "learning_rate": 3.159472670430911e-05, + "loss": 93.5502, + "step": 93210 + }, + { + "epoch": 0.3766205957570591, + "grad_norm": 615.457763671875, + "learning_rate": 3.159245122890207e-05, + "loss": 72.7198, + "step": 93220 + }, + { + "epoch": 0.3766609970224267, + "grad_norm": 1474.615478515625, + "learning_rate": 3.159017552749422e-05, + "loss": 97.6608, + "step": 93230 + }, + { + "epoch": 0.37670139828779436, + "grad_norm": 876.66845703125, + "learning_rate": 3.158789960012993e-05, + "loss": 110.8025, + "step": 93240 + }, + { + "epoch": 0.376741799553162, + "grad_norm": 1333.076416015625, + "learning_rate": 3.1585623446853574e-05, + "loss": 106.7185, + "step": 93250 + }, + { + "epoch": 0.37678220081852964, + "grad_norm": 309.13885498046875, + "learning_rate": 3.158334706770953e-05, + "loss": 81.4015, + "step": 93260 + }, + { + "epoch": 0.3768226020838973, + "grad_norm": 760.3046264648438, + "learning_rate": 3.158107046274217e-05, + "loss": 59.9304, + "step": 93270 + }, + { + "epoch": 0.3768630033492649, + "grad_norm": 518.9866943359375, + "learning_rate": 3.157879363199588e-05, + "loss": 90.522, + "step": 93280 + }, + { + "epoch": 0.3769034046146325, + "grad_norm": 532.4112548828125, + "learning_rate": 3.1576516575515034e-05, + "loss": 57.6518, + "step": 93290 + }, + { + "epoch": 0.37694380588000015, + "grad_norm": 1207.026123046875, + "learning_rate": 3.157423929334405e-05, + "loss": 72.4829, + "step": 93300 + }, + { + "epoch": 0.3769842071453678, + 
"grad_norm": 805.7740478515625, + "learning_rate": 3.157196178552731e-05, + "loss": 71.4595, + "step": 93310 + }, + { + "epoch": 0.37702460841073543, + "grad_norm": 598.9077758789062, + "learning_rate": 3.156968405210921e-05, + "loss": 77.2815, + "step": 93320 + }, + { + "epoch": 0.37706500967610307, + "grad_norm": 1134.4290771484375, + "learning_rate": 3.156740609313417e-05, + "loss": 73.4317, + "step": 93330 + }, + { + "epoch": 0.3771054109414707, + "grad_norm": 880.5693359375, + "learning_rate": 3.156512790864659e-05, + "loss": 109.6154, + "step": 93340 + }, + { + "epoch": 0.3771458122068383, + "grad_norm": 1783.1243896484375, + "learning_rate": 3.1562849498690894e-05, + "loss": 77.8074, + "step": 93350 + }, + { + "epoch": 0.37718621347220593, + "grad_norm": 936.6047973632812, + "learning_rate": 3.1560570863311486e-05, + "loss": 72.4911, + "step": 93360 + }, + { + "epoch": 0.3772266147375736, + "grad_norm": 895.5580444335938, + "learning_rate": 3.15582920025528e-05, + "loss": 87.0404, + "step": 93370 + }, + { + "epoch": 0.3772670160029412, + "grad_norm": 447.0865783691406, + "learning_rate": 3.1556012916459264e-05, + "loss": 96.2881, + "step": 93380 + }, + { + "epoch": 0.37730741726830885, + "grad_norm": 525.6939697265625, + "learning_rate": 3.15537336050753e-05, + "loss": 61.4852, + "step": 93390 + }, + { + "epoch": 0.3773478185336765, + "grad_norm": 1471.582763671875, + "learning_rate": 3.155145406844535e-05, + "loss": 116.5592, + "step": 93400 + }, + { + "epoch": 0.37738821979904413, + "grad_norm": 986.4295043945312, + "learning_rate": 3.154917430661387e-05, + "loss": 75.9448, + "step": 93410 + }, + { + "epoch": 0.3774286210644117, + "grad_norm": 872.0475463867188, + "learning_rate": 3.154689431962528e-05, + "loss": 96.9466, + "step": 93420 + }, + { + "epoch": 0.37746902232977936, + "grad_norm": 1491.79052734375, + "learning_rate": 3.1544614107524044e-05, + "loss": 89.5816, + "step": 93430 + }, + { + "epoch": 0.377509423595147, + "grad_norm": 1264.8699951171875, + "learning_rate": 3.154233367035461e-05, + "loss": 65.996, + "step": 93440 + }, + { + "epoch": 0.37754982486051464, + "grad_norm": 1168.2513427734375, + "learning_rate": 3.154005300816144e-05, + "loss": 70.5069, + "step": 93450 + }, + { + "epoch": 0.3775902261258823, + "grad_norm": 622.3053588867188, + "learning_rate": 3.153777212098899e-05, + "loss": 66.9592, + "step": 93460 + }, + { + "epoch": 0.3776306273912499, + "grad_norm": 832.2103881835938, + "learning_rate": 3.1535491008881735e-05, + "loss": 80.1528, + "step": 93470 + }, + { + "epoch": 0.3776710286566175, + "grad_norm": 573.6228637695312, + "learning_rate": 3.153320967188415e-05, + "loss": 64.7804, + "step": 93480 + }, + { + "epoch": 0.37771142992198514, + "grad_norm": 834.2877807617188, + "learning_rate": 3.1530928110040696e-05, + "loss": 62.259, + "step": 93490 + }, + { + "epoch": 0.3777518311873528, + "grad_norm": 1042.3695068359375, + "learning_rate": 3.1528646323395865e-05, + "loss": 75.9198, + "step": 93500 + }, + { + "epoch": 0.3777922324527204, + "grad_norm": 743.5816040039062, + "learning_rate": 3.152636431199414e-05, + "loss": 89.2089, + "step": 93510 + }, + { + "epoch": 0.37783263371808806, + "grad_norm": 481.5408935546875, + "learning_rate": 3.152408207588001e-05, + "loss": 60.3174, + "step": 93520 + }, + { + "epoch": 0.3778730349834557, + "grad_norm": 1535.644775390625, + "learning_rate": 3.152179961509797e-05, + "loss": 128.186, + "step": 93530 + }, + { + "epoch": 0.3779134362488233, + "grad_norm": 593.4260864257812, + "learning_rate": 
3.1519516929692506e-05, + "loss": 65.2896, + "step": 93540 + }, + { + "epoch": 0.3779538375141909, + "grad_norm": 730.3113403320312, + "learning_rate": 3.1517234019708136e-05, + "loss": 121.4789, + "step": 93550 + }, + { + "epoch": 0.37799423877955857, + "grad_norm": 714.9961547851562, + "learning_rate": 3.151495088518936e-05, + "loss": 75.7723, + "step": 93560 + }, + { + "epoch": 0.3780346400449262, + "grad_norm": 986.8922119140625, + "learning_rate": 3.1512667526180686e-05, + "loss": 61.5398, + "step": 93570 + }, + { + "epoch": 0.37807504131029385, + "grad_norm": 995.9443359375, + "learning_rate": 3.1510383942726626e-05, + "loss": 91.894, + "step": 93580 + }, + { + "epoch": 0.3781154425756615, + "grad_norm": 641.1580810546875, + "learning_rate": 3.1508100134871705e-05, + "loss": 93.6394, + "step": 93590 + }, + { + "epoch": 0.3781558438410291, + "grad_norm": 221.00621032714844, + "learning_rate": 3.150581610266046e-05, + "loss": 44.1914, + "step": 93600 + }, + { + "epoch": 0.3781962451063967, + "grad_norm": 2307.4404296875, + "learning_rate": 3.150353184613739e-05, + "loss": 109.3009, + "step": 93610 + }, + { + "epoch": 0.37823664637176435, + "grad_norm": 1513.2333984375, + "learning_rate": 3.150124736534705e-05, + "loss": 61.9328, + "step": 93620 + }, + { + "epoch": 0.378277047637132, + "grad_norm": 365.6341247558594, + "learning_rate": 3.149896266033398e-05, + "loss": 57.8303, + "step": 93630 + }, + { + "epoch": 0.37831744890249963, + "grad_norm": 897.9816284179688, + "learning_rate": 3.149667773114271e-05, + "loss": 136.3096, + "step": 93640 + }, + { + "epoch": 0.37835785016786727, + "grad_norm": 774.9037475585938, + "learning_rate": 3.1494392577817775e-05, + "loss": 92.7342, + "step": 93650 + }, + { + "epoch": 0.3783982514332349, + "grad_norm": 590.1085815429688, + "learning_rate": 3.149210720040375e-05, + "loss": 69.2387, + "step": 93660 + }, + { + "epoch": 0.3784386526986025, + "grad_norm": 891.5955200195312, + "learning_rate": 3.148982159894518e-05, + "loss": 92.8091, + "step": 93670 + }, + { + "epoch": 0.37847905396397014, + "grad_norm": 560.4688110351562, + "learning_rate": 3.1487535773486624e-05, + "loss": 65.2126, + "step": 93680 + }, + { + "epoch": 0.3785194552293378, + "grad_norm": 434.64697265625, + "learning_rate": 3.148524972407263e-05, + "loss": 53.9323, + "step": 93690 + }, + { + "epoch": 0.3785598564947054, + "grad_norm": 1081.05810546875, + "learning_rate": 3.14829634507478e-05, + "loss": 109.4487, + "step": 93700 + }, + { + "epoch": 0.37860025776007306, + "grad_norm": 1594.9530029296875, + "learning_rate": 3.148067695355667e-05, + "loss": 118.6275, + "step": 93710 + }, + { + "epoch": 0.3786406590254407, + "grad_norm": 457.5696105957031, + "learning_rate": 3.1478390232543835e-05, + "loss": 83.9116, + "step": 93720 + }, + { + "epoch": 0.37868106029080834, + "grad_norm": 617.6077880859375, + "learning_rate": 3.147610328775388e-05, + "loss": 97.3796, + "step": 93730 + }, + { + "epoch": 0.3787214615561759, + "grad_norm": 650.8566284179688, + "learning_rate": 3.1473816119231374e-05, + "loss": 75.4274, + "step": 93740 + }, + { + "epoch": 0.37876186282154356, + "grad_norm": 807.961181640625, + "learning_rate": 3.147152872702092e-05, + "loss": 74.7625, + "step": 93750 + }, + { + "epoch": 0.3788022640869112, + "grad_norm": 595.8689575195312, + "learning_rate": 3.146924111116711e-05, + "loss": 86.1226, + "step": 93760 + }, + { + "epoch": 0.37884266535227884, + "grad_norm": 972.371826171875, + "learning_rate": 3.146695327171454e-05, + "loss": 86.3187, + "step": 93770 + }, + { + 
"epoch": 0.3788830666176465, + "grad_norm": 600.6989135742188, + "learning_rate": 3.146466520870781e-05, + "loss": 119.6286, + "step": 93780 + }, + { + "epoch": 0.3789234678830141, + "grad_norm": 820.0493774414062, + "learning_rate": 3.146237692219153e-05, + "loss": 139.1718, + "step": 93790 + }, + { + "epoch": 0.3789638691483817, + "grad_norm": 818.5054321289062, + "learning_rate": 3.146008841221031e-05, + "loss": 140.3855, + "step": 93800 + }, + { + "epoch": 0.37900427041374934, + "grad_norm": 722.6071166992188, + "learning_rate": 3.145779967880877e-05, + "loss": 88.2628, + "step": 93810 + }, + { + "epoch": 0.379044671679117, + "grad_norm": 1076.6058349609375, + "learning_rate": 3.1455510722031516e-05, + "loss": 101.8152, + "step": 93820 + }, + { + "epoch": 0.3790850729444846, + "grad_norm": 588.5738525390625, + "learning_rate": 3.145322154192319e-05, + "loss": 83.6346, + "step": 93830 + }, + { + "epoch": 0.37912547420985226, + "grad_norm": 1396.5843505859375, + "learning_rate": 3.145093213852842e-05, + "loss": 99.8408, + "step": 93840 + }, + { + "epoch": 0.3791658754752199, + "grad_norm": 643.7491455078125, + "learning_rate": 3.144864251189182e-05, + "loss": 150.5373, + "step": 93850 + }, + { + "epoch": 0.3792062767405875, + "grad_norm": 624.2734985351562, + "learning_rate": 3.144635266205804e-05, + "loss": 63.4565, + "step": 93860 + }, + { + "epoch": 0.37924667800595513, + "grad_norm": 539.47216796875, + "learning_rate": 3.1444062589071735e-05, + "loss": 68.5071, + "step": 93870 + }, + { + "epoch": 0.37928707927132277, + "grad_norm": 597.1256713867188, + "learning_rate": 3.1441772292977534e-05, + "loss": 87.8506, + "step": 93880 + }, + { + "epoch": 0.3793274805366904, + "grad_norm": 479.0013427734375, + "learning_rate": 3.1439481773820086e-05, + "loss": 101.5229, + "step": 93890 + }, + { + "epoch": 0.37936788180205805, + "grad_norm": 325.4750671386719, + "learning_rate": 3.143719103164405e-05, + "loss": 59.6618, + "step": 93900 + }, + { + "epoch": 0.3794082830674257, + "grad_norm": 451.485107421875, + "learning_rate": 3.143490006649409e-05, + "loss": 98.2269, + "step": 93910 + }, + { + "epoch": 0.37944868433279333, + "grad_norm": 847.2384643554688, + "learning_rate": 3.143260887841487e-05, + "loss": 71.9805, + "step": 93920 + }, + { + "epoch": 0.3794890855981609, + "grad_norm": 577.7083129882812, + "learning_rate": 3.1430317467451045e-05, + "loss": 71.5828, + "step": 93930 + }, + { + "epoch": 0.37952948686352855, + "grad_norm": 409.325439453125, + "learning_rate": 3.1428025833647306e-05, + "loss": 90.3106, + "step": 93940 + }, + { + "epoch": 0.3795698881288962, + "grad_norm": 505.71832275390625, + "learning_rate": 3.1425733977048313e-05, + "loss": 88.6146, + "step": 93950 + }, + { + "epoch": 0.37961028939426383, + "grad_norm": 962.36962890625, + "learning_rate": 3.142344189769876e-05, + "loss": 99.527, + "step": 93960 + }, + { + "epoch": 0.3796506906596315, + "grad_norm": 932.5904541015625, + "learning_rate": 3.142114959564332e-05, + "loss": 78.4391, + "step": 93970 + }, + { + "epoch": 0.3796910919249991, + "grad_norm": 951.7392578125, + "learning_rate": 3.141885707092669e-05, + "loss": 58.107, + "step": 93980 + }, + { + "epoch": 0.3797314931903667, + "grad_norm": 681.7964477539062, + "learning_rate": 3.141656432359356e-05, + "loss": 94.7439, + "step": 93990 + }, + { + "epoch": 0.37977189445573434, + "grad_norm": 559.481201171875, + "learning_rate": 3.141427135368864e-05, + "loss": 102.9742, + "step": 94000 + }, + { + "epoch": 0.379812295721102, + "grad_norm": 621.3411865234375, + 
"learning_rate": 3.141197816125662e-05, + "loss": 60.842, + "step": 94010 + }, + { + "epoch": 0.3798526969864696, + "grad_norm": 958.55859375, + "learning_rate": 3.14096847463422e-05, + "loss": 66.7173, + "step": 94020 + }, + { + "epoch": 0.37989309825183726, + "grad_norm": 579.0009155273438, + "learning_rate": 3.140739110899011e-05, + "loss": 71.9154, + "step": 94030 + }, + { + "epoch": 0.3799334995172049, + "grad_norm": 938.1077270507812, + "learning_rate": 3.140509724924506e-05, + "loss": 121.2289, + "step": 94040 + }, + { + "epoch": 0.37997390078257254, + "grad_norm": 797.98486328125, + "learning_rate": 3.1402803167151766e-05, + "loss": 120.3417, + "step": 94050 + }, + { + "epoch": 0.3800143020479401, + "grad_norm": 715.2781982421875, + "learning_rate": 3.140050886275496e-05, + "loss": 76.2277, + "step": 94060 + }, + { + "epoch": 0.38005470331330776, + "grad_norm": 874.4389038085938, + "learning_rate": 3.1398214336099345e-05, + "loss": 93.5464, + "step": 94070 + }, + { + "epoch": 0.3800951045786754, + "grad_norm": 401.1094665527344, + "learning_rate": 3.139591958722969e-05, + "loss": 108.0332, + "step": 94080 + }, + { + "epoch": 0.38013550584404304, + "grad_norm": 137.48606872558594, + "learning_rate": 3.139362461619071e-05, + "loss": 40.0286, + "step": 94090 + }, + { + "epoch": 0.3801759071094107, + "grad_norm": 1376.386474609375, + "learning_rate": 3.139132942302715e-05, + "loss": 84.3158, + "step": 94100 + }, + { + "epoch": 0.3802163083747783, + "grad_norm": 1468.6754150390625, + "learning_rate": 3.138903400778376e-05, + "loss": 119.4853, + "step": 94110 + }, + { + "epoch": 0.3802567096401459, + "grad_norm": 549.1961669921875, + "learning_rate": 3.1386738370505296e-05, + "loss": 63.7568, + "step": 94120 + }, + { + "epoch": 0.38029711090551355, + "grad_norm": 771.1744995117188, + "learning_rate": 3.138444251123649e-05, + "loss": 124.9971, + "step": 94130 + }, + { + "epoch": 0.3803375121708812, + "grad_norm": 1346.43994140625, + "learning_rate": 3.138214643002213e-05, + "loss": 97.0745, + "step": 94140 + }, + { + "epoch": 0.3803779134362488, + "grad_norm": 1424.850830078125, + "learning_rate": 3.1379850126906966e-05, + "loss": 64.6352, + "step": 94150 + }, + { + "epoch": 0.38041831470161647, + "grad_norm": 1063.0472412109375, + "learning_rate": 3.137755360193575e-05, + "loss": 75.1929, + "step": 94160 + }, + { + "epoch": 0.3804587159669841, + "grad_norm": 829.8480834960938, + "learning_rate": 3.137525685515329e-05, + "loss": 59.3759, + "step": 94170 + }, + { + "epoch": 0.3804991172323517, + "grad_norm": 549.8694458007812, + "learning_rate": 3.137295988660432e-05, + "loss": 122.9896, + "step": 94180 + }, + { + "epoch": 0.38053951849771933, + "grad_norm": 413.39739990234375, + "learning_rate": 3.1370662696333653e-05, + "loss": 109.8793, + "step": 94190 + }, + { + "epoch": 0.38057991976308697, + "grad_norm": 404.18109130859375, + "learning_rate": 3.136836528438606e-05, + "loss": 63.1509, + "step": 94200 + }, + { + "epoch": 0.3806203210284546, + "grad_norm": 974.7642211914062, + "learning_rate": 3.136606765080634e-05, + "loss": 119.1633, + "step": 94210 + }, + { + "epoch": 0.38066072229382225, + "grad_norm": 503.6430358886719, + "learning_rate": 3.136376979563927e-05, + "loss": 82.9245, + "step": 94220 + }, + { + "epoch": 0.3807011235591899, + "grad_norm": 1490.7425537109375, + "learning_rate": 3.136147171892966e-05, + "loss": 95.239, + "step": 94230 + }, + { + "epoch": 0.38074152482455753, + "grad_norm": 539.0478515625, + "learning_rate": 3.135917342072231e-05, + "loss": 109.7265, + 
"step": 94240 + }, + { + "epoch": 0.3807819260899251, + "grad_norm": 788.018310546875, + "learning_rate": 3.135687490106203e-05, + "loss": 122.4285, + "step": 94250 + }, + { + "epoch": 0.38082232735529276, + "grad_norm": 790.9324951171875, + "learning_rate": 3.135457615999362e-05, + "loss": 62.8189, + "step": 94260 + }, + { + "epoch": 0.3808627286206604, + "grad_norm": 513.1051025390625, + "learning_rate": 3.13522771975619e-05, + "loss": 66.8298, + "step": 94270 + }, + { + "epoch": 0.38090312988602804, + "grad_norm": 963.4957885742188, + "learning_rate": 3.13499780138117e-05, + "loss": 72.8026, + "step": 94280 + }, + { + "epoch": 0.3809435311513957, + "grad_norm": 694.3912963867188, + "learning_rate": 3.134767860878784e-05, + "loss": 79.0527, + "step": 94290 + }, + { + "epoch": 0.3809839324167633, + "grad_norm": 546.7415161132812, + "learning_rate": 3.134537898253514e-05, + "loss": 117.3017, + "step": 94300 + }, + { + "epoch": 0.3810243336821309, + "grad_norm": 1144.7481689453125, + "learning_rate": 3.1343079135098425e-05, + "loss": 98.9095, + "step": 94310 + }, + { + "epoch": 0.38106473494749854, + "grad_norm": 780.5405883789062, + "learning_rate": 3.134077906652255e-05, + "loss": 69.924, + "step": 94320 + }, + { + "epoch": 0.3811051362128662, + "grad_norm": 1586.7001953125, + "learning_rate": 3.133847877685235e-05, + "loss": 67.1664, + "step": 94330 + }, + { + "epoch": 0.3811455374782338, + "grad_norm": 581.62548828125, + "learning_rate": 3.133617826613266e-05, + "loss": 96.434, + "step": 94340 + }, + { + "epoch": 0.38118593874360146, + "grad_norm": 438.6290283203125, + "learning_rate": 3.1333877534408345e-05, + "loss": 111.0052, + "step": 94350 + }, + { + "epoch": 0.3812263400089691, + "grad_norm": 717.0590209960938, + "learning_rate": 3.133157658172425e-05, + "loss": 108.8189, + "step": 94360 + }, + { + "epoch": 0.3812667412743367, + "grad_norm": 843.5151977539062, + "learning_rate": 3.1329275408125245e-05, + "loss": 94.6043, + "step": 94370 + }, + { + "epoch": 0.3813071425397043, + "grad_norm": 707.2042236328125, + "learning_rate": 3.132697401365617e-05, + "loss": 84.5008, + "step": 94380 + }, + { + "epoch": 0.38134754380507196, + "grad_norm": 301.42523193359375, + "learning_rate": 3.132467239836191e-05, + "loss": 60.0137, + "step": 94390 + }, + { + "epoch": 0.3813879450704396, + "grad_norm": 1485.394775390625, + "learning_rate": 3.132237056228733e-05, + "loss": 69.4156, + "step": 94400 + }, + { + "epoch": 0.38142834633580724, + "grad_norm": 1053.291259765625, + "learning_rate": 3.132006850547732e-05, + "loss": 73.6123, + "step": 94410 + }, + { + "epoch": 0.3814687476011749, + "grad_norm": 509.4904479980469, + "learning_rate": 3.131776622797673e-05, + "loss": 98.7468, + "step": 94420 + }, + { + "epoch": 0.3815091488665425, + "grad_norm": 1441.953369140625, + "learning_rate": 3.131546372983047e-05, + "loss": 110.5504, + "step": 94430 + }, + { + "epoch": 0.3815495501319101, + "grad_norm": 379.231201171875, + "learning_rate": 3.131316101108341e-05, + "loss": 98.7633, + "step": 94440 + }, + { + "epoch": 0.38158995139727775, + "grad_norm": 901.2622680664062, + "learning_rate": 3.131085807178046e-05, + "loss": 89.8795, + "step": 94450 + }, + { + "epoch": 0.3816303526626454, + "grad_norm": 2390.21044921875, + "learning_rate": 3.13085549119665e-05, + "loss": 77.2757, + "step": 94460 + }, + { + "epoch": 0.38167075392801303, + "grad_norm": 1468.2431640625, + "learning_rate": 3.130625153168645e-05, + "loss": 109.4148, + "step": 94470 + }, + { + "epoch": 0.38171115519338067, + "grad_norm": 
678.0813598632812, + "learning_rate": 3.13039479309852e-05, + "loss": 81.6445, + "step": 94480 + }, + { + "epoch": 0.3817515564587483, + "grad_norm": 792.4326171875, + "learning_rate": 3.130164410990767e-05, + "loss": 86.8622, + "step": 94490 + }, + { + "epoch": 0.3817919577241159, + "grad_norm": 932.5657348632812, + "learning_rate": 3.129934006849876e-05, + "loss": 90.4738, + "step": 94500 + }, + { + "epoch": 0.38183235898948353, + "grad_norm": 560.557861328125, + "learning_rate": 3.1297035806803407e-05, + "loss": 99.9487, + "step": 94510 + }, + { + "epoch": 0.3818727602548512, + "grad_norm": 1049.0196533203125, + "learning_rate": 3.129473132486652e-05, + "loss": 106.9519, + "step": 94520 + }, + { + "epoch": 0.3819131615202188, + "grad_norm": 1126.9598388671875, + "learning_rate": 3.1292426622733036e-05, + "loss": 61.362, + "step": 94530 + }, + { + "epoch": 0.38195356278558645, + "grad_norm": 602.106689453125, + "learning_rate": 3.1290121700447876e-05, + "loss": 51.4647, + "step": 94540 + }, + { + "epoch": 0.3819939640509541, + "grad_norm": 1018.1676025390625, + "learning_rate": 3.128781655805599e-05, + "loss": 77.1158, + "step": 94550 + }, + { + "epoch": 0.38203436531632173, + "grad_norm": 1199.679443359375, + "learning_rate": 3.12855111956023e-05, + "loss": 110.6073, + "step": 94560 + }, + { + "epoch": 0.3820747665816893, + "grad_norm": 1780.564697265625, + "learning_rate": 3.128320561313176e-05, + "loss": 109.8977, + "step": 94570 + }, + { + "epoch": 0.38211516784705696, + "grad_norm": 389.7140197753906, + "learning_rate": 3.1280899810689316e-05, + "loss": 112.5056, + "step": 94580 + }, + { + "epoch": 0.3821555691124246, + "grad_norm": 497.46417236328125, + "learning_rate": 3.127859378831992e-05, + "loss": 109.2446, + "step": 94590 + }, + { + "epoch": 0.38219597037779224, + "grad_norm": 701.5946044921875, + "learning_rate": 3.1276287546068536e-05, + "loss": 66.7616, + "step": 94600 + }, + { + "epoch": 0.3822363716431599, + "grad_norm": 564.2448120117188, + "learning_rate": 3.1273981083980126e-05, + "loss": 91.3174, + "step": 94610 + }, + { + "epoch": 0.3822767729085275, + "grad_norm": 473.546875, + "learning_rate": 3.127167440209964e-05, + "loss": 82.8581, + "step": 94620 + }, + { + "epoch": 0.3823171741738951, + "grad_norm": 1212.6865234375, + "learning_rate": 3.1269367500472065e-05, + "loss": 87.4089, + "step": 94630 + }, + { + "epoch": 0.38235757543926274, + "grad_norm": 1153.588134765625, + "learning_rate": 3.126706037914237e-05, + "loss": 67.6954, + "step": 94640 + }, + { + "epoch": 0.3823979767046304, + "grad_norm": 706.65625, + "learning_rate": 3.126475303815554e-05, + "loss": 70.3367, + "step": 94650 + }, + { + "epoch": 0.382438377969998, + "grad_norm": 1117.6512451171875, + "learning_rate": 3.1262445477556536e-05, + "loss": 109.4286, + "step": 94660 + }, + { + "epoch": 0.38247877923536566, + "grad_norm": 545.5320434570312, + "learning_rate": 3.126013769739036e-05, + "loss": 61.0722, + "step": 94670 + }, + { + "epoch": 0.3825191805007333, + "grad_norm": 1014.8693237304688, + "learning_rate": 3.1257829697702004e-05, + "loss": 114.981, + "step": 94680 + }, + { + "epoch": 0.3825595817661009, + "grad_norm": 674.58056640625, + "learning_rate": 3.125552147853646e-05, + "loss": 82.7764, + "step": 94690 + }, + { + "epoch": 0.3825999830314685, + "grad_norm": 386.62371826171875, + "learning_rate": 3.125321303993874e-05, + "loss": 72.866, + "step": 94700 + }, + { + "epoch": 0.38264038429683617, + "grad_norm": 1812.0938720703125, + "learning_rate": 3.125090438195383e-05, + "loss": 
210.2141, + "step": 94710 + }, + { + "epoch": 0.3826807855622038, + "grad_norm": 704.1511840820312, + "learning_rate": 3.124859550462675e-05, + "loss": 116.7745, + "step": 94720 + }, + { + "epoch": 0.38272118682757145, + "grad_norm": 2045.539306640625, + "learning_rate": 3.124628640800251e-05, + "loss": 127.3926, + "step": 94730 + }, + { + "epoch": 0.3827615880929391, + "grad_norm": 1072.1534423828125, + "learning_rate": 3.124397709212613e-05, + "loss": 69.3937, + "step": 94740 + }, + { + "epoch": 0.3828019893583067, + "grad_norm": 1438.6357421875, + "learning_rate": 3.124166755704261e-05, + "loss": 112.0817, + "step": 94750 + }, + { + "epoch": 0.3828423906236743, + "grad_norm": 537.7516479492188, + "learning_rate": 3.123935780279701e-05, + "loss": 86.1748, + "step": 94760 + }, + { + "epoch": 0.38288279188904195, + "grad_norm": 808.9105224609375, + "learning_rate": 3.123704782943434e-05, + "loss": 71.6184, + "step": 94770 + }, + { + "epoch": 0.3829231931544096, + "grad_norm": 1015.2103881835938, + "learning_rate": 3.123473763699964e-05, + "loss": 98.5759, + "step": 94780 + }, + { + "epoch": 0.38296359441977723, + "grad_norm": 837.1531372070312, + "learning_rate": 3.1232427225537935e-05, + "loss": 78.4143, + "step": 94790 + }, + { + "epoch": 0.38300399568514487, + "grad_norm": 609.2176513671875, + "learning_rate": 3.123011659509429e-05, + "loss": 95.2255, + "step": 94800 + }, + { + "epoch": 0.3830443969505125, + "grad_norm": 567.2532958984375, + "learning_rate": 3.122780574571373e-05, + "loss": 87.069, + "step": 94810 + }, + { + "epoch": 0.3830847982158801, + "grad_norm": 449.8392333984375, + "learning_rate": 3.122549467744133e-05, + "loss": 114.9735, + "step": 94820 + }, + { + "epoch": 0.38312519948124774, + "grad_norm": 535.75244140625, + "learning_rate": 3.122318339032212e-05, + "loss": 90.101, + "step": 94830 + }, + { + "epoch": 0.3831656007466154, + "grad_norm": 1017.4208984375, + "learning_rate": 3.122087188440118e-05, + "loss": 77.9901, + "step": 94840 + }, + { + "epoch": 0.383206002011983, + "grad_norm": 623.8526000976562, + "learning_rate": 3.121856015972356e-05, + "loss": 63.8859, + "step": 94850 + }, + { + "epoch": 0.38324640327735066, + "grad_norm": 407.8451843261719, + "learning_rate": 3.121624821633434e-05, + "loss": 66.7445, + "step": 94860 + }, + { + "epoch": 0.3832868045427183, + "grad_norm": 707.3828735351562, + "learning_rate": 3.121393605427858e-05, + "loss": 108.1807, + "step": 94870 + }, + { + "epoch": 0.38332720580808594, + "grad_norm": 129.0882110595703, + "learning_rate": 3.1211623673601366e-05, + "loss": 106.5007, + "step": 94880 + }, + { + "epoch": 0.3833676070734535, + "grad_norm": 417.38470458984375, + "learning_rate": 3.120931107434778e-05, + "loss": 66.4345, + "step": 94890 + }, + { + "epoch": 0.38340800833882116, + "grad_norm": 417.1510314941406, + "learning_rate": 3.1206998256562894e-05, + "loss": 85.8913, + "step": 94900 + }, + { + "epoch": 0.3834484096041888, + "grad_norm": 508.8289794921875, + "learning_rate": 3.120468522029182e-05, + "loss": 123.1464, + "step": 94910 + }, + { + "epoch": 0.38348881086955644, + "grad_norm": 673.1595458984375, + "learning_rate": 3.1202371965579625e-05, + "loss": 87.7965, + "step": 94920 + }, + { + "epoch": 0.3835292121349241, + "grad_norm": 1843.7117919921875, + "learning_rate": 3.120005849247143e-05, + "loss": 128.7029, + "step": 94930 + }, + { + "epoch": 0.3835696134002917, + "grad_norm": 518.3389892578125, + "learning_rate": 3.119774480101233e-05, + "loss": 124.2809, + "step": 94940 + }, + { + "epoch": 
0.3836100146656593, + "grad_norm": 564.5494995117188, + "learning_rate": 3.119543089124743e-05, + "loss": 105.6082, + "step": 94950 + }, + { + "epoch": 0.38365041593102694, + "grad_norm": 760.1812133789062, + "learning_rate": 3.119311676322183e-05, + "loss": 61.0502, + "step": 94960 + }, + { + "epoch": 0.3836908171963946, + "grad_norm": 929.8595581054688, + "learning_rate": 3.1190802416980664e-05, + "loss": 95.8559, + "step": 94970 + }, + { + "epoch": 0.3837312184617622, + "grad_norm": 537.5167236328125, + "learning_rate": 3.118848785256905e-05, + "loss": 75.3892, + "step": 94980 + }, + { + "epoch": 0.38377161972712986, + "grad_norm": 285.7078552246094, + "learning_rate": 3.118617307003209e-05, + "loss": 80.3693, + "step": 94990 + }, + { + "epoch": 0.3838120209924975, + "grad_norm": 896.2296142578125, + "learning_rate": 3.1183858069414936e-05, + "loss": 79.7897, + "step": 95000 + }, + { + "epoch": 0.3838524222578651, + "grad_norm": 809.4661254882812, + "learning_rate": 3.118154285076271e-05, + "loss": 61.7738, + "step": 95010 + }, + { + "epoch": 0.38389282352323273, + "grad_norm": 731.2603149414062, + "learning_rate": 3.1179227414120556e-05, + "loss": 135.2069, + "step": 95020 + }, + { + "epoch": 0.38393322478860037, + "grad_norm": 1142.4989013671875, + "learning_rate": 3.11769117595336e-05, + "loss": 119.9472, + "step": 95030 + }, + { + "epoch": 0.383973626053968, + "grad_norm": 846.0628051757812, + "learning_rate": 3.1174595887047e-05, + "loss": 106.6959, + "step": 95040 + }, + { + "epoch": 0.38401402731933565, + "grad_norm": 574.8919067382812, + "learning_rate": 3.11722797967059e-05, + "loss": 94.7029, + "step": 95050 + }, + { + "epoch": 0.3840544285847033, + "grad_norm": 786.2127075195312, + "learning_rate": 3.1169963488555445e-05, + "loss": 102.3809, + "step": 95060 + }, + { + "epoch": 0.38409482985007093, + "grad_norm": 613.4180908203125, + "learning_rate": 3.116764696264081e-05, + "loss": 90.7443, + "step": 95070 + }, + { + "epoch": 0.3841352311154385, + "grad_norm": 417.14776611328125, + "learning_rate": 3.116533021900714e-05, + "loss": 79.2148, + "step": 95080 + }, + { + "epoch": 0.38417563238080615, + "grad_norm": 1821.736572265625, + "learning_rate": 3.116301325769961e-05, + "loss": 101.5793, + "step": 95090 + }, + { + "epoch": 0.3842160336461738, + "grad_norm": 685.1289672851562, + "learning_rate": 3.11606960787634e-05, + "loss": 98.8885, + "step": 95100 + }, + { + "epoch": 0.38425643491154143, + "grad_norm": 775.8993530273438, + "learning_rate": 3.1158378682243666e-05, + "loss": 96.6337, + "step": 95110 + }, + { + "epoch": 0.3842968361769091, + "grad_norm": 799.635498046875, + "learning_rate": 3.1156061068185595e-05, + "loss": 89.0835, + "step": 95120 + }, + { + "epoch": 0.3843372374422767, + "grad_norm": 1200.2591552734375, + "learning_rate": 3.115374323663437e-05, + "loss": 101.1146, + "step": 95130 + }, + { + "epoch": 0.3843776387076443, + "grad_norm": 732.625244140625, + "learning_rate": 3.115142518763518e-05, + "loss": 134.7351, + "step": 95140 + }, + { + "epoch": 0.38441803997301194, + "grad_norm": 927.8818359375, + "learning_rate": 3.1149106921233216e-05, + "loss": 153.1726, + "step": 95150 + }, + { + "epoch": 0.3844584412383796, + "grad_norm": 603.0083618164062, + "learning_rate": 3.114678843747367e-05, + "loss": 76.4384, + "step": 95160 + }, + { + "epoch": 0.3844988425037472, + "grad_norm": 532.0037841796875, + "learning_rate": 3.114446973640175e-05, + "loss": 67.9376, + "step": 95170 + }, + { + "epoch": 0.38453924376911486, + "grad_norm": 889.0305786132812, + 
"learning_rate": 3.114215081806265e-05, + "loss": 76.0862, + "step": 95180 + }, + { + "epoch": 0.3845796450344825, + "grad_norm": 358.51812744140625, + "learning_rate": 3.113983168250158e-05, + "loss": 119.3925, + "step": 95190 + }, + { + "epoch": 0.38462004629985014, + "grad_norm": 536.9822387695312, + "learning_rate": 3.113751232976376e-05, + "loss": 82.6199, + "step": 95200 + }, + { + "epoch": 0.3846604475652177, + "grad_norm": 535.374755859375, + "learning_rate": 3.1135192759894403e-05, + "loss": 95.8929, + "step": 95210 + }, + { + "epoch": 0.38470084883058536, + "grad_norm": 1949.5418701171875, + "learning_rate": 3.1132872972938735e-05, + "loss": 42.8243, + "step": 95220 + }, + { + "epoch": 0.384741250095953, + "grad_norm": 513.3097534179688, + "learning_rate": 3.113055296894197e-05, + "loss": 57.9779, + "step": 95230 + }, + { + "epoch": 0.38478165136132064, + "grad_norm": 720.063720703125, + "learning_rate": 3.112823274794935e-05, + "loss": 76.9726, + "step": 95240 + }, + { + "epoch": 0.3848220526266883, + "grad_norm": 867.4934692382812, + "learning_rate": 3.11259123100061e-05, + "loss": 81.3233, + "step": 95250 + }, + { + "epoch": 0.3848624538920559, + "grad_norm": 621.0916137695312, + "learning_rate": 3.112359165515746e-05, + "loss": 66.3737, + "step": 95260 + }, + { + "epoch": 0.3849028551574235, + "grad_norm": 957.8388671875, + "learning_rate": 3.112127078344868e-05, + "loss": 139.1678, + "step": 95270 + }, + { + "epoch": 0.38494325642279115, + "grad_norm": 423.520263671875, + "learning_rate": 3.111894969492499e-05, + "loss": 56.7875, + "step": 95280 + }, + { + "epoch": 0.3849836576881588, + "grad_norm": 541.2357177734375, + "learning_rate": 3.1116628389631655e-05, + "loss": 109.9888, + "step": 95290 + }, + { + "epoch": 0.3850240589535264, + "grad_norm": 820.701171875, + "learning_rate": 3.1114306867613935e-05, + "loss": 79.7006, + "step": 95300 + }, + { + "epoch": 0.38506446021889407, + "grad_norm": 684.2330322265625, + "learning_rate": 3.111198512891707e-05, + "loss": 71.4986, + "step": 95310 + }, + { + "epoch": 0.3851048614842617, + "grad_norm": 2856.724609375, + "learning_rate": 3.110966317358634e-05, + "loss": 101.3403, + "step": 95320 + }, + { + "epoch": 0.3851452627496293, + "grad_norm": 1214.7276611328125, + "learning_rate": 3.1107341001667007e-05, + "loss": 111.6822, + "step": 95330 + }, + { + "epoch": 0.38518566401499693, + "grad_norm": 1267.687744140625, + "learning_rate": 3.110501861320434e-05, + "loss": 98.1808, + "step": 95340 + }, + { + "epoch": 0.38522606528036457, + "grad_norm": 578.503173828125, + "learning_rate": 3.110269600824362e-05, + "loss": 89.2344, + "step": 95350 + }, + { + "epoch": 0.3852664665457322, + "grad_norm": 885.7776489257812, + "learning_rate": 3.110037318683012e-05, + "loss": 56.4376, + "step": 95360 + }, + { + "epoch": 0.38530686781109985, + "grad_norm": 549.1149291992188, + "learning_rate": 3.109805014900914e-05, + "loss": 99.0782, + "step": 95370 + }, + { + "epoch": 0.3853472690764675, + "grad_norm": 941.8616333007812, + "learning_rate": 3.109572689482595e-05, + "loss": 69.7804, + "step": 95380 + }, + { + "epoch": 0.38538767034183513, + "grad_norm": 832.53125, + "learning_rate": 3.1093403424325855e-05, + "loss": 84.5855, + "step": 95390 + }, + { + "epoch": 0.3854280716072027, + "grad_norm": 712.7125854492188, + "learning_rate": 3.109107973755415e-05, + "loss": 59.2584, + "step": 95400 + }, + { + "epoch": 0.38546847287257036, + "grad_norm": 639.0912475585938, + "learning_rate": 3.1088755834556135e-05, + "loss": 80.6065, + "step": 95410 + }, 
+ { + "epoch": 0.385508874137938, + "grad_norm": 1063.8660888671875, + "learning_rate": 3.108643171537711e-05, + "loss": 84.6094, + "step": 95420 + }, + { + "epoch": 0.38554927540330564, + "grad_norm": 1128.5555419921875, + "learning_rate": 3.10841073800624e-05, + "loss": 79.9747, + "step": 95430 + }, + { + "epoch": 0.3855896766686733, + "grad_norm": 798.6315307617188, + "learning_rate": 3.10817828286573e-05, + "loss": 108.9347, + "step": 95440 + }, + { + "epoch": 0.3856300779340409, + "grad_norm": 890.1449584960938, + "learning_rate": 3.107945806120715e-05, + "loss": 79.5384, + "step": 95450 + }, + { + "epoch": 0.3856704791994085, + "grad_norm": 764.8078002929688, + "learning_rate": 3.1077133077757257e-05, + "loss": 134.5472, + "step": 95460 + }, + { + "epoch": 0.38571088046477614, + "grad_norm": 723.615966796875, + "learning_rate": 3.107480787835295e-05, + "loss": 91.2642, + "step": 95470 + }, + { + "epoch": 0.3857512817301438, + "grad_norm": 676.7576904296875, + "learning_rate": 3.107248246303956e-05, + "loss": 70.5248, + "step": 95480 + }, + { + "epoch": 0.3857916829955114, + "grad_norm": 898.5016479492188, + "learning_rate": 3.1070156831862424e-05, + "loss": 68.3117, + "step": 95490 + }, + { + "epoch": 0.38583208426087906, + "grad_norm": 1279.8253173828125, + "learning_rate": 3.106783098486688e-05, + "loss": 79.1195, + "step": 95500 + }, + { + "epoch": 0.3858724855262467, + "grad_norm": 832.0548095703125, + "learning_rate": 3.106550492209828e-05, + "loss": 96.2294, + "step": 95510 + }, + { + "epoch": 0.38591288679161434, + "grad_norm": 1052.313720703125, + "learning_rate": 3.1063178643601957e-05, + "loss": 91.3209, + "step": 95520 + }, + { + "epoch": 0.3859532880569819, + "grad_norm": 803.5862426757812, + "learning_rate": 3.106085214942328e-05, + "loss": 68.9828, + "step": 95530 + }, + { + "epoch": 0.38599368932234956, + "grad_norm": 482.01934814453125, + "learning_rate": 3.105852543960759e-05, + "loss": 110.4135, + "step": 95540 + }, + { + "epoch": 0.3860340905877172, + "grad_norm": 453.99493408203125, + "learning_rate": 3.105619851420025e-05, + "loss": 109.5462, + "step": 95550 + }, + { + "epoch": 0.38607449185308484, + "grad_norm": 512.3849487304688, + "learning_rate": 3.105387137324663e-05, + "loss": 125.9299, + "step": 95560 + }, + { + "epoch": 0.3861148931184525, + "grad_norm": 1029.5325927734375, + "learning_rate": 3.1051544016792096e-05, + "loss": 84.147, + "step": 95570 + }, + { + "epoch": 0.3861552943838201, + "grad_norm": 589.3048095703125, + "learning_rate": 3.1049216444882024e-05, + "loss": 66.2789, + "step": 95580 + }, + { + "epoch": 0.3861956956491877, + "grad_norm": 703.6959838867188, + "learning_rate": 3.104688865756179e-05, + "loss": 96.377, + "step": 95590 + }, + { + "epoch": 0.38623609691455535, + "grad_norm": 693.5499267578125, + "learning_rate": 3.1044560654876775e-05, + "loss": 139.3513, + "step": 95600 + }, + { + "epoch": 0.386276498179923, + "grad_norm": 811.3140869140625, + "learning_rate": 3.104223243687236e-05, + "loss": 64.5596, + "step": 95610 + }, + { + "epoch": 0.38631689944529063, + "grad_norm": 646.5933837890625, + "learning_rate": 3.1039904003593946e-05, + "loss": 88.9157, + "step": 95620 + }, + { + "epoch": 0.38635730071065827, + "grad_norm": 547.7163696289062, + "learning_rate": 3.103757535508691e-05, + "loss": 60.8257, + "step": 95630 + }, + { + "epoch": 0.3863977019760259, + "grad_norm": 870.7453002929688, + "learning_rate": 3.103524649139667e-05, + "loss": 82.8208, + "step": 95640 + }, + { + "epoch": 0.3864381032413935, + "grad_norm": 
439.86962890625, + "learning_rate": 3.103291741256861e-05, + "loss": 73.4113, + "step": 95650 + }, + { + "epoch": 0.38647850450676113, + "grad_norm": 1465.887939453125, + "learning_rate": 3.103058811864815e-05, + "loss": 104.1528, + "step": 95660 + }, + { + "epoch": 0.3865189057721288, + "grad_norm": 860.9616088867188, + "learning_rate": 3.102825860968069e-05, + "loss": 96.7483, + "step": 95670 + }, + { + "epoch": 0.3865593070374964, + "grad_norm": 451.9622497558594, + "learning_rate": 3.102592888571166e-05, + "loss": 259.6797, + "step": 95680 + }, + { + "epoch": 0.38659970830286405, + "grad_norm": 1129.623779296875, + "learning_rate": 3.102359894678645e-05, + "loss": 107.582, + "step": 95690 + }, + { + "epoch": 0.3866401095682317, + "grad_norm": 478.9415283203125, + "learning_rate": 3.1021268792950524e-05, + "loss": 88.0418, + "step": 95700 + }, + { + "epoch": 0.38668051083359933, + "grad_norm": 437.5119934082031, + "learning_rate": 3.101893842424928e-05, + "loss": 82.6388, + "step": 95710 + }, + { + "epoch": 0.3867209120989669, + "grad_norm": 616.69873046875, + "learning_rate": 3.101660784072816e-05, + "loss": 93.8743, + "step": 95720 + }, + { + "epoch": 0.38676131336433456, + "grad_norm": 390.3985900878906, + "learning_rate": 3.10142770424326e-05, + "loss": 84.056, + "step": 95730 + }, + { + "epoch": 0.3868017146297022, + "grad_norm": 458.10546875, + "learning_rate": 3.1011946029408035e-05, + "loss": 100.7985, + "step": 95740 + }, + { + "epoch": 0.38684211589506984, + "grad_norm": 584.795654296875, + "learning_rate": 3.1009614801699914e-05, + "loss": 80.9153, + "step": 95750 + }, + { + "epoch": 0.3868825171604375, + "grad_norm": 643.602294921875, + "learning_rate": 3.100728335935369e-05, + "loss": 113.7123, + "step": 95760 + }, + { + "epoch": 0.3869229184258051, + "grad_norm": 879.37109375, + "learning_rate": 3.10049517024148e-05, + "loss": 82.1858, + "step": 95770 + }, + { + "epoch": 0.3869633196911727, + "grad_norm": 581.0502319335938, + "learning_rate": 3.1002619830928715e-05, + "loss": 54.2651, + "step": 95780 + }, + { + "epoch": 0.38700372095654034, + "grad_norm": 695.6017456054688, + "learning_rate": 3.100028774494089e-05, + "loss": 80.8802, + "step": 95790 + }, + { + "epoch": 0.387044122221908, + "grad_norm": 976.1390991210938, + "learning_rate": 3.09979554444968e-05, + "loss": 61.174, + "step": 95800 + }, + { + "epoch": 0.3870845234872756, + "grad_norm": 783.8121337890625, + "learning_rate": 3.09956229296419e-05, + "loss": 104.0637, + "step": 95810 + }, + { + "epoch": 0.38712492475264326, + "grad_norm": 926.6199340820312, + "learning_rate": 3.0993290200421676e-05, + "loss": 80.1523, + "step": 95820 + }, + { + "epoch": 0.3871653260180109, + "grad_norm": 802.8307495117188, + "learning_rate": 3.099095725688159e-05, + "loss": 72.9094, + "step": 95830 + }, + { + "epoch": 0.38720572728337854, + "grad_norm": 1079.244873046875, + "learning_rate": 3.098862409906714e-05, + "loss": 103.7808, + "step": 95840 + }, + { + "epoch": 0.3872461285487461, + "grad_norm": 549.5213623046875, + "learning_rate": 3.09862907270238e-05, + "loss": 71.2151, + "step": 95850 + }, + { + "epoch": 0.38728652981411377, + "grad_norm": 1079.0091552734375, + "learning_rate": 3.098395714079708e-05, + "loss": 97.4606, + "step": 95860 + }, + { + "epoch": 0.3873269310794814, + "grad_norm": 319.2004089355469, + "learning_rate": 3.0981623340432446e-05, + "loss": 82.449, + "step": 95870 + }, + { + "epoch": 0.38736733234484905, + "grad_norm": 526.2009887695312, + "learning_rate": 3.097928932597542e-05, + "loss": 60.8121, + 
"step": 95880 + }, + { + "epoch": 0.3874077336102167, + "grad_norm": 881.2676391601562, + "learning_rate": 3.097695509747149e-05, + "loss": 99.8244, + "step": 95890 + }, + { + "epoch": 0.3874481348755843, + "grad_norm": 707.560546875, + "learning_rate": 3.0974620654966175e-05, + "loss": 85.64, + "step": 95900 + }, + { + "epoch": 0.3874885361409519, + "grad_norm": 479.57598876953125, + "learning_rate": 3.097228599850498e-05, + "loss": 89.9828, + "step": 95910 + }, + { + "epoch": 0.38752893740631955, + "grad_norm": 477.0884704589844, + "learning_rate": 3.096995112813341e-05, + "loss": 87.8353, + "step": 95920 + }, + { + "epoch": 0.3875693386716872, + "grad_norm": 386.4364929199219, + "learning_rate": 3.0967616043897e-05, + "loss": 58.4233, + "step": 95930 + }, + { + "epoch": 0.38760973993705483, + "grad_norm": 577.2306518554688, + "learning_rate": 3.096528074584128e-05, + "loss": 84.2169, + "step": 95940 + }, + { + "epoch": 0.38765014120242247, + "grad_norm": 349.28436279296875, + "learning_rate": 3.0962945234011755e-05, + "loss": 79.0374, + "step": 95950 + }, + { + "epoch": 0.3876905424677901, + "grad_norm": 608.3618774414062, + "learning_rate": 3.096060950845397e-05, + "loss": 64.6979, + "step": 95960 + }, + { + "epoch": 0.3877309437331577, + "grad_norm": 823.87939453125, + "learning_rate": 3.0958273569213456e-05, + "loss": 115.831, + "step": 95970 + }, + { + "epoch": 0.38777134499852534, + "grad_norm": 1148.812255859375, + "learning_rate": 3.095593741633577e-05, + "loss": 111.911, + "step": 95980 + }, + { + "epoch": 0.387811746263893, + "grad_norm": 617.203369140625, + "learning_rate": 3.095360104986643e-05, + "loss": 92.9322, + "step": 95990 + }, + { + "epoch": 0.3878521475292606, + "grad_norm": 1474.8809814453125, + "learning_rate": 3.095126446985101e-05, + "loss": 106.791, + "step": 96000 + }, + { + "epoch": 0.38789254879462826, + "grad_norm": 1326.8182373046875, + "learning_rate": 3.0948927676335044e-05, + "loss": 112.9692, + "step": 96010 + }, + { + "epoch": 0.3879329500599959, + "grad_norm": 559.6285400390625, + "learning_rate": 3.09465906693641e-05, + "loss": 98.5289, + "step": 96020 + }, + { + "epoch": 0.38797335132536354, + "grad_norm": 870.7410888671875, + "learning_rate": 3.0944253448983725e-05, + "loss": 140.0265, + "step": 96030 + }, + { + "epoch": 0.3880137525907311, + "grad_norm": 301.7960205078125, + "learning_rate": 3.0941916015239505e-05, + "loss": 53.1204, + "step": 96040 + }, + { + "epoch": 0.38805415385609876, + "grad_norm": 757.552001953125, + "learning_rate": 3.0939578368176997e-05, + "loss": 74.6599, + "step": 96050 + }, + { + "epoch": 0.3880945551214664, + "grad_norm": 726.8504638671875, + "learning_rate": 3.093724050784178e-05, + "loss": 78.6737, + "step": 96060 + }, + { + "epoch": 0.38813495638683404, + "grad_norm": 368.93328857421875, + "learning_rate": 3.093490243427942e-05, + "loss": 62.6013, + "step": 96070 + }, + { + "epoch": 0.3881753576522017, + "grad_norm": 517.4122924804688, + "learning_rate": 3.093256414753551e-05, + "loss": 71.7008, + "step": 96080 + }, + { + "epoch": 0.3882157589175693, + "grad_norm": 840.2767333984375, + "learning_rate": 3.093022564765564e-05, + "loss": 103.6704, + "step": 96090 + }, + { + "epoch": 0.3882561601829369, + "grad_norm": 643.90478515625, + "learning_rate": 3.092788693468539e-05, + "loss": 67.0252, + "step": 96100 + }, + { + "epoch": 0.38829656144830454, + "grad_norm": 600.3934326171875, + "learning_rate": 3.092554800867035e-05, + "loss": 54.8227, + "step": 96110 + }, + { + "epoch": 0.3883369627136722, + "grad_norm": 
654.4700927734375, + "learning_rate": 3.092320886965614e-05, + "loss": 86.7017, + "step": 96120 + }, + { + "epoch": 0.3883773639790398, + "grad_norm": 699.956787109375, + "learning_rate": 3.0920869517688336e-05, + "loss": 125.053, + "step": 96130 + }, + { + "epoch": 0.38841776524440746, + "grad_norm": 1009.407958984375, + "learning_rate": 3.091852995281257e-05, + "loss": 98.0712, + "step": 96140 + }, + { + "epoch": 0.3884581665097751, + "grad_norm": 886.2274780273438, + "learning_rate": 3.091619017507443e-05, + "loss": 74.5138, + "step": 96150 + }, + { + "epoch": 0.38849856777514274, + "grad_norm": 803.284912109375, + "learning_rate": 3.091385018451955e-05, + "loss": 120.6429, + "step": 96160 + }, + { + "epoch": 0.38853896904051033, + "grad_norm": 392.4866943359375, + "learning_rate": 3.091150998119354e-05, + "loss": 76.1643, + "step": 96170 + }, + { + "epoch": 0.38857937030587797, + "grad_norm": 2471.52392578125, + "learning_rate": 3.090916956514203e-05, + "loss": 125.5558, + "step": 96180 + }, + { + "epoch": 0.3886197715712456, + "grad_norm": 782.3226928710938, + "learning_rate": 3.090682893641064e-05, + "loss": 92.7032, + "step": 96190 + }, + { + "epoch": 0.38866017283661325, + "grad_norm": 691.4236450195312, + "learning_rate": 3.090448809504501e-05, + "loss": 81.0173, + "step": 96200 + }, + { + "epoch": 0.3887005741019809, + "grad_norm": 717.126953125, + "learning_rate": 3.090214704109076e-05, + "loss": 75.0317, + "step": 96210 + }, + { + "epoch": 0.38874097536734853, + "grad_norm": 1242.1875, + "learning_rate": 3.089980577459354e-05, + "loss": 106.0926, + "step": 96220 + }, + { + "epoch": 0.3887813766327161, + "grad_norm": 347.3780822753906, + "learning_rate": 3.0897464295599e-05, + "loss": 84.0991, + "step": 96230 + }, + { + "epoch": 0.38882177789808375, + "grad_norm": 919.9573974609375, + "learning_rate": 3.0895122604152784e-05, + "loss": 92.0765, + "step": 96240 + }, + { + "epoch": 0.3888621791634514, + "grad_norm": 731.8493041992188, + "learning_rate": 3.0892780700300544e-05, + "loss": 69.4465, + "step": 96250 + }, + { + "epoch": 0.38890258042881903, + "grad_norm": 512.4259033203125, + "learning_rate": 3.0890438584087936e-05, + "loss": 122.4495, + "step": 96260 + }, + { + "epoch": 0.3889429816941867, + "grad_norm": 817.7256469726562, + "learning_rate": 3.088809625556062e-05, + "loss": 57.533, + "step": 96270 + }, + { + "epoch": 0.3889833829595543, + "grad_norm": 705.458984375, + "learning_rate": 3.088575371476426e-05, + "loss": 73.4365, + "step": 96280 + }, + { + "epoch": 0.3890237842249219, + "grad_norm": 637.3275756835938, + "learning_rate": 3.0883410961744536e-05, + "loss": 55.96, + "step": 96290 + }, + { + "epoch": 0.38906418549028954, + "grad_norm": 657.9631958007812, + "learning_rate": 3.08810679965471e-05, + "loss": 104.5706, + "step": 96300 + }, + { + "epoch": 0.3891045867556572, + "grad_norm": 1264.8553466796875, + "learning_rate": 3.087872481921765e-05, + "loss": 137.2773, + "step": 96310 + }, + { + "epoch": 0.3891449880210248, + "grad_norm": 648.2803955078125, + "learning_rate": 3.087638142980185e-05, + "loss": 95.9504, + "step": 96320 + }, + { + "epoch": 0.38918538928639246, + "grad_norm": 500.7957763671875, + "learning_rate": 3.0874037828345406e-05, + "loss": 85.0105, + "step": 96330 + }, + { + "epoch": 0.3892257905517601, + "grad_norm": 1078.1077880859375, + "learning_rate": 3.0871694014893985e-05, + "loss": 77.7343, + "step": 96340 + }, + { + "epoch": 0.38926619181712774, + "grad_norm": 621.4526977539062, + "learning_rate": 3.08693499894933e-05, + "loss": 59.6901, + 
"step": 96350 + }, + { + "epoch": 0.3893065930824953, + "grad_norm": 637.1646118164062, + "learning_rate": 3.0867005752189037e-05, + "loss": 84.5536, + "step": 96360 + }, + { + "epoch": 0.38934699434786296, + "grad_norm": 635.1956176757812, + "learning_rate": 3.086466130302691e-05, + "loss": 108.0934, + "step": 96370 + }, + { + "epoch": 0.3893873956132306, + "grad_norm": 801.22216796875, + "learning_rate": 3.086231664205261e-05, + "loss": 86.4523, + "step": 96380 + }, + { + "epoch": 0.38942779687859824, + "grad_norm": 504.58770751953125, + "learning_rate": 3.085997176931185e-05, + "loss": 75.0431, + "step": 96390 + }, + { + "epoch": 0.3894681981439659, + "grad_norm": 682.9896240234375, + "learning_rate": 3.0857626684850355e-05, + "loss": 110.7812, + "step": 96400 + }, + { + "epoch": 0.3895085994093335, + "grad_norm": 1005.5950317382812, + "learning_rate": 3.085528138871384e-05, + "loss": 60.0741, + "step": 96410 + }, + { + "epoch": 0.3895490006747011, + "grad_norm": 884.9194946289062, + "learning_rate": 3.085293588094802e-05, + "loss": 63.6746, + "step": 96420 + }, + { + "epoch": 0.38958940194006875, + "grad_norm": 596.928955078125, + "learning_rate": 3.085059016159863e-05, + "loss": 57.4864, + "step": 96430 + }, + { + "epoch": 0.3896298032054364, + "grad_norm": 847.5093383789062, + "learning_rate": 3.08482442307114e-05, + "loss": 63.5935, + "step": 96440 + }, + { + "epoch": 0.389670204470804, + "grad_norm": 1240.4024658203125, + "learning_rate": 3.084589808833206e-05, + "loss": 85.0007, + "step": 96450 + }, + { + "epoch": 0.38971060573617167, + "grad_norm": 878.6253051757812, + "learning_rate": 3.084355173450636e-05, + "loss": 63.0488, + "step": 96460 + }, + { + "epoch": 0.3897510070015393, + "grad_norm": 1035.58251953125, + "learning_rate": 3.084120516928003e-05, + "loss": 66.4905, + "step": 96470 + }, + { + "epoch": 0.38979140826690695, + "grad_norm": 515.03759765625, + "learning_rate": 3.0838858392698825e-05, + "loss": 52.2766, + "step": 96480 + }, + { + "epoch": 0.38983180953227453, + "grad_norm": 1099.466064453125, + "learning_rate": 3.08365114048085e-05, + "loss": 87.894, + "step": 96490 + }, + { + "epoch": 0.38987221079764217, + "grad_norm": 897.2124633789062, + "learning_rate": 3.08341642056548e-05, + "loss": 83.0752, + "step": 96500 + }, + { + "epoch": 0.3899126120630098, + "grad_norm": 710.0145263671875, + "learning_rate": 3.083181679528349e-05, + "loss": 59.8088, + "step": 96510 + }, + { + "epoch": 0.38995301332837745, + "grad_norm": 510.3114929199219, + "learning_rate": 3.0829469173740344e-05, + "loss": 74.4131, + "step": 96520 + }, + { + "epoch": 0.3899934145937451, + "grad_norm": 1219.8282470703125, + "learning_rate": 3.082712134107111e-05, + "loss": 82.0991, + "step": 96530 + }, + { + "epoch": 0.39003381585911273, + "grad_norm": 1056.698486328125, + "learning_rate": 3.0824773297321585e-05, + "loss": 115.2212, + "step": 96540 + }, + { + "epoch": 0.3900742171244803, + "grad_norm": 487.8681640625, + "learning_rate": 3.082242504253752e-05, + "loss": 71.4116, + "step": 96550 + }, + { + "epoch": 0.39011461838984796, + "grad_norm": 1221.7821044921875, + "learning_rate": 3.082007657676471e-05, + "loss": 66.2803, + "step": 96560 + }, + { + "epoch": 0.3901550196552156, + "grad_norm": 1128.1923828125, + "learning_rate": 3.081772790004894e-05, + "loss": 79.6686, + "step": 96570 + }, + { + "epoch": 0.39019542092058324, + "grad_norm": 726.4423828125, + "learning_rate": 3.081537901243599e-05, + "loss": 78.7462, + "step": 96580 + }, + { + "epoch": 0.3902358221859509, + "grad_norm": 
849.8167724609375, + "learning_rate": 3.081302991397166e-05, + "loss": 83.0079, + "step": 96590 + }, + { + "epoch": 0.3902762234513185, + "grad_norm": 338.8612976074219, + "learning_rate": 3.081068060470174e-05, + "loss": 88.7557, + "step": 96600 + }, + { + "epoch": 0.3903166247166861, + "grad_norm": 1795.5322265625, + "learning_rate": 3.080833108467204e-05, + "loss": 116.2812, + "step": 96610 + }, + { + "epoch": 0.39035702598205374, + "grad_norm": 816.1395263671875, + "learning_rate": 3.080598135392837e-05, + "loss": 244.8392, + "step": 96620 + }, + { + "epoch": 0.3903974272474214, + "grad_norm": 759.6920166015625, + "learning_rate": 3.080363141251652e-05, + "loss": 89.456, + "step": 96630 + }, + { + "epoch": 0.390437828512789, + "grad_norm": 1007.2218627929688, + "learning_rate": 3.080128126048232e-05, + "loss": 102.8157, + "step": 96640 + }, + { + "epoch": 0.39047822977815666, + "grad_norm": 1321.600830078125, + "learning_rate": 3.0798930897871576e-05, + "loss": 94.8896, + "step": 96650 + }, + { + "epoch": 0.3905186310435243, + "grad_norm": 469.6827087402344, + "learning_rate": 3.079658032473011e-05, + "loss": 65.9005, + "step": 96660 + }, + { + "epoch": 0.39055903230889194, + "grad_norm": 685.8162231445312, + "learning_rate": 3.079422954110376e-05, + "loss": 84.8531, + "step": 96670 + }, + { + "epoch": 0.3905994335742595, + "grad_norm": 781.5623168945312, + "learning_rate": 3.0791878547038345e-05, + "loss": 148.2049, + "step": 96680 + }, + { + "epoch": 0.39063983483962716, + "grad_norm": 498.0014343261719, + "learning_rate": 3.0789527342579705e-05, + "loss": 75.5226, + "step": 96690 + }, + { + "epoch": 0.3906802361049948, + "grad_norm": 707.8844604492188, + "learning_rate": 3.078717592777367e-05, + "loss": 106.7873, + "step": 96700 + }, + { + "epoch": 0.39072063737036244, + "grad_norm": 1159.0010986328125, + "learning_rate": 3.078482430266609e-05, + "loss": 93.5542, + "step": 96710 + }, + { + "epoch": 0.3907610386357301, + "grad_norm": 515.0947875976562, + "learning_rate": 3.07824724673028e-05, + "loss": 93.9025, + "step": 96720 + }, + { + "epoch": 0.3908014399010977, + "grad_norm": 722.517822265625, + "learning_rate": 3.078012042172967e-05, + "loss": 111.9917, + "step": 96730 + }, + { + "epoch": 0.3908418411664653, + "grad_norm": 411.4763488769531, + "learning_rate": 3.077776816599253e-05, + "loss": 61.8083, + "step": 96740 + }, + { + "epoch": 0.39088224243183295, + "grad_norm": 993.5257568359375, + "learning_rate": 3.0775415700137266e-05, + "loss": 115.8156, + "step": 96750 + }, + { + "epoch": 0.3909226436972006, + "grad_norm": 469.0652770996094, + "learning_rate": 3.077306302420971e-05, + "loss": 67.1879, + "step": 96760 + }, + { + "epoch": 0.39096304496256823, + "grad_norm": 861.5034790039062, + "learning_rate": 3.0770710138255754e-05, + "loss": 95.6457, + "step": 96770 + }, + { + "epoch": 0.39100344622793587, + "grad_norm": 1133.357421875, + "learning_rate": 3.076835704232126e-05, + "loss": 78.2084, + "step": 96780 + }, + { + "epoch": 0.3910438474933035, + "grad_norm": 292.7736511230469, + "learning_rate": 3.076600373645209e-05, + "loss": 82.8944, + "step": 96790 + }, + { + "epoch": 0.39108424875867115, + "grad_norm": 825.4213256835938, + "learning_rate": 3.0763650220694145e-05, + "loss": 102.8883, + "step": 96800 + }, + { + "epoch": 0.39112465002403873, + "grad_norm": 415.4195861816406, + "learning_rate": 3.0761296495093294e-05, + "loss": 116.7448, + "step": 96810 + }, + { + "epoch": 0.3911650512894064, + "grad_norm": 563.1967163085938, + "learning_rate": 3.075894255969543e-05, + 
"loss": 56.6253, + "step": 96820 + }, + { + "epoch": 0.391205452554774, + "grad_norm": 883.0142211914062, + "learning_rate": 3.075658841454644e-05, + "loss": 59.7687, + "step": 96830 + }, + { + "epoch": 0.39124585382014165, + "grad_norm": 911.0853271484375, + "learning_rate": 3.075423405969222e-05, + "loss": 65.7999, + "step": 96840 + }, + { + "epoch": 0.3912862550855093, + "grad_norm": 760.2576293945312, + "learning_rate": 3.075187949517867e-05, + "loss": 96.6613, + "step": 96850 + }, + { + "epoch": 0.39132665635087693, + "grad_norm": 1176.1065673828125, + "learning_rate": 3.07495247210517e-05, + "loss": 126.6594, + "step": 96860 + }, + { + "epoch": 0.3913670576162445, + "grad_norm": 727.1702270507812, + "learning_rate": 3.07471697373572e-05, + "loss": 73.9309, + "step": 96870 + }, + { + "epoch": 0.39140745888161216, + "grad_norm": 657.903076171875, + "learning_rate": 3.0744814544141096e-05, + "loss": 111.3555, + "step": 96880 + }, + { + "epoch": 0.3914478601469798, + "grad_norm": 363.2109375, + "learning_rate": 3.07424591414493e-05, + "loss": 118.8594, + "step": 96890 + }, + { + "epoch": 0.39148826141234744, + "grad_norm": 890.1277465820312, + "learning_rate": 3.0740103529327744e-05, + "loss": 112.4339, + "step": 96900 + }, + { + "epoch": 0.3915286626777151, + "grad_norm": 2147.576416015625, + "learning_rate": 3.0737747707822334e-05, + "loss": 112.5734, + "step": 96910 + }, + { + "epoch": 0.3915690639430827, + "grad_norm": 549.5736083984375, + "learning_rate": 3.0735391676978993e-05, + "loss": 67.0437, + "step": 96920 + }, + { + "epoch": 0.3916094652084503, + "grad_norm": 857.1650390625, + "learning_rate": 3.0733035436843684e-05, + "loss": 101.8063, + "step": 96930 + }, + { + "epoch": 0.39164986647381794, + "grad_norm": 689.0807495117188, + "learning_rate": 3.073067898746231e-05, + "loss": 77.7691, + "step": 96940 + }, + { + "epoch": 0.3916902677391856, + "grad_norm": 819.2293090820312, + "learning_rate": 3.0728322328880825e-05, + "loss": 90.1204, + "step": 96950 + }, + { + "epoch": 0.3917306690045532, + "grad_norm": 427.70562744140625, + "learning_rate": 3.0725965461145175e-05, + "loss": 80.7639, + "step": 96960 + }, + { + "epoch": 0.39177107026992086, + "grad_norm": 355.0552673339844, + "learning_rate": 3.072360838430131e-05, + "loss": 77.5942, + "step": 96970 + }, + { + "epoch": 0.3918114715352885, + "grad_norm": 967.6082763671875, + "learning_rate": 3.0721251098395175e-05, + "loss": 84.967, + "step": 96980 + }, + { + "epoch": 0.39185187280065614, + "grad_norm": 556.22021484375, + "learning_rate": 3.0718893603472725e-05, + "loss": 66.5321, + "step": 96990 + }, + { + "epoch": 0.3918922740660237, + "grad_norm": 767.8169555664062, + "learning_rate": 3.0716535899579936e-05, + "loss": 61.7622, + "step": 97000 + }, + { + "epoch": 0.39193267533139137, + "grad_norm": 797.4469604492188, + "learning_rate": 3.071417798676276e-05, + "loss": 86.0451, + "step": 97010 + }, + { + "epoch": 0.391973076596759, + "grad_norm": 392.773193359375, + "learning_rate": 3.0711819865067165e-05, + "loss": 90.8626, + "step": 97020 + }, + { + "epoch": 0.39201347786212665, + "grad_norm": 345.17547607421875, + "learning_rate": 3.070946153453913e-05, + "loss": 65.1858, + "step": 97030 + }, + { + "epoch": 0.3920538791274943, + "grad_norm": 951.6724243164062, + "learning_rate": 3.070710299522462e-05, + "loss": 62.3224, + "step": 97040 + }, + { + "epoch": 0.3920942803928619, + "grad_norm": 754.866455078125, + "learning_rate": 3.070474424716963e-05, + "loss": 86.8699, + "step": 97050 + }, + { + "epoch": 0.3921346816582295, 
+ "grad_norm": 659.4210205078125, + "learning_rate": 3.0702385290420145e-05, + "loss": 105.2997, + "step": 97060 + }, + { + "epoch": 0.39217508292359715, + "grad_norm": 794.5908203125, + "learning_rate": 3.070002612502215e-05, + "loss": 82.2918, + "step": 97070 + }, + { + "epoch": 0.3922154841889648, + "grad_norm": 649.2097778320312, + "learning_rate": 3.069766675102162e-05, + "loss": 71.5879, + "step": 97080 + }, + { + "epoch": 0.39225588545433243, + "grad_norm": 825.7255249023438, + "learning_rate": 3.0695307168464584e-05, + "loss": 93.7175, + "step": 97090 + }, + { + "epoch": 0.39229628671970007, + "grad_norm": 636.9070434570312, + "learning_rate": 3.069294737739703e-05, + "loss": 101.6263, + "step": 97100 + }, + { + "epoch": 0.3923366879850677, + "grad_norm": 822.4561767578125, + "learning_rate": 3.069058737786496e-05, + "loss": 57.5888, + "step": 97110 + }, + { + "epoch": 0.39237708925043535, + "grad_norm": 548.2891235351562, + "learning_rate": 3.068822716991438e-05, + "loss": 65.2678, + "step": 97120 + }, + { + "epoch": 0.39241749051580294, + "grad_norm": 1085.88525390625, + "learning_rate": 3.068586675359131e-05, + "loss": 93.542, + "step": 97130 + }, + { + "epoch": 0.3924578917811706, + "grad_norm": 1076.5572509765625, + "learning_rate": 3.0683506128941774e-05, + "loss": 91.8148, + "step": 97140 + }, + { + "epoch": 0.3924982930465382, + "grad_norm": 677.1206665039062, + "learning_rate": 3.0681145296011774e-05, + "loss": 104.0458, + "step": 97150 + }, + { + "epoch": 0.39253869431190586, + "grad_norm": 492.255859375, + "learning_rate": 3.067878425484735e-05, + "loss": 74.2042, + "step": 97160 + }, + { + "epoch": 0.3925790955772735, + "grad_norm": 526.3909912109375, + "learning_rate": 3.067642300549453e-05, + "loss": 65.9965, + "step": 97170 + }, + { + "epoch": 0.39261949684264114, + "grad_norm": 773.382568359375, + "learning_rate": 3.067406154799935e-05, + "loss": 92.1618, + "step": 97180 + }, + { + "epoch": 0.3926598981080087, + "grad_norm": 722.2803955078125, + "learning_rate": 3.0671699882407845e-05, + "loss": 106.479, + "step": 97190 + }, + { + "epoch": 0.39270029937337636, + "grad_norm": 488.80450439453125, + "learning_rate": 3.066933800876606e-05, + "loss": 88.1114, + "step": 97200 + }, + { + "epoch": 0.392740700638744, + "grad_norm": 585.908203125, + "learning_rate": 3.066697592712003e-05, + "loss": 70.4683, + "step": 97210 + }, + { + "epoch": 0.39278110190411164, + "grad_norm": 493.4122619628906, + "learning_rate": 3.066461363751582e-05, + "loss": 55.5688, + "step": 97220 + }, + { + "epoch": 0.3928215031694793, + "grad_norm": 1119.3834228515625, + "learning_rate": 3.066225113999946e-05, + "loss": 145.9591, + "step": 97230 + }, + { + "epoch": 0.3928619044348469, + "grad_norm": 416.3084411621094, + "learning_rate": 3.0659888434617045e-05, + "loss": 89.764, + "step": 97240 + }, + { + "epoch": 0.3929023057002145, + "grad_norm": 546.7301635742188, + "learning_rate": 3.06575255214146e-05, + "loss": 69.1713, + "step": 97250 + }, + { + "epoch": 0.39294270696558214, + "grad_norm": 233.58908081054688, + "learning_rate": 3.0655162400438214e-05, + "loss": 86.425, + "step": 97260 + }, + { + "epoch": 0.3929831082309498, + "grad_norm": 1670.6375732421875, + "learning_rate": 3.0652799071733956e-05, + "loss": 89.0, + "step": 97270 + }, + { + "epoch": 0.3930235094963174, + "grad_norm": 1600.6556396484375, + "learning_rate": 3.06504355353479e-05, + "loss": 108.7917, + "step": 97280 + }, + { + "epoch": 0.39306391076168506, + "grad_norm": 1270.0401611328125, + "learning_rate": 
3.0648071791326114e-05, + "loss": 73.9757, + "step": 97290 + }, + { + "epoch": 0.3931043120270527, + "grad_norm": 927.1881713867188, + "learning_rate": 3.064570783971468e-05, + "loss": 122.3317, + "step": 97300 + }, + { + "epoch": 0.39314471329242034, + "grad_norm": 855.7843017578125, + "learning_rate": 3.0643343680559704e-05, + "loss": 96.7096, + "step": 97310 + }, + { + "epoch": 0.39318511455778793, + "grad_norm": 753.4629516601562, + "learning_rate": 3.064097931390725e-05, + "loss": 80.4563, + "step": 97320 + }, + { + "epoch": 0.39322551582315557, + "grad_norm": 801.4183349609375, + "learning_rate": 3.0638614739803435e-05, + "loss": 63.5125, + "step": 97330 + }, + { + "epoch": 0.3932659170885232, + "grad_norm": 1515.1300048828125, + "learning_rate": 3.063624995829434e-05, + "loss": 76.4695, + "step": 97340 + }, + { + "epoch": 0.39330631835389085, + "grad_norm": 648.81982421875, + "learning_rate": 3.0633884969426085e-05, + "loss": 59.9005, + "step": 97350 + }, + { + "epoch": 0.3933467196192585, + "grad_norm": 622.2833251953125, + "learning_rate": 3.063151977324476e-05, + "loss": 116.8134, + "step": 97360 + }, + { + "epoch": 0.39338712088462613, + "grad_norm": 792.2048950195312, + "learning_rate": 3.0629154369796494e-05, + "loss": 102.0067, + "step": 97370 + }, + { + "epoch": 0.3934275221499937, + "grad_norm": 1313.85986328125, + "learning_rate": 3.0626788759127384e-05, + "loss": 81.1807, + "step": 97380 + }, + { + "epoch": 0.39346792341536135, + "grad_norm": 547.7554321289062, + "learning_rate": 3.062442294128356e-05, + "loss": 71.2151, + "step": 97390 + }, + { + "epoch": 0.393508324680729, + "grad_norm": 648.1655883789062, + "learning_rate": 3.0622056916311134e-05, + "loss": 115.7443, + "step": 97400 + }, + { + "epoch": 0.39354872594609663, + "grad_norm": 613.3399047851562, + "learning_rate": 3.061969068425625e-05, + "loss": 90.2084, + "step": 97410 + }, + { + "epoch": 0.3935891272114643, + "grad_norm": 429.7276611328125, + "learning_rate": 3.0617324245165014e-05, + "loss": 133.9549, + "step": 97420 + }, + { + "epoch": 0.3936295284768319, + "grad_norm": 994.2064208984375, + "learning_rate": 3.061495759908359e-05, + "loss": 85.955, + "step": 97430 + }, + { + "epoch": 0.3936699297421995, + "grad_norm": 1166.8663330078125, + "learning_rate": 3.0612590746058094e-05, + "loss": 102.4687, + "step": 97440 + }, + { + "epoch": 0.39371033100756714, + "grad_norm": 539.4866333007812, + "learning_rate": 3.0610223686134685e-05, + "loss": 96.3874, + "step": 97450 + }, + { + "epoch": 0.3937507322729348, + "grad_norm": 574.26416015625, + "learning_rate": 3.06078564193595e-05, + "loss": 74.3263, + "step": 97460 + }, + { + "epoch": 0.3937911335383024, + "grad_norm": 319.1677551269531, + "learning_rate": 3.060548894577869e-05, + "loss": 66.5722, + "step": 97470 + }, + { + "epoch": 0.39383153480367006, + "grad_norm": 1223.851806640625, + "learning_rate": 3.060312126543842e-05, + "loss": 83.5869, + "step": 97480 + }, + { + "epoch": 0.3938719360690377, + "grad_norm": 360.67669677734375, + "learning_rate": 3.060075337838483e-05, + "loss": 49.6615, + "step": 97490 + }, + { + "epoch": 0.39391233733440534, + "grad_norm": 1603.10302734375, + "learning_rate": 3.05983852846641e-05, + "loss": 87.2969, + "step": 97500 + }, + { + "epoch": 0.3939527385997729, + "grad_norm": 1448.779541015625, + "learning_rate": 3.05960169843224e-05, + "loss": 94.8405, + "step": 97510 + }, + { + "epoch": 0.39399313986514056, + "grad_norm": 603.0029296875, + "learning_rate": 3.0593648477405885e-05, + "loss": 97.9926, + "step": 97520 + }, + 
{ + "epoch": 0.3940335411305082, + "grad_norm": 990.88525390625, + "learning_rate": 3.0591279763960735e-05, + "loss": 84.3614, + "step": 97530 + }, + { + "epoch": 0.39407394239587584, + "grad_norm": 856.7296752929688, + "learning_rate": 3.058891084403315e-05, + "loss": 130.6562, + "step": 97540 + }, + { + "epoch": 0.3941143436612435, + "grad_norm": 242.98887634277344, + "learning_rate": 3.0586541717669286e-05, + "loss": 67.5632, + "step": 97550 + }, + { + "epoch": 0.3941547449266111, + "grad_norm": 978.5390625, + "learning_rate": 3.058417238491534e-05, + "loss": 106.3573, + "step": 97560 + }, + { + "epoch": 0.3941951461919787, + "grad_norm": 362.48345947265625, + "learning_rate": 3.058180284581751e-05, + "loss": 70.2811, + "step": 97570 + }, + { + "epoch": 0.39423554745734635, + "grad_norm": 427.5005798339844, + "learning_rate": 3.0579433100421974e-05, + "loss": 65.4352, + "step": 97580 + }, + { + "epoch": 0.394275948722714, + "grad_norm": 1359.7452392578125, + "learning_rate": 3.0577063148774955e-05, + "loss": 80.8788, + "step": 97590 + }, + { + "epoch": 0.3943163499880816, + "grad_norm": 487.5601501464844, + "learning_rate": 3.057469299092264e-05, + "loss": 142.4071, + "step": 97600 + }, + { + "epoch": 0.39435675125344927, + "grad_norm": 648.5489501953125, + "learning_rate": 3.0572322626911235e-05, + "loss": 76.4466, + "step": 97610 + }, + { + "epoch": 0.3943971525188169, + "grad_norm": 574.6464233398438, + "learning_rate": 3.056995205678697e-05, + "loss": 68.3041, + "step": 97620 + }, + { + "epoch": 0.39443755378418455, + "grad_norm": 499.7304992675781, + "learning_rate": 3.056758128059603e-05, + "loss": 94.4749, + "step": 97630 + }, + { + "epoch": 0.39447795504955213, + "grad_norm": 1182.07470703125, + "learning_rate": 3.056521029838467e-05, + "loss": 95.0368, + "step": 97640 + }, + { + "epoch": 0.39451835631491977, + "grad_norm": 1022.74609375, + "learning_rate": 3.056283911019909e-05, + "loss": 125.6033, + "step": 97650 + }, + { + "epoch": 0.3945587575802874, + "grad_norm": 565.9879760742188, + "learning_rate": 3.056046771608552e-05, + "loss": 75.4657, + "step": 97660 + }, + { + "epoch": 0.39459915884565505, + "grad_norm": 888.713623046875, + "learning_rate": 3.0558096116090206e-05, + "loss": 99.4497, + "step": 97670 + }, + { + "epoch": 0.3946395601110227, + "grad_norm": 889.120849609375, + "learning_rate": 3.055572431025936e-05, + "loss": 94.511, + "step": 97680 + }, + { + "epoch": 0.39467996137639033, + "grad_norm": 1015.9010620117188, + "learning_rate": 3.055335229863924e-05, + "loss": 66.3761, + "step": 97690 + }, + { + "epoch": 0.3947203626417579, + "grad_norm": 621.0772705078125, + "learning_rate": 3.0550980081276075e-05, + "loss": 59.0628, + "step": 97700 + }, + { + "epoch": 0.39476076390712556, + "grad_norm": 605.052734375, + "learning_rate": 3.054860765821613e-05, + "loss": 98.0753, + "step": 97710 + }, + { + "epoch": 0.3948011651724932, + "grad_norm": 531.7883911132812, + "learning_rate": 3.054623502950565e-05, + "loss": 73.0067, + "step": 97720 + }, + { + "epoch": 0.39484156643786084, + "grad_norm": 1916.9913330078125, + "learning_rate": 3.054386219519088e-05, + "loss": 120.5387, + "step": 97730 + }, + { + "epoch": 0.3948819677032285, + "grad_norm": 794.3257446289062, + "learning_rate": 3.0541489155318096e-05, + "loss": 86.6966, + "step": 97740 + }, + { + "epoch": 0.3949223689685961, + "grad_norm": 1333.75146484375, + "learning_rate": 3.0539115909933554e-05, + "loss": 98.608, + "step": 97750 + }, + { + "epoch": 0.3949627702339637, + "grad_norm": 665.3630981445312, + 
"learning_rate": 3.053674245908352e-05, + "loss": 91.4593, + "step": 97760 + }, + { + "epoch": 0.39500317149933134, + "grad_norm": 627.7733154296875, + "learning_rate": 3.0534368802814266e-05, + "loss": 126.0689, + "step": 97770 + }, + { + "epoch": 0.395043572764699, + "grad_norm": 749.7200927734375, + "learning_rate": 3.053199494117206e-05, + "loss": 82.6285, + "step": 97780 + }, + { + "epoch": 0.3950839740300666, + "grad_norm": 833.488525390625, + "learning_rate": 3.0529620874203204e-05, + "loss": 105.4218, + "step": 97790 + }, + { + "epoch": 0.39512437529543426, + "grad_norm": 479.1595458984375, + "learning_rate": 3.052724660195396e-05, + "loss": 53.666, + "step": 97800 + }, + { + "epoch": 0.3951647765608019, + "grad_norm": 1871.7012939453125, + "learning_rate": 3.052487212447063e-05, + "loss": 117.0253, + "step": 97810 + }, + { + "epoch": 0.39520517782616954, + "grad_norm": 324.8230285644531, + "learning_rate": 3.05224974417995e-05, + "loss": 61.8451, + "step": 97820 + }, + { + "epoch": 0.3952455790915371, + "grad_norm": 521.6089477539062, + "learning_rate": 3.0520122553986855e-05, + "loss": 72.3224, + "step": 97830 + }, + { + "epoch": 0.39528598035690476, + "grad_norm": 1259.6353759765625, + "learning_rate": 3.051774746107901e-05, + "loss": 111.6072, + "step": 97840 + }, + { + "epoch": 0.3953263816222724, + "grad_norm": 446.57196044921875, + "learning_rate": 3.051537216312227e-05, + "loss": 76.1046, + "step": 97850 + }, + { + "epoch": 0.39536678288764004, + "grad_norm": 641.167724609375, + "learning_rate": 3.0512996660162923e-05, + "loss": 82.9897, + "step": 97860 + }, + { + "epoch": 0.3954071841530077, + "grad_norm": 893.676513671875, + "learning_rate": 3.05106209522473e-05, + "loss": 116.2442, + "step": 97870 + }, + { + "epoch": 0.3954475854183753, + "grad_norm": 397.1409606933594, + "learning_rate": 3.0508245039421713e-05, + "loss": 50.9891, + "step": 97880 + }, + { + "epoch": 0.3954879866837429, + "grad_norm": 650.3959350585938, + "learning_rate": 3.050586892173248e-05, + "loss": 91.4745, + "step": 97890 + }, + { + "epoch": 0.39552838794911055, + "grad_norm": 959.4873657226562, + "learning_rate": 3.0503492599225918e-05, + "loss": 138.1638, + "step": 97900 + }, + { + "epoch": 0.3955687892144782, + "grad_norm": 506.4880676269531, + "learning_rate": 3.0501116071948363e-05, + "loss": 89.7207, + "step": 97910 + }, + { + "epoch": 0.39560919047984583, + "grad_norm": 331.8837585449219, + "learning_rate": 3.0498739339946143e-05, + "loss": 63.1453, + "step": 97920 + }, + { + "epoch": 0.39564959174521347, + "grad_norm": 518.7547607421875, + "learning_rate": 3.0496362403265594e-05, + "loss": 81.8267, + "step": 97930 + }, + { + "epoch": 0.3956899930105811, + "grad_norm": 912.126953125, + "learning_rate": 3.0493985261953056e-05, + "loss": 134.0775, + "step": 97940 + }, + { + "epoch": 0.39573039427594875, + "grad_norm": 725.7486572265625, + "learning_rate": 3.0491607916054867e-05, + "loss": 56.3376, + "step": 97950 + }, + { + "epoch": 0.39577079554131633, + "grad_norm": 1448.6094970703125, + "learning_rate": 3.048923036561739e-05, + "loss": 79.0782, + "step": 97960 + }, + { + "epoch": 0.395811196806684, + "grad_norm": 508.8824768066406, + "learning_rate": 3.0486852610686967e-05, + "loss": 84.799, + "step": 97970 + }, + { + "epoch": 0.3958515980720516, + "grad_norm": 596.7879638671875, + "learning_rate": 3.0484474651309947e-05, + "loss": 71.2581, + "step": 97980 + }, + { + "epoch": 0.39589199933741925, + "grad_norm": 936.7969360351562, + "learning_rate": 3.04820964875327e-05, + "loss": 68.5976, + 
"step": 97990 + }, + { + "epoch": 0.3959324006027869, + "grad_norm": 291.3824462890625, + "learning_rate": 3.0479718119401588e-05, + "loss": 107.212, + "step": 98000 + }, + { + "epoch": 0.39597280186815453, + "grad_norm": 656.869384765625, + "learning_rate": 3.0477339546962974e-05, + "loss": 52.5708, + "step": 98010 + }, + { + "epoch": 0.3960132031335221, + "grad_norm": 591.0899658203125, + "learning_rate": 3.0474960770263223e-05, + "loss": 83.754, + "step": 98020 + }, + { + "epoch": 0.39605360439888976, + "grad_norm": 670.6320190429688, + "learning_rate": 3.0472581789348728e-05, + "loss": 105.7439, + "step": 98030 + }, + { + "epoch": 0.3960940056642574, + "grad_norm": 565.4133911132812, + "learning_rate": 3.0470202604265858e-05, + "loss": 62.5353, + "step": 98040 + }, + { + "epoch": 0.39613440692962504, + "grad_norm": 1179.569091796875, + "learning_rate": 3.0467823215060997e-05, + "loss": 64.8064, + "step": 98050 + }, + { + "epoch": 0.3961748081949927, + "grad_norm": 591.3402709960938, + "learning_rate": 3.046544362178053e-05, + "loss": 133.6274, + "step": 98060 + }, + { + "epoch": 0.3962152094603603, + "grad_norm": 326.0115051269531, + "learning_rate": 3.0463063824470854e-05, + "loss": 88.5847, + "step": 98070 + }, + { + "epoch": 0.3962556107257279, + "grad_norm": 1052.79150390625, + "learning_rate": 3.0460683823178366e-05, + "loss": 115.2567, + "step": 98080 + }, + { + "epoch": 0.39629601199109554, + "grad_norm": 753.7482299804688, + "learning_rate": 3.045830361794946e-05, + "loss": 68.8814, + "step": 98090 + }, + { + "epoch": 0.3963364132564632, + "grad_norm": 988.0963745117188, + "learning_rate": 3.0455923208830532e-05, + "loss": 89.5338, + "step": 98100 + }, + { + "epoch": 0.3963768145218308, + "grad_norm": 716.3264770507812, + "learning_rate": 3.0453542595868008e-05, + "loss": 80.6079, + "step": 98110 + }, + { + "epoch": 0.39641721578719846, + "grad_norm": 1781.496337890625, + "learning_rate": 3.045116177910829e-05, + "loss": 96.1841, + "step": 98120 + }, + { + "epoch": 0.3964576170525661, + "grad_norm": 795.7607421875, + "learning_rate": 3.044878075859778e-05, + "loss": 80.5134, + "step": 98130 + }, + { + "epoch": 0.39649801831793374, + "grad_norm": 599.345947265625, + "learning_rate": 3.0446399534382923e-05, + "loss": 53.8889, + "step": 98140 + }, + { + "epoch": 0.3965384195833013, + "grad_norm": 1013.7322387695312, + "learning_rate": 3.0444018106510122e-05, + "loss": 64.7844, + "step": 98150 + }, + { + "epoch": 0.39657882084866897, + "grad_norm": 585.30419921875, + "learning_rate": 3.0441636475025816e-05, + "loss": 76.0617, + "step": 98160 + }, + { + "epoch": 0.3966192221140366, + "grad_norm": 701.74951171875, + "learning_rate": 3.0439254639976427e-05, + "loss": 87.0865, + "step": 98170 + }, + { + "epoch": 0.39665962337940425, + "grad_norm": 848.7946166992188, + "learning_rate": 3.0436872601408392e-05, + "loss": 88.0441, + "step": 98180 + }, + { + "epoch": 0.3967000246447719, + "grad_norm": 614.0780639648438, + "learning_rate": 3.0434490359368155e-05, + "loss": 74.0084, + "step": 98190 + }, + { + "epoch": 0.3967404259101395, + "grad_norm": 845.06591796875, + "learning_rate": 3.0432107913902162e-05, + "loss": 88.8436, + "step": 98200 + }, + { + "epoch": 0.3967808271755071, + "grad_norm": 1059.6448974609375, + "learning_rate": 3.0429725265056843e-05, + "loss": 68.996, + "step": 98210 + }, + { + "epoch": 0.39682122844087475, + "grad_norm": 938.8751220703125, + "learning_rate": 3.0427342412878666e-05, + "loss": 64.346, + "step": 98220 + }, + { + "epoch": 0.3968616297062424, + 
"grad_norm": 908.12890625, + "learning_rate": 3.042495935741409e-05, + "loss": 80.7446, + "step": 98230 + }, + { + "epoch": 0.39690203097161003, + "grad_norm": 962.1690063476562, + "learning_rate": 3.0422576098709554e-05, + "loss": 77.9753, + "step": 98240 + }, + { + "epoch": 0.39694243223697767, + "grad_norm": 694.2074584960938, + "learning_rate": 3.0420192636811535e-05, + "loss": 80.3033, + "step": 98250 + }, + { + "epoch": 0.3969828335023453, + "grad_norm": 669.4580688476562, + "learning_rate": 3.0417808971766488e-05, + "loss": 83.268, + "step": 98260 + }, + { + "epoch": 0.39702323476771295, + "grad_norm": 682.94921875, + "learning_rate": 3.0415425103620893e-05, + "loss": 73.6248, + "step": 98270 + }, + { + "epoch": 0.39706363603308054, + "grad_norm": 902.498779296875, + "learning_rate": 3.041304103242123e-05, + "loss": 84.3309, + "step": 98280 + }, + { + "epoch": 0.3971040372984482, + "grad_norm": 676.5170288085938, + "learning_rate": 3.041065675821397e-05, + "loss": 81.9521, + "step": 98290 + }, + { + "epoch": 0.3971444385638158, + "grad_norm": 350.4389343261719, + "learning_rate": 3.0408272281045598e-05, + "loss": 70.996, + "step": 98300 + }, + { + "epoch": 0.39718483982918346, + "grad_norm": 854.7051391601562, + "learning_rate": 3.0405887600962592e-05, + "loss": 80.5255, + "step": 98310 + }, + { + "epoch": 0.3972252410945511, + "grad_norm": 705.3930053710938, + "learning_rate": 3.0403502718011456e-05, + "loss": 72.2984, + "step": 98320 + }, + { + "epoch": 0.39726564235991874, + "grad_norm": 593.5565185546875, + "learning_rate": 3.040111763223868e-05, + "loss": 61.1411, + "step": 98330 + }, + { + "epoch": 0.3973060436252863, + "grad_norm": 558.498291015625, + "learning_rate": 3.0398732343690755e-05, + "loss": 74.2613, + "step": 98340 + }, + { + "epoch": 0.39734644489065396, + "grad_norm": 549.3425903320312, + "learning_rate": 3.039634685241419e-05, + "loss": 62.4232, + "step": 98350 + }, + { + "epoch": 0.3973868461560216, + "grad_norm": 774.7797241210938, + "learning_rate": 3.0393961158455494e-05, + "loss": 111.8854, + "step": 98360 + }, + { + "epoch": 0.39742724742138924, + "grad_norm": 394.28302001953125, + "learning_rate": 3.0391575261861175e-05, + "loss": 69.6373, + "step": 98370 + }, + { + "epoch": 0.3974676486867569, + "grad_norm": 560.3698120117188, + "learning_rate": 3.0389189162677746e-05, + "loss": 62.2665, + "step": 98380 + }, + { + "epoch": 0.3975080499521245, + "grad_norm": 986.34619140625, + "learning_rate": 3.0386802860951722e-05, + "loss": 91.5001, + "step": 98390 + }, + { + "epoch": 0.3975484512174921, + "grad_norm": 1135.082763671875, + "learning_rate": 3.038441635672963e-05, + "loss": 92.2774, + "step": 98400 + }, + { + "epoch": 0.39758885248285974, + "grad_norm": 704.1770629882812, + "learning_rate": 3.0382029650058e-05, + "loss": 80.8532, + "step": 98410 + }, + { + "epoch": 0.3976292537482274, + "grad_norm": 765.6166381835938, + "learning_rate": 3.037964274098335e-05, + "loss": 67.4885, + "step": 98420 + }, + { + "epoch": 0.397669655013595, + "grad_norm": 865.6827392578125, + "learning_rate": 3.0377255629552222e-05, + "loss": 78.3381, + "step": 98430 + }, + { + "epoch": 0.39771005627896266, + "grad_norm": 663.5968627929688, + "learning_rate": 3.037486831581115e-05, + "loss": 77.0189, + "step": 98440 + }, + { + "epoch": 0.3977504575443303, + "grad_norm": 1004.5172729492188, + "learning_rate": 3.0372480799806686e-05, + "loss": 100.9848, + "step": 98450 + }, + { + "epoch": 0.39779085880969794, + "grad_norm": 1230.0789794921875, + "learning_rate": 
3.0370093081585366e-05, + "loss": 140.0623, + "step": 98460 + }, + { + "epoch": 0.39783126007506553, + "grad_norm": 662.4204711914062, + "learning_rate": 3.036770516119374e-05, + "loss": 139.6113, + "step": 98470 + }, + { + "epoch": 0.39787166134043317, + "grad_norm": 1101.733642578125, + "learning_rate": 3.0365317038678362e-05, + "loss": 70.0445, + "step": 98480 + }, + { + "epoch": 0.3979120626058008, + "grad_norm": 1371.86083984375, + "learning_rate": 3.0362928714085804e-05, + "loss": 68.7417, + "step": 98490 + }, + { + "epoch": 0.39795246387116845, + "grad_norm": 586.881591796875, + "learning_rate": 3.036054018746261e-05, + "loss": 68.8815, + "step": 98500 + }, + { + "epoch": 0.3979928651365361, + "grad_norm": 547.4058837890625, + "learning_rate": 3.0358151458855345e-05, + "loss": 85.4842, + "step": 98510 + }, + { + "epoch": 0.39803326640190373, + "grad_norm": 906.71630859375, + "learning_rate": 3.0355762528310588e-05, + "loss": 51.5357, + "step": 98520 + }, + { + "epoch": 0.3980736676672713, + "grad_norm": 1788.11279296875, + "learning_rate": 3.035337339587491e-05, + "loss": 80.5319, + "step": 98530 + }, + { + "epoch": 0.39811406893263895, + "grad_norm": 1238.6217041015625, + "learning_rate": 3.035098406159489e-05, + "loss": 123.4659, + "step": 98540 + }, + { + "epoch": 0.3981544701980066, + "grad_norm": 906.12255859375, + "learning_rate": 3.0348594525517102e-05, + "loss": 72.1918, + "step": 98550 + }, + { + "epoch": 0.39819487146337423, + "grad_norm": 306.21539306640625, + "learning_rate": 3.0346204787688137e-05, + "loss": 91.2362, + "step": 98560 + }, + { + "epoch": 0.3982352727287419, + "grad_norm": 383.43341064453125, + "learning_rate": 3.0343814848154586e-05, + "loss": 47.9069, + "step": 98570 + }, + { + "epoch": 0.3982756739941095, + "grad_norm": 815.9132080078125, + "learning_rate": 3.0341424706963035e-05, + "loss": 90.6244, + "step": 98580 + }, + { + "epoch": 0.39831607525947715, + "grad_norm": 778.144775390625, + "learning_rate": 3.033903436416009e-05, + "loss": 85.0665, + "step": 98590 + }, + { + "epoch": 0.39835647652484474, + "grad_norm": 1028.6190185546875, + "learning_rate": 3.0336643819792342e-05, + "loss": 97.0894, + "step": 98600 + }, + { + "epoch": 0.3983968777902124, + "grad_norm": 888.2212524414062, + "learning_rate": 3.0334253073906406e-05, + "loss": 103.8706, + "step": 98610 + }, + { + "epoch": 0.39843727905558, + "grad_norm": 563.32373046875, + "learning_rate": 3.0331862126548883e-05, + "loss": 74.2032, + "step": 98620 + }, + { + "epoch": 0.39847768032094766, + "grad_norm": 495.56427001953125, + "learning_rate": 3.0329470977766387e-05, + "loss": 89.3122, + "step": 98630 + }, + { + "epoch": 0.3985180815863153, + "grad_norm": 739.9896240234375, + "learning_rate": 3.0327079627605534e-05, + "loss": 65.3959, + "step": 98640 + }, + { + "epoch": 0.39855848285168294, + "grad_norm": 625.0350952148438, + "learning_rate": 3.0324688076112953e-05, + "loss": 63.1011, + "step": 98650 + }, + { + "epoch": 0.3985988841170505, + "grad_norm": 483.5847473144531, + "learning_rate": 3.0322296323335263e-05, + "loss": 101.6274, + "step": 98660 + }, + { + "epoch": 0.39863928538241816, + "grad_norm": 725.4774169921875, + "learning_rate": 3.031990436931909e-05, + "loss": 60.2615, + "step": 98670 + }, + { + "epoch": 0.3986796866477858, + "grad_norm": 562.6581420898438, + "learning_rate": 3.0317512214111066e-05, + "loss": 74.9159, + "step": 98680 + }, + { + "epoch": 0.39872008791315344, + "grad_norm": 1077.9549560546875, + "learning_rate": 3.031511985775783e-05, + "loss": 89.4985, + "step": 
98690 + }, + { + "epoch": 0.3987604891785211, + "grad_norm": 487.0472412109375, + "learning_rate": 3.0312727300306024e-05, + "loss": 71.7898, + "step": 98700 + }, + { + "epoch": 0.3988008904438887, + "grad_norm": 507.8310241699219, + "learning_rate": 3.0310334541802287e-05, + "loss": 106.8057, + "step": 98710 + }, + { + "epoch": 0.3988412917092563, + "grad_norm": 515.6994018554688, + "learning_rate": 3.030794158229327e-05, + "loss": 92.5903, + "step": 98720 + }, + { + "epoch": 0.39888169297462395, + "grad_norm": 2138.306884765625, + "learning_rate": 3.030554842182563e-05, + "loss": 96.4932, + "step": 98730 + }, + { + "epoch": 0.3989220942399916, + "grad_norm": 544.39794921875, + "learning_rate": 3.0303155060446014e-05, + "loss": 63.6475, + "step": 98740 + }, + { + "epoch": 0.3989624955053592, + "grad_norm": 636.1758422851562, + "learning_rate": 3.030076149820109e-05, + "loss": 82.0563, + "step": 98750 + }, + { + "epoch": 0.39900289677072687, + "grad_norm": 768.5596923828125, + "learning_rate": 3.029836773513751e-05, + "loss": 117.9781, + "step": 98760 + }, + { + "epoch": 0.3990432980360945, + "grad_norm": 667.6884765625, + "learning_rate": 3.0295973771301956e-05, + "loss": 60.8177, + "step": 98770 + }, + { + "epoch": 0.39908369930146215, + "grad_norm": 468.7426452636719, + "learning_rate": 3.0293579606741084e-05, + "loss": 59.5037, + "step": 98780 + }, + { + "epoch": 0.39912410056682973, + "grad_norm": 498.9030456542969, + "learning_rate": 3.0291185241501587e-05, + "loss": 108.4103, + "step": 98790 + }, + { + "epoch": 0.39916450183219737, + "grad_norm": 942.607177734375, + "learning_rate": 3.028879067563013e-05, + "loss": 57.9381, + "step": 98800 + }, + { + "epoch": 0.399204903097565, + "grad_norm": 298.77880859375, + "learning_rate": 3.0286395909173403e-05, + "loss": 74.9718, + "step": 98810 + }, + { + "epoch": 0.39924530436293265, + "grad_norm": 1164.162353515625, + "learning_rate": 3.0284000942178095e-05, + "loss": 83.3274, + "step": 98820 + }, + { + "epoch": 0.3992857056283003, + "grad_norm": 385.4676513671875, + "learning_rate": 3.028160577469089e-05, + "loss": 112.2805, + "step": 98830 + }, + { + "epoch": 0.39932610689366793, + "grad_norm": 1375.18701171875, + "learning_rate": 3.0279210406758493e-05, + "loss": 128.8985, + "step": 98840 + }, + { + "epoch": 0.3993665081590355, + "grad_norm": 958.577392578125, + "learning_rate": 3.0276814838427593e-05, + "loss": 94.9293, + "step": 98850 + }, + { + "epoch": 0.39940690942440316, + "grad_norm": 587.9575805664062, + "learning_rate": 3.02744190697449e-05, + "loss": 81.9739, + "step": 98860 + }, + { + "epoch": 0.3994473106897708, + "grad_norm": 785.913330078125, + "learning_rate": 3.027202310075711e-05, + "loss": 86.037, + "step": 98870 + }, + { + "epoch": 0.39948771195513844, + "grad_norm": 518.3419799804688, + "learning_rate": 3.026962693151094e-05, + "loss": 68.4747, + "step": 98880 + }, + { + "epoch": 0.3995281132205061, + "grad_norm": 437.0054016113281, + "learning_rate": 3.0267230562053113e-05, + "loss": 56.3913, + "step": 98890 + }, + { + "epoch": 0.3995685144858737, + "grad_norm": 435.235107421875, + "learning_rate": 3.0264833992430343e-05, + "loss": 119.2782, + "step": 98900 + }, + { + "epoch": 0.39960891575124136, + "grad_norm": 792.4741821289062, + "learning_rate": 3.0262437222689344e-05, + "loss": 87.0967, + "step": 98910 + }, + { + "epoch": 0.39964931701660894, + "grad_norm": 690.3010864257812, + "learning_rate": 3.0260040252876856e-05, + "loss": 89.4784, + "step": 98920 + }, + { + "epoch": 0.3996897182819766, + "grad_norm": 
635.8654174804688, + "learning_rate": 3.0257643083039587e-05, + "loss": 99.8019, + "step": 98930 + }, + { + "epoch": 0.3997301195473442, + "grad_norm": 811.9600219726562, + "learning_rate": 3.0255245713224294e-05, + "loss": 97.4382, + "step": 98940 + }, + { + "epoch": 0.39977052081271186, + "grad_norm": 589.4910888671875, + "learning_rate": 3.0252848143477706e-05, + "loss": 61.2517, + "step": 98950 + }, + { + "epoch": 0.3998109220780795, + "grad_norm": 499.3891906738281, + "learning_rate": 3.025045037384656e-05, + "loss": 85.1492, + "step": 98960 + }, + { + "epoch": 0.39985132334344714, + "grad_norm": 643.8739013671875, + "learning_rate": 3.0248052404377613e-05, + "loss": 65.5834, + "step": 98970 + }, + { + "epoch": 0.3998917246088147, + "grad_norm": 1272.871337890625, + "learning_rate": 3.0245654235117605e-05, + "loss": 143.5662, + "step": 98980 + }, + { + "epoch": 0.39993212587418236, + "grad_norm": 520.2876586914062, + "learning_rate": 3.0243255866113292e-05, + "loss": 81.902, + "step": 98990 + }, + { + "epoch": 0.39997252713955, + "grad_norm": 1611.1883544921875, + "learning_rate": 3.024085729741143e-05, + "loss": 97.9037, + "step": 99000 + }, + { + "epoch": 0.40001292840491764, + "grad_norm": 812.41748046875, + "learning_rate": 3.0238458529058792e-05, + "loss": 94.681, + "step": 99010 + }, + { + "epoch": 0.4000533296702853, + "grad_norm": 1087.806640625, + "learning_rate": 3.023605956110213e-05, + "loss": 97.537, + "step": 99020 + }, + { + "epoch": 0.4000937309356529, + "grad_norm": 545.8439331054688, + "learning_rate": 3.0233660393588217e-05, + "loss": 97.0779, + "step": 99030 + }, + { + "epoch": 0.4001341322010205, + "grad_norm": 920.5435180664062, + "learning_rate": 3.0231261026563817e-05, + "loss": 78.5169, + "step": 99040 + }, + { + "epoch": 0.40017453346638815, + "grad_norm": 1438.809326171875, + "learning_rate": 3.022886146007572e-05, + "loss": 95.4042, + "step": 99050 + }, + { + "epoch": 0.4002149347317558, + "grad_norm": 650.44091796875, + "learning_rate": 3.0226461694170706e-05, + "loss": 67.4997, + "step": 99060 + }, + { + "epoch": 0.40025533599712343, + "grad_norm": 437.4232177734375, + "learning_rate": 3.022406172889556e-05, + "loss": 78.8292, + "step": 99070 + }, + { + "epoch": 0.40029573726249107, + "grad_norm": 431.8522644042969, + "learning_rate": 3.0221661564297062e-05, + "loss": 65.4507, + "step": 99080 + }, + { + "epoch": 0.4003361385278587, + "grad_norm": 627.8270263671875, + "learning_rate": 3.021926120042201e-05, + "loss": 89.4808, + "step": 99090 + }, + { + "epoch": 0.40037653979322635, + "grad_norm": 550.3790893554688, + "learning_rate": 3.02168606373172e-05, + "loss": 77.0566, + "step": 99100 + }, + { + "epoch": 0.40041694105859393, + "grad_norm": 825.8607177734375, + "learning_rate": 3.0214459875029437e-05, + "loss": 83.2452, + "step": 99110 + }, + { + "epoch": 0.4004573423239616, + "grad_norm": 315.41314697265625, + "learning_rate": 3.0212058913605512e-05, + "loss": 85.2819, + "step": 99120 + }, + { + "epoch": 0.4004977435893292, + "grad_norm": 464.0275573730469, + "learning_rate": 3.0209657753092242e-05, + "loss": 77.3277, + "step": 99130 + }, + { + "epoch": 0.40053814485469685, + "grad_norm": 1253.79052734375, + "learning_rate": 3.0207256393536438e-05, + "loss": 90.5315, + "step": 99140 + }, + { + "epoch": 0.4005785461200645, + "grad_norm": 698.44970703125, + "learning_rate": 3.020485483498492e-05, + "loss": 77.992, + "step": 99150 + }, + { + "epoch": 0.40061894738543213, + "grad_norm": 1181.1234130859375, + "learning_rate": 3.0202453077484496e-05, + 
"loss": 109.0137, + "step": 99160 + }, + { + "epoch": 0.4006593486507997, + "grad_norm": 1740.4613037109375, + "learning_rate": 3.0200051121081996e-05, + "loss": 91.0694, + "step": 99170 + }, + { + "epoch": 0.40069974991616736, + "grad_norm": 1024.923828125, + "learning_rate": 3.0197648965824258e-05, + "loss": 91.2833, + "step": 99180 + }, + { + "epoch": 0.400740151181535, + "grad_norm": 541.4926147460938, + "learning_rate": 3.01952466117581e-05, + "loss": 79.982, + "step": 99190 + }, + { + "epoch": 0.40078055244690264, + "grad_norm": 716.5746459960938, + "learning_rate": 3.0192844058930356e-05, + "loss": 76.8696, + "step": 99200 + }, + { + "epoch": 0.4008209537122703, + "grad_norm": 1153.876953125, + "learning_rate": 3.019044130738787e-05, + "loss": 121.415, + "step": 99210 + }, + { + "epoch": 0.4008613549776379, + "grad_norm": 1168.063232421875, + "learning_rate": 3.0188038357177497e-05, + "loss": 59.6883, + "step": 99220 + }, + { + "epoch": 0.40090175624300556, + "grad_norm": 366.7137451171875, + "learning_rate": 3.0185635208346053e-05, + "loss": 59.9613, + "step": 99230 + }, + { + "epoch": 0.40094215750837314, + "grad_norm": 428.3357849121094, + "learning_rate": 3.0183231860940412e-05, + "loss": 76.3481, + "step": 99240 + }, + { + "epoch": 0.4009825587737408, + "grad_norm": 1282.789794921875, + "learning_rate": 3.018082831500743e-05, + "loss": 76.8095, + "step": 99250 + }, + { + "epoch": 0.4010229600391084, + "grad_norm": 957.7493896484375, + "learning_rate": 3.0178424570593954e-05, + "loss": 80.6471, + "step": 99260 + }, + { + "epoch": 0.40106336130447606, + "grad_norm": 766.3256225585938, + "learning_rate": 3.0176020627746853e-05, + "loss": 128.2472, + "step": 99270 + }, + { + "epoch": 0.4011037625698437, + "grad_norm": 568.9763793945312, + "learning_rate": 3.0173616486512983e-05, + "loss": 100.8278, + "step": 99280 + }, + { + "epoch": 0.40114416383521134, + "grad_norm": 1855.404052734375, + "learning_rate": 3.017121214693923e-05, + "loss": 92.8454, + "step": 99290 + }, + { + "epoch": 0.4011845651005789, + "grad_norm": 853.8922119140625, + "learning_rate": 3.016880760907246e-05, + "loss": 90.6379, + "step": 99300 + }, + { + "epoch": 0.40122496636594657, + "grad_norm": 546.8419189453125, + "learning_rate": 3.0166402872959547e-05, + "loss": 95.8965, + "step": 99310 + }, + { + "epoch": 0.4012653676313142, + "grad_norm": 515.338623046875, + "learning_rate": 3.0163997938647377e-05, + "loss": 93.8508, + "step": 99320 + }, + { + "epoch": 0.40130576889668185, + "grad_norm": 441.02252197265625, + "learning_rate": 3.0161592806182826e-05, + "loss": 83.6632, + "step": 99330 + }, + { + "epoch": 0.4013461701620495, + "grad_norm": 789.6459350585938, + "learning_rate": 3.01591874756128e-05, + "loss": 99.5036, + "step": 99340 + }, + { + "epoch": 0.4013865714274171, + "grad_norm": 617.9428100585938, + "learning_rate": 3.0156781946984187e-05, + "loss": 91.9187, + "step": 99350 + }, + { + "epoch": 0.4014269726927847, + "grad_norm": 1097.3756103515625, + "learning_rate": 3.0154376220343883e-05, + "loss": 58.1674, + "step": 99360 + }, + { + "epoch": 0.40146737395815235, + "grad_norm": 727.1930541992188, + "learning_rate": 3.0151970295738775e-05, + "loss": 75.1414, + "step": 99370 + }, + { + "epoch": 0.40150777522352, + "grad_norm": 495.87744140625, + "learning_rate": 3.0149564173215786e-05, + "loss": 64.4659, + "step": 99380 + }, + { + "epoch": 0.40154817648888763, + "grad_norm": 1119.5023193359375, + "learning_rate": 3.014715785282182e-05, + "loss": 109.454, + "step": 99390 + }, + { + "epoch": 
0.40158857775425527, + "grad_norm": 301.1625061035156, + "learning_rate": 3.0144751334603787e-05, + "loss": 78.9939, + "step": 99400 + }, + { + "epoch": 0.4016289790196229, + "grad_norm": 318.4393615722656, + "learning_rate": 3.01423446186086e-05, + "loss": 123.0972, + "step": 99410 + }, + { + "epoch": 0.40166938028499055, + "grad_norm": 599.4600830078125, + "learning_rate": 3.013993770488318e-05, + "loss": 89.7091, + "step": 99420 + }, + { + "epoch": 0.40170978155035814, + "grad_norm": 719.4998779296875, + "learning_rate": 3.0137530593474467e-05, + "loss": 73.5025, + "step": 99430 + }, + { + "epoch": 0.4017501828157258, + "grad_norm": 897.5112915039062, + "learning_rate": 3.0135123284429366e-05, + "loss": 95.5938, + "step": 99440 + }, + { + "epoch": 0.4017905840810934, + "grad_norm": 999.29345703125, + "learning_rate": 3.013271577779482e-05, + "loss": 83.9119, + "step": 99450 + }, + { + "epoch": 0.40183098534646106, + "grad_norm": 819.5819702148438, + "learning_rate": 3.0130308073617765e-05, + "loss": 92.7972, + "step": 99460 + }, + { + "epoch": 0.4018713866118287, + "grad_norm": 710.673828125, + "learning_rate": 3.012790017194514e-05, + "loss": 97.8923, + "step": 99470 + }, + { + "epoch": 0.40191178787719634, + "grad_norm": 578.9378662109375, + "learning_rate": 3.0125492072823884e-05, + "loss": 45.1159, + "step": 99480 + }, + { + "epoch": 0.4019521891425639, + "grad_norm": 469.8042297363281, + "learning_rate": 3.0123083776300946e-05, + "loss": 80.9406, + "step": 99490 + }, + { + "epoch": 0.40199259040793156, + "grad_norm": 808.1786499023438, + "learning_rate": 3.0120675282423274e-05, + "loss": 113.9149, + "step": 99500 + }, + { + "epoch": 0.4020329916732992, + "grad_norm": 1038.155517578125, + "learning_rate": 3.011826659123784e-05, + "loss": 57.0254, + "step": 99510 + }, + { + "epoch": 0.40207339293866684, + "grad_norm": 562.3511352539062, + "learning_rate": 3.011585770279158e-05, + "loss": 81.9031, + "step": 99520 + }, + { + "epoch": 0.4021137942040345, + "grad_norm": 761.211181640625, + "learning_rate": 3.011344861713147e-05, + "loss": 84.0078, + "step": 99530 + }, + { + "epoch": 0.4021541954694021, + "grad_norm": 846.1482543945312, + "learning_rate": 3.0111039334304474e-05, + "loss": 101.6299, + "step": 99540 + }, + { + "epoch": 0.40219459673476976, + "grad_norm": 611.5349731445312, + "learning_rate": 3.0108629854357557e-05, + "loss": 81.4837, + "step": 99550 + }, + { + "epoch": 0.40223499800013734, + "grad_norm": 674.4642944335938, + "learning_rate": 3.0106220177337696e-05, + "loss": 84.659, + "step": 99560 + }, + { + "epoch": 0.402275399265505, + "grad_norm": 745.9607543945312, + "learning_rate": 3.010381030329187e-05, + "loss": 94.468, + "step": 99570 + }, + { + "epoch": 0.4023158005308726, + "grad_norm": 950.1898193359375, + "learning_rate": 3.010140023226706e-05, + "loss": 57.8517, + "step": 99580 + }, + { + "epoch": 0.40235620179624026, + "grad_norm": 672.491943359375, + "learning_rate": 3.0098989964310254e-05, + "loss": 71.7278, + "step": 99590 + }, + { + "epoch": 0.4023966030616079, + "grad_norm": 1000.3580932617188, + "learning_rate": 3.009657949946844e-05, + "loss": 82.2842, + "step": 99600 + }, + { + "epoch": 0.40243700432697554, + "grad_norm": 582.4872436523438, + "learning_rate": 3.009416883778861e-05, + "loss": 52.8281, + "step": 99610 + }, + { + "epoch": 0.40247740559234313, + "grad_norm": 756.071044921875, + "learning_rate": 3.009175797931776e-05, + "loss": 64.7278, + "step": 99620 + }, + { + "epoch": 0.40251780685771077, + "grad_norm": 775.9392700195312, + 
"learning_rate": 3.0089346924102892e-05, + "loss": 100.1059, + "step": 99630 + }, + { + "epoch": 0.4025582081230784, + "grad_norm": 988.809326171875, + "learning_rate": 3.0086935672191012e-05, + "loss": 74.0253, + "step": 99640 + }, + { + "epoch": 0.40259860938844605, + "grad_norm": 809.4541625976562, + "learning_rate": 3.008452422362913e-05, + "loss": 64.973, + "step": 99650 + }, + { + "epoch": 0.4026390106538137, + "grad_norm": 787.6926879882812, + "learning_rate": 3.0082112578464252e-05, + "loss": 81.0303, + "step": 99660 + }, + { + "epoch": 0.40267941191918133, + "grad_norm": 768.64013671875, + "learning_rate": 3.0079700736743406e-05, + "loss": 90.8321, + "step": 99670 + }, + { + "epoch": 0.4027198131845489, + "grad_norm": 610.850341796875, + "learning_rate": 3.0077288698513595e-05, + "loss": 76.1741, + "step": 99680 + }, + { + "epoch": 0.40276021444991655, + "grad_norm": 400.5533142089844, + "learning_rate": 3.0074876463821855e-05, + "loss": 61.6648, + "step": 99690 + }, + { + "epoch": 0.4028006157152842, + "grad_norm": 486.5260314941406, + "learning_rate": 3.007246403271522e-05, + "loss": 86.8623, + "step": 99700 + }, + { + "epoch": 0.40284101698065183, + "grad_norm": 812.2846069335938, + "learning_rate": 3.0070051405240712e-05, + "loss": 62.821, + "step": 99710 + }, + { + "epoch": 0.4028814182460195, + "grad_norm": 639.4912109375, + "learning_rate": 3.006763858144536e-05, + "loss": 51.6889, + "step": 99720 + }, + { + "epoch": 0.4029218195113871, + "grad_norm": 649.7606811523438, + "learning_rate": 3.006522556137621e-05, + "loss": 109.2611, + "step": 99730 + }, + { + "epoch": 0.40296222077675475, + "grad_norm": 481.4685974121094, + "learning_rate": 3.006281234508031e-05, + "loss": 82.8276, + "step": 99740 + }, + { + "epoch": 0.40300262204212234, + "grad_norm": 590.0535278320312, + "learning_rate": 3.0060398932604707e-05, + "loss": 70.5272, + "step": 99750 + }, + { + "epoch": 0.40304302330749, + "grad_norm": 475.1201171875, + "learning_rate": 3.0057985323996443e-05, + "loss": 108.1122, + "step": 99760 + }, + { + "epoch": 0.4030834245728576, + "grad_norm": 902.373291015625, + "learning_rate": 3.0055571519302572e-05, + "loss": 95.9382, + "step": 99770 + }, + { + "epoch": 0.40312382583822526, + "grad_norm": 577.8598022460938, + "learning_rate": 3.0053157518570163e-05, + "loss": 100.4101, + "step": 99780 + }, + { + "epoch": 0.4031642271035929, + "grad_norm": 993.1498413085938, + "learning_rate": 3.0050743321846273e-05, + "loss": 117.3123, + "step": 99790 + }, + { + "epoch": 0.40320462836896054, + "grad_norm": 1818.2655029296875, + "learning_rate": 3.0048328929177963e-05, + "loss": 112.9532, + "step": 99800 + }, + { + "epoch": 0.4032450296343281, + "grad_norm": 759.5618286132812, + "learning_rate": 3.0045914340612312e-05, + "loss": 115.8396, + "step": 99810 + }, + { + "epoch": 0.40328543089969576, + "grad_norm": 807.02734375, + "learning_rate": 3.0043499556196384e-05, + "loss": 66.4623, + "step": 99820 + }, + { + "epoch": 0.4033258321650634, + "grad_norm": 1302.1141357421875, + "learning_rate": 3.0041084575977267e-05, + "loss": 69.505, + "step": 99830 + }, + { + "epoch": 0.40336623343043104, + "grad_norm": 650.6442260742188, + "learning_rate": 3.0038669400002035e-05, + "loss": 71.3178, + "step": 99840 + }, + { + "epoch": 0.4034066346957987, + "grad_norm": 611.0090942382812, + "learning_rate": 3.003625402831777e-05, + "loss": 73.2803, + "step": 99850 + }, + { + "epoch": 0.4034470359611663, + "grad_norm": 627.5121459960938, + "learning_rate": 3.003383846097157e-05, + "loss": 74.7921, + 
"step": 99860 + }, + { + "epoch": 0.40348743722653396, + "grad_norm": 522.5289306640625, + "learning_rate": 3.0031422698010523e-05, + "loss": 82.5244, + "step": 99870 + }, + { + "epoch": 0.40352783849190155, + "grad_norm": 581.9075317382812, + "learning_rate": 3.002900673948173e-05, + "loss": 60.1666, + "step": 99880 + }, + { + "epoch": 0.4035682397572692, + "grad_norm": 1812.654296875, + "learning_rate": 3.0026590585432277e-05, + "loss": 87.8793, + "step": 99890 + }, + { + "epoch": 0.4036086410226368, + "grad_norm": 293.38922119140625, + "learning_rate": 3.0024174235909286e-05, + "loss": 92.9023, + "step": 99900 + }, + { + "epoch": 0.40364904228800447, + "grad_norm": 921.0210571289062, + "learning_rate": 3.0021757690959856e-05, + "loss": 66.1603, + "step": 99910 + }, + { + "epoch": 0.4036894435533721, + "grad_norm": 711.600830078125, + "learning_rate": 3.0019340950631103e-05, + "loss": 118.6453, + "step": 99920 + }, + { + "epoch": 0.40372984481873975, + "grad_norm": 1509.9072265625, + "learning_rate": 3.0016924014970138e-05, + "loss": 129.1589, + "step": 99930 + }, + { + "epoch": 0.40377024608410733, + "grad_norm": 1741.27587890625, + "learning_rate": 3.001450688402408e-05, + "loss": 109.9117, + "step": 99940 + }, + { + "epoch": 0.40381064734947497, + "grad_norm": 581.2483520507812, + "learning_rate": 3.0012089557840056e-05, + "loss": 76.3403, + "step": 99950 + }, + { + "epoch": 0.4038510486148426, + "grad_norm": 694.52001953125, + "learning_rate": 3.000967203646519e-05, + "loss": 79.8633, + "step": 99960 + }, + { + "epoch": 0.40389144988021025, + "grad_norm": 619.0547485351562, + "learning_rate": 3.000725431994662e-05, + "loss": 77.9999, + "step": 99970 + }, + { + "epoch": 0.4039318511455779, + "grad_norm": 930.580078125, + "learning_rate": 3.000483640833147e-05, + "loss": 60.7052, + "step": 99980 + }, + { + "epoch": 0.40397225241094553, + "grad_norm": 811.60107421875, + "learning_rate": 3.0002418301666886e-05, + "loss": 68.211, + "step": 99990 + }, + { + "epoch": 0.4040126536763131, + "grad_norm": 1005.753173828125, + "learning_rate": 3.0000000000000004e-05, + "loss": 88.0757, + "step": 100000 + }, + { + "epoch": 0.40405305494168076, + "grad_norm": 1044.51123046875, + "learning_rate": 2.9997581503377976e-05, + "loss": 74.6847, + "step": 100010 + }, + { + "epoch": 0.4040934562070484, + "grad_norm": 579.8829345703125, + "learning_rate": 2.9995162811847947e-05, + "loss": 57.4261, + "step": 100020 + }, + { + "epoch": 0.40413385747241604, + "grad_norm": 481.3171081542969, + "learning_rate": 2.999274392545707e-05, + "loss": 84.3498, + "step": 100030 + }, + { + "epoch": 0.4041742587377837, + "grad_norm": 672.8743896484375, + "learning_rate": 2.999032484425252e-05, + "loss": 123.9463, + "step": 100040 + }, + { + "epoch": 0.4042146600031513, + "grad_norm": 1254.467529296875, + "learning_rate": 2.9987905568281433e-05, + "loss": 165.23, + "step": 100050 + }, + { + "epoch": 0.40425506126851896, + "grad_norm": 2743.522216796875, + "learning_rate": 2.9985486097590987e-05, + "loss": 114.0662, + "step": 100060 + }, + { + "epoch": 0.40429546253388654, + "grad_norm": 1022.5794677734375, + "learning_rate": 2.9983066432228348e-05, + "loss": 72.9322, + "step": 100070 + }, + { + "epoch": 0.4043358637992542, + "grad_norm": 688.6197509765625, + "learning_rate": 2.9980646572240685e-05, + "loss": 66.4144, + "step": 100080 + }, + { + "epoch": 0.4043762650646218, + "grad_norm": 473.9969482421875, + "learning_rate": 2.9978226517675185e-05, + "loss": 119.6013, + "step": 100090 + }, + { + "epoch": 
0.40441666632998946, + "grad_norm": 898.3356323242188, + "learning_rate": 2.997580626857902e-05, + "loss": 106.5996, + "step": 100100 + }, + { + "epoch": 0.4044570675953571, + "grad_norm": 393.857177734375, + "learning_rate": 2.9973385824999376e-05, + "loss": 72.3067, + "step": 100110 + }, + { + "epoch": 0.40449746886072474, + "grad_norm": 798.6058349609375, + "learning_rate": 2.997096518698344e-05, + "loss": 87.6277, + "step": 100120 + }, + { + "epoch": 0.4045378701260923, + "grad_norm": 660.4166870117188, + "learning_rate": 2.9968544354578403e-05, + "loss": 56.2534, + "step": 100130 + }, + { + "epoch": 0.40457827139145996, + "grad_norm": 578.5296020507812, + "learning_rate": 2.9966123327831465e-05, + "loss": 96.7626, + "step": 100140 + }, + { + "epoch": 0.4046186726568276, + "grad_norm": 769.2327270507812, + "learning_rate": 2.996370210678982e-05, + "loss": 139.0472, + "step": 100150 + }, + { + "epoch": 0.40465907392219524, + "grad_norm": 582.416748046875, + "learning_rate": 2.9961280691500675e-05, + "loss": 96.4617, + "step": 100160 + }, + { + "epoch": 0.4046994751875629, + "grad_norm": 621.7333374023438, + "learning_rate": 2.9958859082011227e-05, + "loss": 78.7402, + "step": 100170 + }, + { + "epoch": 0.4047398764529305, + "grad_norm": 682.61279296875, + "learning_rate": 2.9956437278368706e-05, + "loss": 77.1927, + "step": 100180 + }, + { + "epoch": 0.40478027771829816, + "grad_norm": 618.4622802734375, + "learning_rate": 2.99540152806203e-05, + "loss": 67.9092, + "step": 100190 + }, + { + "epoch": 0.40482067898366575, + "grad_norm": 509.2160339355469, + "learning_rate": 2.9951593088813255e-05, + "loss": 93.2919, + "step": 100200 + }, + { + "epoch": 0.4048610802490334, + "grad_norm": 753.4013671875, + "learning_rate": 2.9949170702994773e-05, + "loss": 91.9624, + "step": 100210 + }, + { + "epoch": 0.40490148151440103, + "grad_norm": 702.132080078125, + "learning_rate": 2.9946748123212086e-05, + "loss": 104.3526, + "step": 100220 + }, + { + "epoch": 0.40494188277976867, + "grad_norm": 954.725830078125, + "learning_rate": 2.9944325349512424e-05, + "loss": 125.4185, + "step": 100230 + }, + { + "epoch": 0.4049822840451363, + "grad_norm": 1119.644287109375, + "learning_rate": 2.9941902381943023e-05, + "loss": 66.7066, + "step": 100240 + }, + { + "epoch": 0.40502268531050395, + "grad_norm": 942.4599609375, + "learning_rate": 2.9939479220551112e-05, + "loss": 61.2632, + "step": 100250 + }, + { + "epoch": 0.40506308657587153, + "grad_norm": 424.134521484375, + "learning_rate": 2.9937055865383935e-05, + "loss": 65.2411, + "step": 100260 + }, + { + "epoch": 0.4051034878412392, + "grad_norm": 591.7462768554688, + "learning_rate": 2.993463231648874e-05, + "loss": 90.9724, + "step": 100270 + }, + { + "epoch": 0.4051438891066068, + "grad_norm": 1091.8057861328125, + "learning_rate": 2.9932208573912774e-05, + "loss": 70.017, + "step": 100280 + }, + { + "epoch": 0.40518429037197445, + "grad_norm": 611.049072265625, + "learning_rate": 2.9929784637703288e-05, + "loss": 71.099, + "step": 100290 + }, + { + "epoch": 0.4052246916373421, + "grad_norm": 372.52899169921875, + "learning_rate": 2.992736050790754e-05, + "loss": 62.347, + "step": 100300 + }, + { + "epoch": 0.40526509290270973, + "grad_norm": 312.66607666015625, + "learning_rate": 2.9924936184572786e-05, + "loss": 84.9656, + "step": 100310 + }, + { + "epoch": 0.4053054941680773, + "grad_norm": 1239.732666015625, + "learning_rate": 2.9922511667746295e-05, + "loss": 87.0674, + "step": 100320 + }, + { + "epoch": 0.40534589543344496, + "grad_norm": 
793.163330078125, + "learning_rate": 2.9920086957475324e-05, + "loss": 81.3484, + "step": 100330 + }, + { + "epoch": 0.4053862966988126, + "grad_norm": 507.73883056640625, + "learning_rate": 2.9917662053807154e-05, + "loss": 83.2812, + "step": 100340 + }, + { + "epoch": 0.40542669796418024, + "grad_norm": 726.0172119140625, + "learning_rate": 2.9915236956789057e-05, + "loss": 97.0176, + "step": 100350 + }, + { + "epoch": 0.4054670992295479, + "grad_norm": 1194.90576171875, + "learning_rate": 2.9912811666468307e-05, + "loss": 56.0741, + "step": 100360 + }, + { + "epoch": 0.4055075004949155, + "grad_norm": 769.7332153320312, + "learning_rate": 2.9910386182892197e-05, + "loss": 110.9879, + "step": 100370 + }, + { + "epoch": 0.40554790176028316, + "grad_norm": 1908.300048828125, + "learning_rate": 2.9907960506108e-05, + "loss": 79.4383, + "step": 100380 + }, + { + "epoch": 0.40558830302565074, + "grad_norm": 518.9051513671875, + "learning_rate": 2.9905534636163018e-05, + "loss": 52.9195, + "step": 100390 + }, + { + "epoch": 0.4056287042910184, + "grad_norm": 632.683349609375, + "learning_rate": 2.9903108573104532e-05, + "loss": 65.071, + "step": 100400 + }, + { + "epoch": 0.405669105556386, + "grad_norm": 574.9016723632812, + "learning_rate": 2.9900682316979855e-05, + "loss": 75.5231, + "step": 100410 + }, + { + "epoch": 0.40570950682175366, + "grad_norm": 1464.5843505859375, + "learning_rate": 2.989825586783627e-05, + "loss": 133.5178, + "step": 100420 + }, + { + "epoch": 0.4057499080871213, + "grad_norm": 1709.2393798828125, + "learning_rate": 2.9895829225721092e-05, + "loss": 102.5327, + "step": 100430 + }, + { + "epoch": 0.40579030935248894, + "grad_norm": 726.5798950195312, + "learning_rate": 2.9893402390681627e-05, + "loss": 121.7555, + "step": 100440 + }, + { + "epoch": 0.4058307106178565, + "grad_norm": 802.743408203125, + "learning_rate": 2.9890975362765194e-05, + "loss": 77.8284, + "step": 100450 + }, + { + "epoch": 0.40587111188322417, + "grad_norm": 661.9882202148438, + "learning_rate": 2.9888548142019105e-05, + "loss": 64.1428, + "step": 100460 + }, + { + "epoch": 0.4059115131485918, + "grad_norm": 674.7613525390625, + "learning_rate": 2.9886120728490672e-05, + "loss": 104.7718, + "step": 100470 + }, + { + "epoch": 0.40595191441395945, + "grad_norm": 974.1311645507812, + "learning_rate": 2.988369312222723e-05, + "loss": 79.2047, + "step": 100480 + }, + { + "epoch": 0.4059923156793271, + "grad_norm": 560.33740234375, + "learning_rate": 2.988126532327611e-05, + "loss": 50.3904, + "step": 100490 + }, + { + "epoch": 0.4060327169446947, + "grad_norm": 937.815673828125, + "learning_rate": 2.987883733168462e-05, + "loss": 107.9306, + "step": 100500 + }, + { + "epoch": 0.4060731182100623, + "grad_norm": 1006.9942016601562, + "learning_rate": 2.9876409147500117e-05, + "loss": 82.4775, + "step": 100510 + }, + { + "epoch": 0.40611351947542995, + "grad_norm": 783.78076171875, + "learning_rate": 2.9873980770769937e-05, + "loss": 86.6137, + "step": 100520 + }, + { + "epoch": 0.4061539207407976, + "grad_norm": 1039.523193359375, + "learning_rate": 2.987155220154142e-05, + "loss": 86.0332, + "step": 100530 + }, + { + "epoch": 0.40619432200616523, + "grad_norm": 570.5504760742188, + "learning_rate": 2.9869123439861903e-05, + "loss": 81.0905, + "step": 100540 + }, + { + "epoch": 0.40623472327153287, + "grad_norm": 347.6532897949219, + "learning_rate": 2.9866694485778742e-05, + "loss": 68.5162, + "step": 100550 + }, + { + "epoch": 0.4062751245369005, + "grad_norm": 1221.93310546875, + 
"learning_rate": 2.98642653393393e-05, + "loss": 83.6081, + "step": 100560 + }, + { + "epoch": 0.40631552580226815, + "grad_norm": 612.1166381835938, + "learning_rate": 2.9861836000590925e-05, + "loss": 77.6172, + "step": 100570 + }, + { + "epoch": 0.40635592706763574, + "grad_norm": 675.7503662109375, + "learning_rate": 2.9859406469580975e-05, + "loss": 84.4147, + "step": 100580 + }, + { + "epoch": 0.4063963283330034, + "grad_norm": 854.9896850585938, + "learning_rate": 2.9856976746356824e-05, + "loss": 82.614, + "step": 100590 + }, + { + "epoch": 0.406436729598371, + "grad_norm": 1035.9854736328125, + "learning_rate": 2.9854546830965833e-05, + "loss": 85.3482, + "step": 100600 + }, + { + "epoch": 0.40647713086373866, + "grad_norm": 1160.717529296875, + "learning_rate": 2.9852116723455383e-05, + "loss": 86.7701, + "step": 100610 + }, + { + "epoch": 0.4065175321291063, + "grad_norm": 779.1151733398438, + "learning_rate": 2.9849686423872842e-05, + "loss": 159.6758, + "step": 100620 + }, + { + "epoch": 0.40655793339447394, + "grad_norm": 791.1476440429688, + "learning_rate": 2.9847255932265592e-05, + "loss": 98.0529, + "step": 100630 + }, + { + "epoch": 0.4065983346598415, + "grad_norm": 694.6356811523438, + "learning_rate": 2.984482524868102e-05, + "loss": 56.6254, + "step": 100640 + }, + { + "epoch": 0.40663873592520916, + "grad_norm": 723.6224365234375, + "learning_rate": 2.9842394373166512e-05, + "loss": 114.6369, + "step": 100650 + }, + { + "epoch": 0.4066791371905768, + "grad_norm": 1648.016845703125, + "learning_rate": 2.9839963305769462e-05, + "loss": 106.4386, + "step": 100660 + }, + { + "epoch": 0.40671953845594444, + "grad_norm": 805.6259765625, + "learning_rate": 2.9837532046537255e-05, + "loss": 80.8479, + "step": 100670 + }, + { + "epoch": 0.4067599397213121, + "grad_norm": 2143.642822265625, + "learning_rate": 2.98351005955173e-05, + "loss": 106.9889, + "step": 100680 + }, + { + "epoch": 0.4068003409866797, + "grad_norm": 842.1837768554688, + "learning_rate": 2.9832668952756997e-05, + "loss": 95.7102, + "step": 100690 + }, + { + "epoch": 0.40684074225204736, + "grad_norm": 1074.3897705078125, + "learning_rate": 2.9830237118303742e-05, + "loss": 51.329, + "step": 100700 + }, + { + "epoch": 0.40688114351741494, + "grad_norm": 795.4459838867188, + "learning_rate": 2.9827805092204957e-05, + "loss": 64.4072, + "step": 100710 + }, + { + "epoch": 0.4069215447827826, + "grad_norm": 405.7424621582031, + "learning_rate": 2.9825372874508054e-05, + "loss": 82.9882, + "step": 100720 + }, + { + "epoch": 0.4069619460481502, + "grad_norm": 602.1675415039062, + "learning_rate": 2.982294046526045e-05, + "loss": 66.4098, + "step": 100730 + }, + { + "epoch": 0.40700234731351786, + "grad_norm": 463.5370178222656, + "learning_rate": 2.9820507864509567e-05, + "loss": 70.3965, + "step": 100740 + }, + { + "epoch": 0.4070427485788855, + "grad_norm": 1258.0748291015625, + "learning_rate": 2.981807507230282e-05, + "loss": 65.3473, + "step": 100750 + }, + { + "epoch": 0.40708314984425314, + "grad_norm": 988.177001953125, + "learning_rate": 2.9815642088687647e-05, + "loss": 65.9819, + "step": 100760 + }, + { + "epoch": 0.40712355110962073, + "grad_norm": 554.76318359375, + "learning_rate": 2.981320891371148e-05, + "loss": 80.5751, + "step": 100770 + }, + { + "epoch": 0.40716395237498837, + "grad_norm": 802.5167236328125, + "learning_rate": 2.9810775547421752e-05, + "loss": 69.3785, + "step": 100780 + }, + { + "epoch": 0.407204353640356, + "grad_norm": 401.9982604980469, + "learning_rate": 
2.9808341989865903e-05, + "loss": 112.7568, + "step": 100790 + }, + { + "epoch": 0.40724475490572365, + "grad_norm": 353.8996276855469, + "learning_rate": 2.9805908241091375e-05, + "loss": 49.0205, + "step": 100800 + }, + { + "epoch": 0.4072851561710913, + "grad_norm": 1105.011474609375, + "learning_rate": 2.980347430114562e-05, + "loss": 98.1618, + "step": 100810 + }, + { + "epoch": 0.40732555743645893, + "grad_norm": 733.9447631835938, + "learning_rate": 2.980104017007609e-05, + "loss": 104.5442, + "step": 100820 + }, + { + "epoch": 0.4073659587018265, + "grad_norm": 463.2933044433594, + "learning_rate": 2.9798605847930236e-05, + "loss": 89.2008, + "step": 100830 + }, + { + "epoch": 0.40740635996719415, + "grad_norm": 1058.00732421875, + "learning_rate": 2.979617133475551e-05, + "loss": 89.4865, + "step": 100840 + }, + { + "epoch": 0.4074467612325618, + "grad_norm": 507.5879821777344, + "learning_rate": 2.9793736630599384e-05, + "loss": 107.9926, + "step": 100850 + }, + { + "epoch": 0.40748716249792943, + "grad_norm": 666.5136108398438, + "learning_rate": 2.9791301735509316e-05, + "loss": 78.2162, + "step": 100860 + }, + { + "epoch": 0.4075275637632971, + "grad_norm": 1778.90087890625, + "learning_rate": 2.9788866649532785e-05, + "loss": 95.5333, + "step": 100870 + }, + { + "epoch": 0.4075679650286647, + "grad_norm": 1163.996826171875, + "learning_rate": 2.9786431372717256e-05, + "loss": 90.1711, + "step": 100880 + }, + { + "epoch": 0.40760836629403235, + "grad_norm": 1472.2333984375, + "learning_rate": 2.9783995905110206e-05, + "loss": 100.0642, + "step": 100890 + }, + { + "epoch": 0.40764876755939994, + "grad_norm": 805.7984619140625, + "learning_rate": 2.978156024675913e-05, + "loss": 57.4663, + "step": 100900 + }, + { + "epoch": 0.4076891688247676, + "grad_norm": 568.2591552734375, + "learning_rate": 2.9779124397711493e-05, + "loss": 81.5651, + "step": 100910 + }, + { + "epoch": 0.4077295700901352, + "grad_norm": 537.5708618164062, + "learning_rate": 2.9776688358014792e-05, + "loss": 56.7743, + "step": 100920 + }, + { + "epoch": 0.40776997135550286, + "grad_norm": 1871.752685546875, + "learning_rate": 2.9774252127716518e-05, + "loss": 61.6614, + "step": 100930 + }, + { + "epoch": 0.4078103726208705, + "grad_norm": 1129.851318359375, + "learning_rate": 2.977181570686417e-05, + "loss": 81.99, + "step": 100940 + }, + { + "epoch": 0.40785077388623814, + "grad_norm": 557.4409790039062, + "learning_rate": 2.976937909550524e-05, + "loss": 85.3099, + "step": 100950 + }, + { + "epoch": 0.4078911751516057, + "grad_norm": 1494.6304931640625, + "learning_rate": 2.9766942293687234e-05, + "loss": 103.7091, + "step": 100960 + }, + { + "epoch": 0.40793157641697336, + "grad_norm": 1027.2786865234375, + "learning_rate": 2.976450530145766e-05, + "loss": 85.1238, + "step": 100970 + }, + { + "epoch": 0.407971977682341, + "grad_norm": 568.3897094726562, + "learning_rate": 2.9762068118864032e-05, + "loss": 102.1211, + "step": 100980 + }, + { + "epoch": 0.40801237894770864, + "grad_norm": 533.45068359375, + "learning_rate": 2.9759630745953854e-05, + "loss": 79.4324, + "step": 100990 + }, + { + "epoch": 0.4080527802130763, + "grad_norm": 1022.1384887695312, + "learning_rate": 2.9757193182774658e-05, + "loss": 84.5238, + "step": 101000 + }, + { + "epoch": 0.4080931814784439, + "grad_norm": 557.9917602539062, + "learning_rate": 2.9754755429373952e-05, + "loss": 107.0561, + "step": 101010 + }, + { + "epoch": 0.40813358274381156, + "grad_norm": 1311.711669921875, + "learning_rate": 2.975231748579927e-05, + 
"loss": 55.4578, + "step": 101020 + }, + { + "epoch": 0.40817398400917915, + "grad_norm": 596.0655517578125, + "learning_rate": 2.9749879352098136e-05, + "loss": 109.8271, + "step": 101030 + }, + { + "epoch": 0.4082143852745468, + "grad_norm": 278.0167541503906, + "learning_rate": 2.974744102831808e-05, + "loss": 54.5976, + "step": 101040 + }, + { + "epoch": 0.4082547865399144, + "grad_norm": 492.8935241699219, + "learning_rate": 2.974500251450665e-05, + "loss": 54.7062, + "step": 101050 + }, + { + "epoch": 0.40829518780528207, + "grad_norm": 599.0740356445312, + "learning_rate": 2.9742563810711376e-05, + "loss": 86.1442, + "step": 101060 + }, + { + "epoch": 0.4083355890706497, + "grad_norm": 300.50604248046875, + "learning_rate": 2.9740124916979806e-05, + "loss": 90.0614, + "step": 101070 + }, + { + "epoch": 0.40837599033601735, + "grad_norm": 958.6484375, + "learning_rate": 2.9737685833359485e-05, + "loss": 90.5603, + "step": 101080 + }, + { + "epoch": 0.40841639160138493, + "grad_norm": 709.5164184570312, + "learning_rate": 2.9735246559897962e-05, + "loss": 90.2463, + "step": 101090 + }, + { + "epoch": 0.40845679286675257, + "grad_norm": 734.1888427734375, + "learning_rate": 2.9732807096642804e-05, + "loss": 83.9704, + "step": 101100 + }, + { + "epoch": 0.4084971941321202, + "grad_norm": 962.6340942382812, + "learning_rate": 2.9730367443641555e-05, + "loss": 118.7526, + "step": 101110 + }, + { + "epoch": 0.40853759539748785, + "grad_norm": 1071.9810791015625, + "learning_rate": 2.9727927600941783e-05, + "loss": 90.8958, + "step": 101120 + }, + { + "epoch": 0.4085779966628555, + "grad_norm": 558.5797119140625, + "learning_rate": 2.9725487568591052e-05, + "loss": 70.7151, + "step": 101130 + }, + { + "epoch": 0.40861839792822313, + "grad_norm": 554.2314453125, + "learning_rate": 2.972304734663694e-05, + "loss": 82.0324, + "step": 101140 + }, + { + "epoch": 0.4086587991935907, + "grad_norm": 707.044677734375, + "learning_rate": 2.9720606935127007e-05, + "loss": 82.0831, + "step": 101150 + }, + { + "epoch": 0.40869920045895836, + "grad_norm": 1486.3939208984375, + "learning_rate": 2.971816633410884e-05, + "loss": 91.8749, + "step": 101160 + }, + { + "epoch": 0.408739601724326, + "grad_norm": 1143.4642333984375, + "learning_rate": 2.971572554363002e-05, + "loss": 81.7156, + "step": 101170 + }, + { + "epoch": 0.40878000298969364, + "grad_norm": 577.1227416992188, + "learning_rate": 2.9713284563738128e-05, + "loss": 98.4352, + "step": 101180 + }, + { + "epoch": 0.4088204042550613, + "grad_norm": 1006.4549560546875, + "learning_rate": 2.971084339448075e-05, + "loss": 98.4299, + "step": 101190 + }, + { + "epoch": 0.4088608055204289, + "grad_norm": 558.46435546875, + "learning_rate": 2.970840203590548e-05, + "loss": 134.998, + "step": 101200 + }, + { + "epoch": 0.40890120678579656, + "grad_norm": 1375.786865234375, + "learning_rate": 2.970596048805992e-05, + "loss": 109.0192, + "step": 101210 + }, + { + "epoch": 0.40894160805116414, + "grad_norm": 1114.6036376953125, + "learning_rate": 2.970351875099166e-05, + "loss": 52.0891, + "step": 101220 + }, + { + "epoch": 0.4089820093165318, + "grad_norm": 677.32275390625, + "learning_rate": 2.9701076824748304e-05, + "loss": 76.9035, + "step": 101230 + }, + { + "epoch": 0.4090224105818994, + "grad_norm": 837.0114135742188, + "learning_rate": 2.969863470937746e-05, + "loss": 84.2498, + "step": 101240 + }, + { + "epoch": 0.40906281184726706, + "grad_norm": 1066.646728515625, + "learning_rate": 2.9696192404926747e-05, + "loss": 68.5491, + "step": 101250 + }, 
+ { + "epoch": 0.4091032131126347, + "grad_norm": 641.6810913085938, + "learning_rate": 2.9693749911443763e-05, + "loss": 62.2218, + "step": 101260 + }, + { + "epoch": 0.40914361437800234, + "grad_norm": 422.7613830566406, + "learning_rate": 2.9691307228976137e-05, + "loss": 79.8769, + "step": 101270 + }, + { + "epoch": 0.4091840156433699, + "grad_norm": 900.3056030273438, + "learning_rate": 2.9688864357571487e-05, + "loss": 89.9641, + "step": 101280 + }, + { + "epoch": 0.40922441690873756, + "grad_norm": 534.5391235351562, + "learning_rate": 2.9686421297277436e-05, + "loss": 82.2336, + "step": 101290 + }, + { + "epoch": 0.4092648181741052, + "grad_norm": 595.19140625, + "learning_rate": 2.9683978048141618e-05, + "loss": 108.2922, + "step": 101300 + }, + { + "epoch": 0.40930521943947284, + "grad_norm": 679.0897216796875, + "learning_rate": 2.968153461021166e-05, + "loss": 80.3343, + "step": 101310 + }, + { + "epoch": 0.4093456207048405, + "grad_norm": 1166.92578125, + "learning_rate": 2.9679090983535198e-05, + "loss": 107.7027, + "step": 101320 + }, + { + "epoch": 0.4093860219702081, + "grad_norm": 1424.146728515625, + "learning_rate": 2.9676647168159877e-05, + "loss": 64.7421, + "step": 101330 + }, + { + "epoch": 0.40942642323557576, + "grad_norm": 544.7164306640625, + "learning_rate": 2.967420316413334e-05, + "loss": 94.1627, + "step": 101340 + }, + { + "epoch": 0.40946682450094335, + "grad_norm": 666.3820190429688, + "learning_rate": 2.9671758971503234e-05, + "loss": 83.8276, + "step": 101350 + }, + { + "epoch": 0.409507225766311, + "grad_norm": 786.4224243164062, + "learning_rate": 2.9669314590317202e-05, + "loss": 55.8622, + "step": 101360 + }, + { + "epoch": 0.40954762703167863, + "grad_norm": 587.5588989257812, + "learning_rate": 2.9666870020622904e-05, + "loss": 84.6572, + "step": 101370 + }, + { + "epoch": 0.40958802829704627, + "grad_norm": 676.482666015625, + "learning_rate": 2.9664425262468e-05, + "loss": 88.5244, + "step": 101380 + }, + { + "epoch": 0.4096284295624139, + "grad_norm": 838.5384521484375, + "learning_rate": 2.9661980315900152e-05, + "loss": 87.4174, + "step": 101390 + }, + { + "epoch": 0.40966883082778155, + "grad_norm": 340.7973937988281, + "learning_rate": 2.9659535180967016e-05, + "loss": 68.33, + "step": 101400 + }, + { + "epoch": 0.40970923209314913, + "grad_norm": 1295.660400390625, + "learning_rate": 2.9657089857716273e-05, + "loss": 72.7666, + "step": 101410 + }, + { + "epoch": 0.4097496333585168, + "grad_norm": 1260.36865234375, + "learning_rate": 2.9654644346195596e-05, + "loss": 73.0549, + "step": 101420 + }, + { + "epoch": 0.4097900346238844, + "grad_norm": 1314.9671630859375, + "learning_rate": 2.9652198646452654e-05, + "loss": 115.7125, + "step": 101430 + }, + { + "epoch": 0.40983043588925205, + "grad_norm": 554.6221313476562, + "learning_rate": 2.9649752758535125e-05, + "loss": 108.3485, + "step": 101440 + }, + { + "epoch": 0.4098708371546197, + "grad_norm": 433.7135009765625, + "learning_rate": 2.9647306682490705e-05, + "loss": 63.1223, + "step": 101450 + }, + { + "epoch": 0.40991123841998733, + "grad_norm": 614.7924194335938, + "learning_rate": 2.9644860418367068e-05, + "loss": 46.9724, + "step": 101460 + }, + { + "epoch": 0.4099516396853549, + "grad_norm": 383.16778564453125, + "learning_rate": 2.9642413966211914e-05, + "loss": 87.3785, + "step": 101470 + }, + { + "epoch": 0.40999204095072256, + "grad_norm": 1063.79150390625, + "learning_rate": 2.963996732607294e-05, + "loss": 74.1868, + "step": 101480 + }, + { + "epoch": 0.4100324422160902, + 
"grad_norm": 1331.01953125, + "learning_rate": 2.9637520497997836e-05, + "loss": 89.3768, + "step": 101490 + }, + { + "epoch": 0.41007284348145784, + "grad_norm": 810.9175415039062, + "learning_rate": 2.9635073482034307e-05, + "loss": 74.96, + "step": 101500 + }, + { + "epoch": 0.4101132447468255, + "grad_norm": 1241.639892578125, + "learning_rate": 2.963262627823006e-05, + "loss": 89.2363, + "step": 101510 + }, + { + "epoch": 0.4101536460121931, + "grad_norm": 1112.5863037109375, + "learning_rate": 2.963017888663281e-05, + "loss": 86.3573, + "step": 101520 + }, + { + "epoch": 0.41019404727756076, + "grad_norm": 311.25787353515625, + "learning_rate": 2.962773130729026e-05, + "loss": 79.8112, + "step": 101530 + }, + { + "epoch": 0.41023444854292834, + "grad_norm": 645.7020263671875, + "learning_rate": 2.962528354025013e-05, + "loss": 67.0156, + "step": 101540 + }, + { + "epoch": 0.410274849808296, + "grad_norm": 989.2708129882812, + "learning_rate": 2.9622835585560147e-05, + "loss": 87.7592, + "step": 101550 + }, + { + "epoch": 0.4103152510736636, + "grad_norm": 661.308837890625, + "learning_rate": 2.9620387443268028e-05, + "loss": 87.5455, + "step": 101560 + }, + { + "epoch": 0.41035565233903126, + "grad_norm": 954.9491577148438, + "learning_rate": 2.9617939113421496e-05, + "loss": 84.5768, + "step": 101570 + }, + { + "epoch": 0.4103960536043989, + "grad_norm": 746.9169311523438, + "learning_rate": 2.9615490596068297e-05, + "loss": 100.1928, + "step": 101580 + }, + { + "epoch": 0.41043645486976654, + "grad_norm": 860.0364379882812, + "learning_rate": 2.961304189125616e-05, + "loss": 123.3183, + "step": 101590 + }, + { + "epoch": 0.4104768561351341, + "grad_norm": 728.4450073242188, + "learning_rate": 2.9610592999032815e-05, + "loss": 93.9417, + "step": 101600 + }, + { + "epoch": 0.41051725740050177, + "grad_norm": 1080.4842529296875, + "learning_rate": 2.960814391944602e-05, + "loss": 111.6896, + "step": 101610 + }, + { + "epoch": 0.4105576586658694, + "grad_norm": 706.7281494140625, + "learning_rate": 2.9605694652543507e-05, + "loss": 57.9073, + "step": 101620 + }, + { + "epoch": 0.41059805993123705, + "grad_norm": 528.2630615234375, + "learning_rate": 2.9603245198373037e-05, + "loss": 99.007, + "step": 101630 + }, + { + "epoch": 0.4106384611966047, + "grad_norm": 708.9039916992188, + "learning_rate": 2.960079555698235e-05, + "loss": 90.3956, + "step": 101640 + }, + { + "epoch": 0.4106788624619723, + "grad_norm": 821.7721557617188, + "learning_rate": 2.9598345728419214e-05, + "loss": 73.2257, + "step": 101650 + }, + { + "epoch": 0.41071926372733997, + "grad_norm": 723.275390625, + "learning_rate": 2.9595895712731386e-05, + "loss": 110.4846, + "step": 101660 + }, + { + "epoch": 0.41075966499270755, + "grad_norm": 857.1060180664062, + "learning_rate": 2.9593445509966635e-05, + "loss": 64.8143, + "step": 101670 + }, + { + "epoch": 0.4108000662580752, + "grad_norm": 603.6101684570312, + "learning_rate": 2.9590995120172716e-05, + "loss": 65.0324, + "step": 101680 + }, + { + "epoch": 0.41084046752344283, + "grad_norm": 1237.0367431640625, + "learning_rate": 2.9588544543397416e-05, + "loss": 76.0417, + "step": 101690 + }, + { + "epoch": 0.41088086878881047, + "grad_norm": 427.9548645019531, + "learning_rate": 2.9586093779688504e-05, + "loss": 91.833, + "step": 101700 + }, + { + "epoch": 0.4109212700541781, + "grad_norm": 388.0860900878906, + "learning_rate": 2.9583642829093756e-05, + "loss": 51.7763, + "step": 101710 + }, + { + "epoch": 0.41096167131954575, + "grad_norm": 344.2474365234375, + 
"learning_rate": 2.958119169166096e-05, + "loss": 59.4776, + "step": 101720 + }, + { + "epoch": 0.41100207258491334, + "grad_norm": 1314.1070556640625, + "learning_rate": 2.9578740367437896e-05, + "loss": 101.6859, + "step": 101730 + }, + { + "epoch": 0.411042473850281, + "grad_norm": 1068.271484375, + "learning_rate": 2.9576288856472354e-05, + "loss": 50.7563, + "step": 101740 + }, + { + "epoch": 0.4110828751156486, + "grad_norm": 607.7156372070312, + "learning_rate": 2.957383715881214e-05, + "loss": 49.8177, + "step": 101750 + }, + { + "epoch": 0.41112327638101626, + "grad_norm": 542.142578125, + "learning_rate": 2.9571385274505037e-05, + "loss": 96.1195, + "step": 101760 + }, + { + "epoch": 0.4111636776463839, + "grad_norm": 940.1134033203125, + "learning_rate": 2.9568933203598847e-05, + "loss": 67.4525, + "step": 101770 + }, + { + "epoch": 0.41120407891175154, + "grad_norm": 872.6924438476562, + "learning_rate": 2.956648094614138e-05, + "loss": 88.209, + "step": 101780 + }, + { + "epoch": 0.4112444801771191, + "grad_norm": 1352.097900390625, + "learning_rate": 2.9564028502180444e-05, + "loss": 79.0445, + "step": 101790 + }, + { + "epoch": 0.41128488144248676, + "grad_norm": 665.7899169921875, + "learning_rate": 2.956157587176385e-05, + "loss": 77.4172, + "step": 101800 + }, + { + "epoch": 0.4113252827078544, + "grad_norm": 1209.0172119140625, + "learning_rate": 2.9559123054939403e-05, + "loss": 87.4228, + "step": 101810 + }, + { + "epoch": 0.41136568397322204, + "grad_norm": 898.3853759765625, + "learning_rate": 2.9556670051754935e-05, + "loss": 84.7399, + "step": 101820 + }, + { + "epoch": 0.4114060852385897, + "grad_norm": 519.8313598632812, + "learning_rate": 2.955421686225827e-05, + "loss": 101.3268, + "step": 101830 + }, + { + "epoch": 0.4114464865039573, + "grad_norm": 831.49609375, + "learning_rate": 2.9551763486497225e-05, + "loss": 71.3833, + "step": 101840 + }, + { + "epoch": 0.41148688776932496, + "grad_norm": 942.4913940429688, + "learning_rate": 2.9549309924519626e-05, + "loss": 75.0515, + "step": 101850 + }, + { + "epoch": 0.41152728903469254, + "grad_norm": 1571.7410888671875, + "learning_rate": 2.9546856176373323e-05, + "loss": 102.0793, + "step": 101860 + }, + { + "epoch": 0.4115676903000602, + "grad_norm": 1069.091552734375, + "learning_rate": 2.9544402242106147e-05, + "loss": 93.8609, + "step": 101870 + }, + { + "epoch": 0.4116080915654278, + "grad_norm": 418.4473571777344, + "learning_rate": 2.9541948121765927e-05, + "loss": 68.491, + "step": 101880 + }, + { + "epoch": 0.41164849283079546, + "grad_norm": 491.02886962890625, + "learning_rate": 2.9539493815400514e-05, + "loss": 53.2697, + "step": 101890 + }, + { + "epoch": 0.4116888940961631, + "grad_norm": 728.8077392578125, + "learning_rate": 2.9537039323057758e-05, + "loss": 65.2863, + "step": 101900 + }, + { + "epoch": 0.41172929536153074, + "grad_norm": 649.7918701171875, + "learning_rate": 2.9534584644785515e-05, + "loss": 72.9409, + "step": 101910 + }, + { + "epoch": 0.41176969662689833, + "grad_norm": 644.6507568359375, + "learning_rate": 2.9532129780631636e-05, + "loss": 106.8911, + "step": 101920 + }, + { + "epoch": 0.41181009789226597, + "grad_norm": 464.0957336425781, + "learning_rate": 2.9529674730643978e-05, + "loss": 87.7972, + "step": 101930 + }, + { + "epoch": 0.4118504991576336, + "grad_norm": 1127.255615234375, + "learning_rate": 2.9527219494870407e-05, + "loss": 114.9517, + "step": 101940 + }, + { + "epoch": 0.41189090042300125, + "grad_norm": 211.61471557617188, + "learning_rate": 
2.9524764073358786e-05, + "loss": 135.9766, + "step": 101950 + }, + { + "epoch": 0.4119313016883689, + "grad_norm": 591.4796752929688, + "learning_rate": 2.952230846615699e-05, + "loss": 72.8937, + "step": 101960 + }, + { + "epoch": 0.41197170295373653, + "grad_norm": 694.7760009765625, + "learning_rate": 2.9519852673312877e-05, + "loss": 64.4668, + "step": 101970 + }, + { + "epoch": 0.41201210421910417, + "grad_norm": 573.482666015625, + "learning_rate": 2.951739669487434e-05, + "loss": 73.3253, + "step": 101980 + }, + { + "epoch": 0.41205250548447175, + "grad_norm": 908.8732299804688, + "learning_rate": 2.9514940530889254e-05, + "loss": 101.8951, + "step": 101990 + }, + { + "epoch": 0.4120929067498394, + "grad_norm": 478.71954345703125, + "learning_rate": 2.951248418140551e-05, + "loss": 80.682, + "step": 102000 + }, + { + "epoch": 0.41213330801520703, + "grad_norm": 829.7828979492188, + "learning_rate": 2.9510027646470983e-05, + "loss": 72.2819, + "step": 102010 + }, + { + "epoch": 0.4121737092805747, + "grad_norm": 320.1921691894531, + "learning_rate": 2.950757092613357e-05, + "loss": 89.5854, + "step": 102020 + }, + { + "epoch": 0.4122141105459423, + "grad_norm": 545.007080078125, + "learning_rate": 2.950511402044117e-05, + "loss": 73.0841, + "step": 102030 + }, + { + "epoch": 0.41225451181130995, + "grad_norm": 383.9866638183594, + "learning_rate": 2.9502656929441685e-05, + "loss": 131.4048, + "step": 102040 + }, + { + "epoch": 0.41229491307667754, + "grad_norm": 575.4655151367188, + "learning_rate": 2.9500199653183e-05, + "loss": 83.8569, + "step": 102050 + }, + { + "epoch": 0.4123353143420452, + "grad_norm": 911.9077758789062, + "learning_rate": 2.949774219171303e-05, + "loss": 88.9945, + "step": 102060 + }, + { + "epoch": 0.4123757156074128, + "grad_norm": 280.343994140625, + "learning_rate": 2.9495284545079696e-05, + "loss": 80.505, + "step": 102070 + }, + { + "epoch": 0.41241611687278046, + "grad_norm": 1036.03125, + "learning_rate": 2.9492826713330898e-05, + "loss": 89.8384, + "step": 102080 + }, + { + "epoch": 0.4124565181381481, + "grad_norm": 1080.1055908203125, + "learning_rate": 2.9490368696514556e-05, + "loss": 83.4794, + "step": 102090 + }, + { + "epoch": 0.41249691940351574, + "grad_norm": 684.9985961914062, + "learning_rate": 2.9487910494678585e-05, + "loss": 110.7087, + "step": 102100 + }, + { + "epoch": 0.4125373206688833, + "grad_norm": 1465.13916015625, + "learning_rate": 2.9485452107870923e-05, + "loss": 132.2476, + "step": 102110 + }, + { + "epoch": 0.41257772193425096, + "grad_norm": 665.65576171875, + "learning_rate": 2.9482993536139485e-05, + "loss": 58.221, + "step": 102120 + }, + { + "epoch": 0.4126181231996186, + "grad_norm": 556.6141967773438, + "learning_rate": 2.9480534779532207e-05, + "loss": 64.2791, + "step": 102130 + }, + { + "epoch": 0.41265852446498624, + "grad_norm": 968.1908569335938, + "learning_rate": 2.9478075838097017e-05, + "loss": 91.4234, + "step": 102140 + }, + { + "epoch": 0.4126989257303539, + "grad_norm": 404.67706298828125, + "learning_rate": 2.9475616711881864e-05, + "loss": 63.4686, + "step": 102150 + }, + { + "epoch": 0.4127393269957215, + "grad_norm": 934.3843383789062, + "learning_rate": 2.9473157400934687e-05, + "loss": 95.0739, + "step": 102160 + }, + { + "epoch": 0.41277972826108916, + "grad_norm": 689.4697265625, + "learning_rate": 2.9470697905303424e-05, + "loss": 126.1283, + "step": 102170 + }, + { + "epoch": 0.41282012952645675, + "grad_norm": 532.5220336914062, + "learning_rate": 2.946823822503603e-05, + "loss": 117.4854, 
+ "step": 102180 + }, + { + "epoch": 0.4128605307918244, + "grad_norm": 533.8617553710938, + "learning_rate": 2.9465778360180457e-05, + "loss": 74.7991, + "step": 102190 + }, + { + "epoch": 0.412900932057192, + "grad_norm": 762.31494140625, + "learning_rate": 2.9463318310784664e-05, + "loss": 94.4617, + "step": 102200 + }, + { + "epoch": 0.41294133332255967, + "grad_norm": 482.3002624511719, + "learning_rate": 2.9460858076896612e-05, + "loss": 108.2751, + "step": 102210 + }, + { + "epoch": 0.4129817345879273, + "grad_norm": 816.73681640625, + "learning_rate": 2.9458397658564255e-05, + "loss": 67.9988, + "step": 102220 + }, + { + "epoch": 0.41302213585329495, + "grad_norm": 379.90625, + "learning_rate": 2.945593705583557e-05, + "loss": 119.2732, + "step": 102230 + }, + { + "epoch": 0.41306253711866253, + "grad_norm": 1255.748046875, + "learning_rate": 2.945347626875852e-05, + "loss": 153.5148, + "step": 102240 + }, + { + "epoch": 0.41310293838403017, + "grad_norm": 1754.71630859375, + "learning_rate": 2.9451015297381085e-05, + "loss": 110.7132, + "step": 102250 + }, + { + "epoch": 0.4131433396493978, + "grad_norm": 496.32440185546875, + "learning_rate": 2.9448554141751237e-05, + "loss": 57.599, + "step": 102260 + }, + { + "epoch": 0.41318374091476545, + "grad_norm": 918.6785278320312, + "learning_rate": 2.9446092801916964e-05, + "loss": 74.1492, + "step": 102270 + }, + { + "epoch": 0.4132241421801331, + "grad_norm": 1417.417724609375, + "learning_rate": 2.9443631277926257e-05, + "loss": 90.9676, + "step": 102280 + }, + { + "epoch": 0.41326454344550073, + "grad_norm": 639.9609375, + "learning_rate": 2.9441169569827087e-05, + "loss": 74.1897, + "step": 102290 + }, + { + "epoch": 0.41330494471086837, + "grad_norm": 361.1966552734375, + "learning_rate": 2.9438707677667458e-05, + "loss": 61.9638, + "step": 102300 + }, + { + "epoch": 0.41334534597623596, + "grad_norm": 696.9091186523438, + "learning_rate": 2.9436245601495363e-05, + "loss": 95.3536, + "step": 102310 + }, + { + "epoch": 0.4133857472416036, + "grad_norm": 741.5752563476562, + "learning_rate": 2.9433783341358807e-05, + "loss": 69.8527, + "step": 102320 + }, + { + "epoch": 0.41342614850697124, + "grad_norm": 1936.1826171875, + "learning_rate": 2.943132089730578e-05, + "loss": 77.4569, + "step": 102330 + }, + { + "epoch": 0.4134665497723389, + "grad_norm": 541.037353515625, + "learning_rate": 2.94288582693843e-05, + "loss": 85.2763, + "step": 102340 + }, + { + "epoch": 0.4135069510377065, + "grad_norm": 940.5930786132812, + "learning_rate": 2.942639545764237e-05, + "loss": 112.4567, + "step": 102350 + }, + { + "epoch": 0.41354735230307416, + "grad_norm": 544.6072998046875, + "learning_rate": 2.9423932462128015e-05, + "loss": 73.5299, + "step": 102360 + }, + { + "epoch": 0.41358775356844174, + "grad_norm": 1121.7386474609375, + "learning_rate": 2.942146928288924e-05, + "loss": 65.2916, + "step": 102370 + }, + { + "epoch": 0.4136281548338094, + "grad_norm": 457.3451843261719, + "learning_rate": 2.9419005919974073e-05, + "loss": 118.049, + "step": 102380 + }, + { + "epoch": 0.413668556099177, + "grad_norm": 422.0569763183594, + "learning_rate": 2.9416542373430538e-05, + "loss": 72.5048, + "step": 102390 + }, + { + "epoch": 0.41370895736454466, + "grad_norm": 860.5277099609375, + "learning_rate": 2.941407864330666e-05, + "loss": 59.2924, + "step": 102400 + }, + { + "epoch": 0.4137493586299123, + "grad_norm": 523.949462890625, + "learning_rate": 2.9411614729650467e-05, + "loss": 52.0407, + "step": 102410 + }, + { + "epoch": 
0.41378975989527994, + "grad_norm": 947.2323608398438, + "learning_rate": 2.940915063251e-05, + "loss": 82.7512, + "step": 102420 + }, + { + "epoch": 0.4138301611606475, + "grad_norm": 693.1100463867188, + "learning_rate": 2.94066863519333e-05, + "loss": 83.4489, + "step": 102430 + }, + { + "epoch": 0.41387056242601516, + "grad_norm": 531.5813598632812, + "learning_rate": 2.9404221887968406e-05, + "loss": 77.2275, + "step": 102440 + }, + { + "epoch": 0.4139109636913828, + "grad_norm": 414.62335205078125, + "learning_rate": 2.9401757240663368e-05, + "loss": 52.1728, + "step": 102450 + }, + { + "epoch": 0.41395136495675044, + "grad_norm": 1057.8563232421875, + "learning_rate": 2.939929241006623e-05, + "loss": 78.1879, + "step": 102460 + }, + { + "epoch": 0.4139917662221181, + "grad_norm": 4222.7783203125, + "learning_rate": 2.9396827396225052e-05, + "loss": 100.7438, + "step": 102470 + }, + { + "epoch": 0.4140321674874857, + "grad_norm": 1265.8741455078125, + "learning_rate": 2.939436219918788e-05, + "loss": 140.8602, + "step": 102480 + }, + { + "epoch": 0.41407256875285336, + "grad_norm": 2843.1806640625, + "learning_rate": 2.9391896819002782e-05, + "loss": 117.0677, + "step": 102490 + }, + { + "epoch": 0.41411297001822095, + "grad_norm": 874.6304931640625, + "learning_rate": 2.938943125571782e-05, + "loss": 133.541, + "step": 102500 + }, + { + "epoch": 0.4141533712835886, + "grad_norm": 688.3585205078125, + "learning_rate": 2.9386965509381057e-05, + "loss": 100.0725, + "step": 102510 + }, + { + "epoch": 0.41419377254895623, + "grad_norm": 648.43603515625, + "learning_rate": 2.9384499580040576e-05, + "loss": 60.0707, + "step": 102520 + }, + { + "epoch": 0.41423417381432387, + "grad_norm": 513.8038940429688, + "learning_rate": 2.9382033467744442e-05, + "loss": 63.8693, + "step": 102530 + }, + { + "epoch": 0.4142745750796915, + "grad_norm": 629.941650390625, + "learning_rate": 2.9379567172540735e-05, + "loss": 106.7182, + "step": 102540 + }, + { + "epoch": 0.41431497634505915, + "grad_norm": 473.29302978515625, + "learning_rate": 2.937710069447754e-05, + "loss": 58.309, + "step": 102550 + }, + { + "epoch": 0.41435537761042673, + "grad_norm": 800.0940551757812, + "learning_rate": 2.9374634033602937e-05, + "loss": 87.9695, + "step": 102560 + }, + { + "epoch": 0.4143957788757944, + "grad_norm": 472.98126220703125, + "learning_rate": 2.9372167189965014e-05, + "loss": 78.3608, + "step": 102570 + }, + { + "epoch": 0.414436180141162, + "grad_norm": 399.52752685546875, + "learning_rate": 2.936970016361187e-05, + "loss": 103.0284, + "step": 102580 + }, + { + "epoch": 0.41447658140652965, + "grad_norm": 858.9774169921875, + "learning_rate": 2.9367232954591593e-05, + "loss": 94.3255, + "step": 102590 + }, + { + "epoch": 0.4145169826718973, + "grad_norm": 606.3959350585938, + "learning_rate": 2.936476556295229e-05, + "loss": 73.3241, + "step": 102600 + }, + { + "epoch": 0.41455738393726493, + "grad_norm": 719.265380859375, + "learning_rate": 2.9362297988742064e-05, + "loss": 63.846, + "step": 102610 + }, + { + "epoch": 0.4145977852026326, + "grad_norm": 572.8154907226562, + "learning_rate": 2.9359830232009018e-05, + "loss": 107.2589, + "step": 102620 + }, + { + "epoch": 0.41463818646800016, + "grad_norm": 523.6227416992188, + "learning_rate": 2.9357362292801255e-05, + "loss": 99.1469, + "step": 102630 + }, + { + "epoch": 0.4146785877333678, + "grad_norm": 982.0803833007812, + "learning_rate": 2.9354894171166906e-05, + "loss": 63.305, + "step": 102640 + }, + { + "epoch": 0.41471898899873544, + 
"grad_norm": 607.8756713867188, + "learning_rate": 2.935242586715408e-05, + "loss": 64.2629, + "step": 102650 + }, + { + "epoch": 0.4147593902641031, + "grad_norm": 782.0191650390625, + "learning_rate": 2.9349957380810893e-05, + "loss": 72.6717, + "step": 102660 + }, + { + "epoch": 0.4147997915294707, + "grad_norm": 1487.702880859375, + "learning_rate": 2.9347488712185472e-05, + "loss": 112.4669, + "step": 102670 + }, + { + "epoch": 0.41484019279483836, + "grad_norm": 493.4783935546875, + "learning_rate": 2.9345019861325944e-05, + "loss": 77.6424, + "step": 102680 + }, + { + "epoch": 0.41488059406020594, + "grad_norm": 885.004150390625, + "learning_rate": 2.9342550828280443e-05, + "loss": 112.8622, + "step": 102690 + }, + { + "epoch": 0.4149209953255736, + "grad_norm": 928.6383666992188, + "learning_rate": 2.934008161309711e-05, + "loss": 69.7193, + "step": 102700 + }, + { + "epoch": 0.4149613965909412, + "grad_norm": 290.48583984375, + "learning_rate": 2.933761221582407e-05, + "loss": 91.9459, + "step": 102710 + }, + { + "epoch": 0.41500179785630886, + "grad_norm": 736.0753784179688, + "learning_rate": 2.9335142636509482e-05, + "loss": 61.2773, + "step": 102720 + }, + { + "epoch": 0.4150421991216765, + "grad_norm": 601.7123413085938, + "learning_rate": 2.9332672875201476e-05, + "loss": 75.4896, + "step": 102730 + }, + { + "epoch": 0.41508260038704414, + "grad_norm": 647.9940795898438, + "learning_rate": 2.9330202931948205e-05, + "loss": 94.6208, + "step": 102740 + }, + { + "epoch": 0.4151230016524117, + "grad_norm": 1088.4774169921875, + "learning_rate": 2.932773280679783e-05, + "loss": 66.5816, + "step": 102750 + }, + { + "epoch": 0.41516340291777937, + "grad_norm": 779.9930419921875, + "learning_rate": 2.9325262499798497e-05, + "loss": 119.7711, + "step": 102760 + }, + { + "epoch": 0.415203804183147, + "grad_norm": 497.36810302734375, + "learning_rate": 2.9322792010998372e-05, + "loss": 86.7908, + "step": 102770 + }, + { + "epoch": 0.41524420544851465, + "grad_norm": 880.1007080078125, + "learning_rate": 2.932032134044562e-05, + "loss": 77.7764, + "step": 102780 + }, + { + "epoch": 0.4152846067138823, + "grad_norm": 783.8365478515625, + "learning_rate": 2.9317850488188394e-05, + "loss": 64.9244, + "step": 102790 + }, + { + "epoch": 0.4153250079792499, + "grad_norm": 570.3333129882812, + "learning_rate": 2.9315379454274886e-05, + "loss": 69.5671, + "step": 102800 + }, + { + "epoch": 0.41536540924461757, + "grad_norm": 759.520751953125, + "learning_rate": 2.9312908238753262e-05, + "loss": 83.6992, + "step": 102810 + }, + { + "epoch": 0.41540581050998515, + "grad_norm": 932.0973510742188, + "learning_rate": 2.931043684167169e-05, + "loss": 109.0697, + "step": 102820 + }, + { + "epoch": 0.4154462117753528, + "grad_norm": 741.29638671875, + "learning_rate": 2.9307965263078366e-05, + "loss": 106.0171, + "step": 102830 + }, + { + "epoch": 0.41548661304072043, + "grad_norm": 517.0711059570312, + "learning_rate": 2.930549350302146e-05, + "loss": 82.2335, + "step": 102840 + }, + { + "epoch": 0.41552701430608807, + "grad_norm": 846.7621459960938, + "learning_rate": 2.930302156154917e-05, + "loss": 78.3867, + "step": 102850 + }, + { + "epoch": 0.4155674155714557, + "grad_norm": 255.9545135498047, + "learning_rate": 2.9300549438709686e-05, + "loss": 105.1691, + "step": 102860 + }, + { + "epoch": 0.41560781683682335, + "grad_norm": 1163.7276611328125, + "learning_rate": 2.92980771345512e-05, + "loss": 106.2785, + "step": 102870 + }, + { + "epoch": 0.41564821810219094, + "grad_norm": 733.15673828125, 
+ "learning_rate": 2.9295604649121912e-05, + "loss": 43.9508, + "step": 102880 + }, + { + "epoch": 0.4156886193675586, + "grad_norm": 477.54119873046875, + "learning_rate": 2.929313198247003e-05, + "loss": 74.4056, + "step": 102890 + }, + { + "epoch": 0.4157290206329262, + "grad_norm": 512.30908203125, + "learning_rate": 2.929065913464376e-05, + "loss": 63.5092, + "step": 102900 + }, + { + "epoch": 0.41576942189829386, + "grad_norm": 666.0638427734375, + "learning_rate": 2.9288186105691298e-05, + "loss": 67.694, + "step": 102910 + }, + { + "epoch": 0.4158098231636615, + "grad_norm": 700.628173828125, + "learning_rate": 2.9285712895660868e-05, + "loss": 118.304, + "step": 102920 + }, + { + "epoch": 0.41585022442902914, + "grad_norm": 617.6477661132812, + "learning_rate": 2.9283239504600686e-05, + "loss": 64.1366, + "step": 102930 + }, + { + "epoch": 0.4158906256943968, + "grad_norm": 442.2611083984375, + "learning_rate": 2.928076593255897e-05, + "loss": 73.9806, + "step": 102940 + }, + { + "epoch": 0.41593102695976436, + "grad_norm": 395.4718017578125, + "learning_rate": 2.9278292179583943e-05, + "loss": 68.321, + "step": 102950 + }, + { + "epoch": 0.415971428225132, + "grad_norm": 759.212158203125, + "learning_rate": 2.9275818245723836e-05, + "loss": 76.3137, + "step": 102960 + }, + { + "epoch": 0.41601182949049964, + "grad_norm": 1543.3028564453125, + "learning_rate": 2.927334413102687e-05, + "loss": 100.1201, + "step": 102970 + }, + { + "epoch": 0.4160522307558673, + "grad_norm": 1056.102294921875, + "learning_rate": 2.9270869835541295e-05, + "loss": 59.138, + "step": 102980 + }, + { + "epoch": 0.4160926320212349, + "grad_norm": 466.1551208496094, + "learning_rate": 2.926839535931534e-05, + "loss": 107.2554, + "step": 102990 + }, + { + "epoch": 0.41613303328660256, + "grad_norm": 694.722900390625, + "learning_rate": 2.926592070239724e-05, + "loss": 79.4341, + "step": 103000 + }, + { + "epoch": 0.41617343455197015, + "grad_norm": 334.9779968261719, + "learning_rate": 2.9263445864835244e-05, + "loss": 67.6969, + "step": 103010 + }, + { + "epoch": 0.4162138358173378, + "grad_norm": 375.42999267578125, + "learning_rate": 2.9260970846677605e-05, + "loss": 84.2631, + "step": 103020 + }, + { + "epoch": 0.4162542370827054, + "grad_norm": 662.0013427734375, + "learning_rate": 2.9258495647972572e-05, + "loss": 67.946, + "step": 103030 + }, + { + "epoch": 0.41629463834807306, + "grad_norm": 464.0340881347656, + "learning_rate": 2.9256020268768396e-05, + "loss": 77.359, + "step": 103040 + }, + { + "epoch": 0.4163350396134407, + "grad_norm": 373.83929443359375, + "learning_rate": 2.925354470911334e-05, + "loss": 100.2022, + "step": 103050 + }, + { + "epoch": 0.41637544087880834, + "grad_norm": 425.1412353515625, + "learning_rate": 2.9251068969055674e-05, + "loss": 84.7438, + "step": 103060 + }, + { + "epoch": 0.41641584214417593, + "grad_norm": 784.388671875, + "learning_rate": 2.9248593048643653e-05, + "loss": 75.9624, + "step": 103070 + }, + { + "epoch": 0.41645624340954357, + "grad_norm": 1088.4742431640625, + "learning_rate": 2.924611694792554e-05, + "loss": 126.5729, + "step": 103080 + }, + { + "epoch": 0.4164966446749112, + "grad_norm": 827.8613891601562, + "learning_rate": 2.9243640666949624e-05, + "loss": 86.3651, + "step": 103090 + }, + { + "epoch": 0.41653704594027885, + "grad_norm": 961.62939453125, + "learning_rate": 2.9241164205764176e-05, + "loss": 103.3194, + "step": 103100 + }, + { + "epoch": 0.4165774472056465, + "grad_norm": 698.69921875, + "learning_rate": 2.923868756441747e-05, + 
"loss": 60.2846, + "step": 103110 + }, + { + "epoch": 0.41661784847101413, + "grad_norm": 742.6216430664062, + "learning_rate": 2.9236210742957793e-05, + "loss": 84.8511, + "step": 103120 + }, + { + "epoch": 0.41665824973638177, + "grad_norm": 717.4282836914062, + "learning_rate": 2.923373374143343e-05, + "loss": 81.0356, + "step": 103130 + }, + { + "epoch": 0.41669865100174935, + "grad_norm": 1212.7132568359375, + "learning_rate": 2.9231256559892683e-05, + "loss": 65.7451, + "step": 103140 + }, + { + "epoch": 0.416739052267117, + "grad_norm": 627.8330078125, + "learning_rate": 2.9228779198383834e-05, + "loss": 97.9387, + "step": 103150 + }, + { + "epoch": 0.41677945353248463, + "grad_norm": 405.49359130859375, + "learning_rate": 2.9226301656955177e-05, + "loss": 91.1168, + "step": 103160 + }, + { + "epoch": 0.4168198547978523, + "grad_norm": 699.6193237304688, + "learning_rate": 2.9223823935655026e-05, + "loss": 96.5633, + "step": 103170 + }, + { + "epoch": 0.4168602560632199, + "grad_norm": 869.2167358398438, + "learning_rate": 2.922134603453168e-05, + "loss": 56.2502, + "step": 103180 + }, + { + "epoch": 0.41690065732858755, + "grad_norm": 846.4490356445312, + "learning_rate": 2.9218867953633435e-05, + "loss": 112.2865, + "step": 103190 + }, + { + "epoch": 0.41694105859395514, + "grad_norm": 959.8173828125, + "learning_rate": 2.9216389693008622e-05, + "loss": 87.8304, + "step": 103200 + }, + { + "epoch": 0.4169814598593228, + "grad_norm": 1104.97119140625, + "learning_rate": 2.921391125270554e-05, + "loss": 68.9894, + "step": 103210 + }, + { + "epoch": 0.4170218611246904, + "grad_norm": 688.3469848632812, + "learning_rate": 2.9211432632772525e-05, + "loss": 108.4339, + "step": 103220 + }, + { + "epoch": 0.41706226239005806, + "grad_norm": 639.7540893554688, + "learning_rate": 2.920895383325788e-05, + "loss": 52.0688, + "step": 103230 + }, + { + "epoch": 0.4171026636554257, + "grad_norm": 1160.4830322265625, + "learning_rate": 2.920647485420994e-05, + "loss": 84.3542, + "step": 103240 + }, + { + "epoch": 0.41714306492079334, + "grad_norm": 946.0479736328125, + "learning_rate": 2.9203995695677036e-05, + "loss": 98.7777, + "step": 103250 + }, + { + "epoch": 0.4171834661861609, + "grad_norm": 654.8866577148438, + "learning_rate": 2.9201516357707498e-05, + "loss": 82.909, + "step": 103260 + }, + { + "epoch": 0.41722386745152856, + "grad_norm": 539.5443725585938, + "learning_rate": 2.919903684034966e-05, + "loss": 76.9844, + "step": 103270 + }, + { + "epoch": 0.4172642687168962, + "grad_norm": 520.18798828125, + "learning_rate": 2.919655714365186e-05, + "loss": 82.4719, + "step": 103280 + }, + { + "epoch": 0.41730466998226384, + "grad_norm": 407.04986572265625, + "learning_rate": 2.9194077267662445e-05, + "loss": 79.3716, + "step": 103290 + }, + { + "epoch": 0.4173450712476315, + "grad_norm": 636.1814575195312, + "learning_rate": 2.9191597212429763e-05, + "loss": 89.894, + "step": 103300 + }, + { + "epoch": 0.4173854725129991, + "grad_norm": 688.3976440429688, + "learning_rate": 2.9189116978002156e-05, + "loss": 94.5153, + "step": 103310 + }, + { + "epoch": 0.41742587377836676, + "grad_norm": 850.574951171875, + "learning_rate": 2.9186636564427985e-05, + "loss": 102.0475, + "step": 103320 + }, + { + "epoch": 0.41746627504373435, + "grad_norm": 877.7534790039062, + "learning_rate": 2.918415597175561e-05, + "loss": 82.5757, + "step": 103330 + }, + { + "epoch": 0.417506676309102, + "grad_norm": 776.1998291015625, + "learning_rate": 2.918167520003338e-05, + "loss": 143.4587, + "step": 103340 + }, 
+ { + "epoch": 0.4175470775744696, + "grad_norm": 776.315673828125, + "learning_rate": 2.9179194249309667e-05, + "loss": 70.2369, + "step": 103350 + }, + { + "epoch": 0.41758747883983727, + "grad_norm": 900.3502197265625, + "learning_rate": 2.9176713119632833e-05, + "loss": 111.9323, + "step": 103360 + }, + { + "epoch": 0.4176278801052049, + "grad_norm": 745.75927734375, + "learning_rate": 2.9174231811051253e-05, + "loss": 73.3711, + "step": 103370 + }, + { + "epoch": 0.41766828137057255, + "grad_norm": 748.3355102539062, + "learning_rate": 2.9171750323613305e-05, + "loss": 60.0089, + "step": 103380 + }, + { + "epoch": 0.41770868263594013, + "grad_norm": 708.4754028320312, + "learning_rate": 2.916926865736736e-05, + "loss": 71.7029, + "step": 103390 + }, + { + "epoch": 0.41774908390130777, + "grad_norm": 376.1477966308594, + "learning_rate": 2.9166786812361797e-05, + "loss": 84.9781, + "step": 103400 + }, + { + "epoch": 0.4177894851666754, + "grad_norm": 706.2715454101562, + "learning_rate": 2.9164304788645013e-05, + "loss": 67.3099, + "step": 103410 + }, + { + "epoch": 0.41782988643204305, + "grad_norm": 568.7808227539062, + "learning_rate": 2.9161822586265387e-05, + "loss": 102.8973, + "step": 103420 + }, + { + "epoch": 0.4178702876974107, + "grad_norm": 1024.5345458984375, + "learning_rate": 2.9159340205271313e-05, + "loss": 53.6633, + "step": 103430 + }, + { + "epoch": 0.41791068896277833, + "grad_norm": 1002.703125, + "learning_rate": 2.9156857645711184e-05, + "loss": 80.6511, + "step": 103440 + }, + { + "epoch": 0.41795109022814597, + "grad_norm": 722.1302490234375, + "learning_rate": 2.9154374907633403e-05, + "loss": 73.9681, + "step": 103450 + }, + { + "epoch": 0.41799149149351356, + "grad_norm": 875.416748046875, + "learning_rate": 2.9151891991086367e-05, + "loss": 84.7868, + "step": 103460 + }, + { + "epoch": 0.4180318927588812, + "grad_norm": 1315.9083251953125, + "learning_rate": 2.914940889611849e-05, + "loss": 101.2237, + "step": 103470 + }, + { + "epoch": 0.41807229402424884, + "grad_norm": 702.8494262695312, + "learning_rate": 2.9146925622778175e-05, + "loss": 64.0932, + "step": 103480 + }, + { + "epoch": 0.4181126952896165, + "grad_norm": 948.8506469726562, + "learning_rate": 2.9144442171113835e-05, + "loss": 85.8233, + "step": 103490 + }, + { + "epoch": 0.4181530965549841, + "grad_norm": 772.9378051757812, + "learning_rate": 2.914195854117389e-05, + "loss": 57.9209, + "step": 103500 + }, + { + "epoch": 0.41819349782035176, + "grad_norm": 406.3412170410156, + "learning_rate": 2.913947473300675e-05, + "loss": 89.0092, + "step": 103510 + }, + { + "epoch": 0.41823389908571934, + "grad_norm": 697.8853759765625, + "learning_rate": 2.913699074666085e-05, + "loss": 85.0419, + "step": 103520 + }, + { + "epoch": 0.418274300351087, + "grad_norm": 403.1318664550781, + "learning_rate": 2.9134506582184606e-05, + "loss": 68.0773, + "step": 103530 + }, + { + "epoch": 0.4183147016164546, + "grad_norm": 982.935791015625, + "learning_rate": 2.9132022239626456e-05, + "loss": 61.8023, + "step": 103540 + }, + { + "epoch": 0.41835510288182226, + "grad_norm": 703.4404907226562, + "learning_rate": 2.9129537719034835e-05, + "loss": 83.7917, + "step": 103550 + }, + { + "epoch": 0.4183955041471899, + "grad_norm": 464.0213623046875, + "learning_rate": 2.912705302045817e-05, + "loss": 77.3529, + "step": 103560 + }, + { + "epoch": 0.41843590541255754, + "grad_norm": 1040.42236328125, + "learning_rate": 2.912456814394491e-05, + "loss": 77.8843, + "step": 103570 + }, + { + "epoch": 0.4184763066779251, + 
"grad_norm": 764.7724609375, + "learning_rate": 2.91220830895435e-05, + "loss": 82.9925, + "step": 103580 + }, + { + "epoch": 0.41851670794329277, + "grad_norm": 429.0096435546875, + "learning_rate": 2.9119597857302378e-05, + "loss": 85.6849, + "step": 103590 + }, + { + "epoch": 0.4185571092086604, + "grad_norm": 774.899658203125, + "learning_rate": 2.9117112447270007e-05, + "loss": 86.1154, + "step": 103600 + }, + { + "epoch": 0.41859751047402805, + "grad_norm": 248.62535095214844, + "learning_rate": 2.9114626859494826e-05, + "loss": 70.1185, + "step": 103610 + }, + { + "epoch": 0.4186379117393957, + "grad_norm": 598.5653076171875, + "learning_rate": 2.9112141094025306e-05, + "loss": 97.5988, + "step": 103620 + }, + { + "epoch": 0.4186783130047633, + "grad_norm": 384.2664794921875, + "learning_rate": 2.910965515090991e-05, + "loss": 98.9108, + "step": 103630 + }, + { + "epoch": 0.41871871427013097, + "grad_norm": 619.362060546875, + "learning_rate": 2.910716903019709e-05, + "loss": 93.1227, + "step": 103640 + }, + { + "epoch": 0.41875911553549855, + "grad_norm": 1446.725830078125, + "learning_rate": 2.910468273193532e-05, + "loss": 86.9909, + "step": 103650 + }, + { + "epoch": 0.4187995168008662, + "grad_norm": 297.9083557128906, + "learning_rate": 2.9102196256173077e-05, + "loss": 99.7383, + "step": 103660 + }, + { + "epoch": 0.41883991806623383, + "grad_norm": 674.8902587890625, + "learning_rate": 2.909970960295883e-05, + "loss": 88.7265, + "step": 103670 + }, + { + "epoch": 0.41888031933160147, + "grad_norm": 1060.3572998046875, + "learning_rate": 2.9097222772341064e-05, + "loss": 66.5391, + "step": 103680 + }, + { + "epoch": 0.4189207205969691, + "grad_norm": 820.8623657226562, + "learning_rate": 2.909473576436825e-05, + "loss": 83.21, + "step": 103690 + }, + { + "epoch": 0.41896112186233675, + "grad_norm": 549.6026611328125, + "learning_rate": 2.909224857908888e-05, + "loss": 84.01, + "step": 103700 + }, + { + "epoch": 0.41900152312770433, + "grad_norm": 993.6141357421875, + "learning_rate": 2.9089761216551448e-05, + "loss": 131.1072, + "step": 103710 + }, + { + "epoch": 0.419041924393072, + "grad_norm": 816.5700073242188, + "learning_rate": 2.908727367680444e-05, + "loss": 90.2224, + "step": 103720 + }, + { + "epoch": 0.4190823256584396, + "grad_norm": 787.0625610351562, + "learning_rate": 2.908478595989635e-05, + "loss": 79.0271, + "step": 103730 + }, + { + "epoch": 0.41912272692380725, + "grad_norm": 515.69775390625, + "learning_rate": 2.908229806587568e-05, + "loss": 73.9338, + "step": 103740 + }, + { + "epoch": 0.4191631281891749, + "grad_norm": 432.7344665527344, + "learning_rate": 2.9079809994790937e-05, + "loss": 36.7718, + "step": 103750 + }, + { + "epoch": 0.41920352945454253, + "grad_norm": 846.4924926757812, + "learning_rate": 2.9077321746690623e-05, + "loss": 64.2515, + "step": 103760 + }, + { + "epoch": 0.4192439307199102, + "grad_norm": 814.8074340820312, + "learning_rate": 2.907483332162325e-05, + "loss": 110.2781, + "step": 103770 + }, + { + "epoch": 0.41928433198527776, + "grad_norm": 572.6931762695312, + "learning_rate": 2.9072344719637325e-05, + "loss": 70.0534, + "step": 103780 + }, + { + "epoch": 0.4193247332506454, + "grad_norm": 1141.197998046875, + "learning_rate": 2.9069855940781373e-05, + "loss": 110.1513, + "step": 103790 + }, + { + "epoch": 0.41936513451601304, + "grad_norm": 856.073974609375, + "learning_rate": 2.9067366985103908e-05, + "loss": 81.2237, + "step": 103800 + }, + { + "epoch": 0.4194055357813807, + "grad_norm": 1125.92919921875, + 
"learning_rate": 2.9064877852653452e-05, + "loss": 63.9564, + "step": 103810 + }, + { + "epoch": 0.4194459370467483, + "grad_norm": 2806.7490234375, + "learning_rate": 2.906238854347854e-05, + "loss": 61.888, + "step": 103820 + }, + { + "epoch": 0.41948633831211596, + "grad_norm": 740.0335083007812, + "learning_rate": 2.9059899057627697e-05, + "loss": 80.1686, + "step": 103830 + }, + { + "epoch": 0.41952673957748354, + "grad_norm": 1129.919189453125, + "learning_rate": 2.9057409395149457e-05, + "loss": 73.5674, + "step": 103840 + }, + { + "epoch": 0.4195671408428512, + "grad_norm": 643.6719970703125, + "learning_rate": 2.905491955609236e-05, + "loss": 95.5317, + "step": 103850 + }, + { + "epoch": 0.4196075421082188, + "grad_norm": 735.3046875, + "learning_rate": 2.9052429540504943e-05, + "loss": 61.6064, + "step": 103860 + }, + { + "epoch": 0.41964794337358646, + "grad_norm": 1198.06103515625, + "learning_rate": 2.904993934843575e-05, + "loss": 101.3035, + "step": 103870 + }, + { + "epoch": 0.4196883446389541, + "grad_norm": 941.5462646484375, + "learning_rate": 2.9047448979933332e-05, + "loss": 73.7928, + "step": 103880 + }, + { + "epoch": 0.41972874590432174, + "grad_norm": 433.3593444824219, + "learning_rate": 2.9044958435046234e-05, + "loss": 63.6045, + "step": 103890 + }, + { + "epoch": 0.4197691471696893, + "grad_norm": 998.6087036132812, + "learning_rate": 2.9042467713823015e-05, + "loss": 132.9427, + "step": 103900 + }, + { + "epoch": 0.41980954843505697, + "grad_norm": 667.7609252929688, + "learning_rate": 2.9039976816312242e-05, + "loss": 100.8477, + "step": 103910 + }, + { + "epoch": 0.4198499497004246, + "grad_norm": 835.2872924804688, + "learning_rate": 2.9037485742562458e-05, + "loss": 79.4456, + "step": 103920 + }, + { + "epoch": 0.41989035096579225, + "grad_norm": 395.7143859863281, + "learning_rate": 2.9034994492622232e-05, + "loss": 59.1453, + "step": 103930 + }, + { + "epoch": 0.4199307522311599, + "grad_norm": 938.9763793945312, + "learning_rate": 2.9032503066540152e-05, + "loss": 90.6419, + "step": 103940 + }, + { + "epoch": 0.4199711534965275, + "grad_norm": 456.6357421875, + "learning_rate": 2.9030011464364768e-05, + "loss": 93.0894, + "step": 103950 + }, + { + "epoch": 0.42001155476189517, + "grad_norm": 440.40087890625, + "learning_rate": 2.9027519686144655e-05, + "loss": 79.5251, + "step": 103960 + }, + { + "epoch": 0.42005195602726275, + "grad_norm": 625.7998657226562, + "learning_rate": 2.9025027731928405e-05, + "loss": 66.2964, + "step": 103970 + }, + { + "epoch": 0.4200923572926304, + "grad_norm": 535.0301513671875, + "learning_rate": 2.9022535601764587e-05, + "loss": 78.119, + "step": 103980 + }, + { + "epoch": 0.42013275855799803, + "grad_norm": 561.8072509765625, + "learning_rate": 2.902004329570179e-05, + "loss": 87.1498, + "step": 103990 + }, + { + "epoch": 0.42017315982336567, + "grad_norm": 871.46728515625, + "learning_rate": 2.9017550813788616e-05, + "loss": 76.3153, + "step": 104000 + }, + { + "epoch": 0.4202135610887333, + "grad_norm": 738.1829223632812, + "learning_rate": 2.9015058156073645e-05, + "loss": 60.6342, + "step": 104010 + }, + { + "epoch": 0.42025396235410095, + "grad_norm": 735.241943359375, + "learning_rate": 2.901256532260547e-05, + "loss": 86.5872, + "step": 104020 + }, + { + "epoch": 0.42029436361946854, + "grad_norm": 1209.6143798828125, + "learning_rate": 2.9010072313432693e-05, + "loss": 79.0249, + "step": 104030 + }, + { + "epoch": 0.4203347648848362, + "grad_norm": 617.3246459960938, + "learning_rate": 2.9007579128603924e-05, + 
"loss": 78.0693, + "step": 104040 + }, + { + "epoch": 0.4203751661502038, + "grad_norm": 1134.20361328125, + "learning_rate": 2.9005085768167753e-05, + "loss": 88.2406, + "step": 104050 + }, + { + "epoch": 0.42041556741557146, + "grad_norm": 1022.9719848632812, + "learning_rate": 2.9002592232172802e-05, + "loss": 62.8195, + "step": 104060 + }, + { + "epoch": 0.4204559686809391, + "grad_norm": 239.638671875, + "learning_rate": 2.9000098520667685e-05, + "loss": 96.6558, + "step": 104070 + }, + { + "epoch": 0.42049636994630674, + "grad_norm": 623.9303588867188, + "learning_rate": 2.8997604633701007e-05, + "loss": 77.3504, + "step": 104080 + }, + { + "epoch": 0.4205367712116744, + "grad_norm": 407.93792724609375, + "learning_rate": 2.8995110571321402e-05, + "loss": 45.9619, + "step": 104090 + }, + { + "epoch": 0.42057717247704196, + "grad_norm": 449.3992614746094, + "learning_rate": 2.899261633357748e-05, + "loss": 77.0531, + "step": 104100 + }, + { + "epoch": 0.4206175737424096, + "grad_norm": 611.480224609375, + "learning_rate": 2.8990121920517876e-05, + "loss": 60.5513, + "step": 104110 + }, + { + "epoch": 0.42065797500777724, + "grad_norm": 882.1436157226562, + "learning_rate": 2.8987627332191218e-05, + "loss": 82.5429, + "step": 104120 + }, + { + "epoch": 0.4206983762731449, + "grad_norm": 241.77342224121094, + "learning_rate": 2.8985132568646132e-05, + "loss": 78.0289, + "step": 104130 + }, + { + "epoch": 0.4207387775385125, + "grad_norm": 294.696533203125, + "learning_rate": 2.898263762993126e-05, + "loss": 76.8653, + "step": 104140 + }, + { + "epoch": 0.42077917880388016, + "grad_norm": 1163.3746337890625, + "learning_rate": 2.898014251609525e-05, + "loss": 80.9619, + "step": 104150 + }, + { + "epoch": 0.42081958006924775, + "grad_norm": 597.35546875, + "learning_rate": 2.8977647227186736e-05, + "loss": 82.2987, + "step": 104160 + }, + { + "epoch": 0.4208599813346154, + "grad_norm": 872.8321533203125, + "learning_rate": 2.8975151763254364e-05, + "loss": 94.0573, + "step": 104170 + }, + { + "epoch": 0.420900382599983, + "grad_norm": 746.0072631835938, + "learning_rate": 2.897265612434679e-05, + "loss": 86.6843, + "step": 104180 + }, + { + "epoch": 0.42094078386535067, + "grad_norm": 388.1343994140625, + "learning_rate": 2.8970160310512666e-05, + "loss": 70.5035, + "step": 104190 + }, + { + "epoch": 0.4209811851307183, + "grad_norm": 454.0213623046875, + "learning_rate": 2.8967664321800653e-05, + "loss": 55.1291, + "step": 104200 + }, + { + "epoch": 0.42102158639608595, + "grad_norm": 536.0286865234375, + "learning_rate": 2.89651681582594e-05, + "loss": 64.9818, + "step": 104210 + }, + { + "epoch": 0.42106198766145353, + "grad_norm": 1121.9163818359375, + "learning_rate": 2.896267181993758e-05, + "loss": 108.4052, + "step": 104220 + }, + { + "epoch": 0.42110238892682117, + "grad_norm": 1033.5213623046875, + "learning_rate": 2.8960175306883854e-05, + "loss": 101.1812, + "step": 104230 + }, + { + "epoch": 0.4211427901921888, + "grad_norm": 289.15594482421875, + "learning_rate": 2.8957678619146907e-05, + "loss": 72.9512, + "step": 104240 + }, + { + "epoch": 0.42118319145755645, + "grad_norm": 389.9089050292969, + "learning_rate": 2.8955181756775395e-05, + "loss": 62.4539, + "step": 104250 + }, + { + "epoch": 0.4212235927229241, + "grad_norm": 1762.1082763671875, + "learning_rate": 2.895268471981801e-05, + "loss": 102.8078, + "step": 104260 + }, + { + "epoch": 0.42126399398829173, + "grad_norm": 1047.668701171875, + "learning_rate": 2.8950187508323422e-05, + "loss": 88.0333, + "step": 104270 
+ }, + { + "epoch": 0.42130439525365937, + "grad_norm": 467.64599609375, + "learning_rate": 2.894769012234033e-05, + "loss": 70.8164, + "step": 104280 + }, + { + "epoch": 0.42134479651902695, + "grad_norm": 734.701416015625, + "learning_rate": 2.894519256191741e-05, + "loss": 71.3522, + "step": 104290 + }, + { + "epoch": 0.4213851977843946, + "grad_norm": 915.85888671875, + "learning_rate": 2.8942694827103346e-05, + "loss": 69.9152, + "step": 104300 + }, + { + "epoch": 0.42142559904976223, + "grad_norm": 512.060302734375, + "learning_rate": 2.894019691794685e-05, + "loss": 91.8809, + "step": 104310 + }, + { + "epoch": 0.4214660003151299, + "grad_norm": 973.54345703125, + "learning_rate": 2.893769883449661e-05, + "loss": 69.9998, + "step": 104320 + }, + { + "epoch": 0.4215064015804975, + "grad_norm": 359.21929931640625, + "learning_rate": 2.893520057680133e-05, + "loss": 60.4851, + "step": 104330 + }, + { + "epoch": 0.42154680284586515, + "grad_norm": 1068.87353515625, + "learning_rate": 2.8932702144909713e-05, + "loss": 97.0551, + "step": 104340 + }, + { + "epoch": 0.42158720411123274, + "grad_norm": 1693.8504638671875, + "learning_rate": 2.893020353887047e-05, + "loss": 79.85, + "step": 104350 + }, + { + "epoch": 0.4216276053766004, + "grad_norm": 648.1838989257812, + "learning_rate": 2.892770475873232e-05, + "loss": 78.3473, + "step": 104360 + }, + { + "epoch": 0.421668006641968, + "grad_norm": 556.5646362304688, + "learning_rate": 2.892520580454396e-05, + "loss": 85.455, + "step": 104370 + }, + { + "epoch": 0.42170840790733566, + "grad_norm": 802.2183227539062, + "learning_rate": 2.892270667635412e-05, + "loss": 56.0786, + "step": 104380 + }, + { + "epoch": 0.4217488091727033, + "grad_norm": 1538.5386962890625, + "learning_rate": 2.8920207374211516e-05, + "loss": 67.3399, + "step": 104390 + }, + { + "epoch": 0.42178921043807094, + "grad_norm": 527.6337280273438, + "learning_rate": 2.8917707898164886e-05, + "loss": 54.8555, + "step": 104400 + }, + { + "epoch": 0.4218296117034386, + "grad_norm": 913.2432861328125, + "learning_rate": 2.891520824826294e-05, + "loss": 135.8501, + "step": 104410 + }, + { + "epoch": 0.42187001296880616, + "grad_norm": 734.3636474609375, + "learning_rate": 2.891270842455442e-05, + "loss": 54.7417, + "step": 104420 + }, + { + "epoch": 0.4219104142341738, + "grad_norm": 618.9473876953125, + "learning_rate": 2.891020842708806e-05, + "loss": 115.2495, + "step": 104430 + }, + { + "epoch": 0.42195081549954144, + "grad_norm": 912.1622314453125, + "learning_rate": 2.890770825591261e-05, + "loss": 71.1039, + "step": 104440 + }, + { + "epoch": 0.4219912167649091, + "grad_norm": 819.3677368164062, + "learning_rate": 2.8905207911076795e-05, + "loss": 88.8971, + "step": 104450 + }, + { + "epoch": 0.4220316180302767, + "grad_norm": 340.776123046875, + "learning_rate": 2.8902707392629366e-05, + "loss": 55.6186, + "step": 104460 + }, + { + "epoch": 0.42207201929564436, + "grad_norm": 671.2758178710938, + "learning_rate": 2.8900206700619072e-05, + "loss": 87.2669, + "step": 104470 + }, + { + "epoch": 0.42211242056101195, + "grad_norm": 853.0125732421875, + "learning_rate": 2.889770583509467e-05, + "loss": 104.3613, + "step": 104480 + }, + { + "epoch": 0.4221528218263796, + "grad_norm": 648.9463500976562, + "learning_rate": 2.8895204796104913e-05, + "loss": 75.5523, + "step": 104490 + }, + { + "epoch": 0.4221932230917472, + "grad_norm": 444.09344482421875, + "learning_rate": 2.8892703583698553e-05, + "loss": 89.9276, + "step": 104500 + }, + { + "epoch": 0.42223362435711487, + 
"grad_norm": 515.7337036132812, + "learning_rate": 2.889020219792436e-05, + "loss": 69.8114, + "step": 104510 + }, + { + "epoch": 0.4222740256224825, + "grad_norm": 900.69580078125, + "learning_rate": 2.8887700638831098e-05, + "loss": 77.4262, + "step": 104520 + }, + { + "epoch": 0.42231442688785015, + "grad_norm": 612.1874389648438, + "learning_rate": 2.888519890646754e-05, + "loss": 57.9855, + "step": 104530 + }, + { + "epoch": 0.42235482815321773, + "grad_norm": 541.9161376953125, + "learning_rate": 2.8882697000882455e-05, + "loss": 66.426, + "step": 104540 + }, + { + "epoch": 0.42239522941858537, + "grad_norm": 715.2144775390625, + "learning_rate": 2.8880194922124616e-05, + "loss": 78.9262, + "step": 104550 + }, + { + "epoch": 0.422435630683953, + "grad_norm": 454.2840576171875, + "learning_rate": 2.8877692670242803e-05, + "loss": 101.243, + "step": 104560 + }, + { + "epoch": 0.42247603194932065, + "grad_norm": 636.487060546875, + "learning_rate": 2.8875190245285804e-05, + "loss": 73.6134, + "step": 104570 + }, + { + "epoch": 0.4225164332146883, + "grad_norm": 703.4803466796875, + "learning_rate": 2.88726876473024e-05, + "loss": 57.7372, + "step": 104580 + }, + { + "epoch": 0.42255683448005593, + "grad_norm": 418.8224792480469, + "learning_rate": 2.8870184876341385e-05, + "loss": 72.4984, + "step": 104590 + }, + { + "epoch": 0.42259723574542357, + "grad_norm": 500.6607971191406, + "learning_rate": 2.8867681932451544e-05, + "loss": 90.4348, + "step": 104600 + }, + { + "epoch": 0.42263763701079116, + "grad_norm": 362.71783447265625, + "learning_rate": 2.8865178815681685e-05, + "loss": 95.8614, + "step": 104610 + }, + { + "epoch": 0.4226780382761588, + "grad_norm": 829.4452514648438, + "learning_rate": 2.8862675526080595e-05, + "loss": 77.2645, + "step": 104620 + }, + { + "epoch": 0.42271843954152644, + "grad_norm": 949.4658813476562, + "learning_rate": 2.886017206369709e-05, + "loss": 98.5984, + "step": 104630 + }, + { + "epoch": 0.4227588408068941, + "grad_norm": 967.6311645507812, + "learning_rate": 2.8857668428579964e-05, + "loss": 88.886, + "step": 104640 + }, + { + "epoch": 0.4227992420722617, + "grad_norm": 951.359375, + "learning_rate": 2.885516462077804e-05, + "loss": 82.3266, + "step": 104650 + }, + { + "epoch": 0.42283964333762936, + "grad_norm": 6447.5732421875, + "learning_rate": 2.885266064034011e-05, + "loss": 72.3754, + "step": 104660 + }, + { + "epoch": 0.42288004460299694, + "grad_norm": 508.2853698730469, + "learning_rate": 2.885015648731501e-05, + "loss": 92.9138, + "step": 104670 + }, + { + "epoch": 0.4229204458683646, + "grad_norm": 830.9992065429688, + "learning_rate": 2.8847652161751546e-05, + "loss": 90.3937, + "step": 104680 + }, + { + "epoch": 0.4229608471337322, + "grad_norm": 473.1213684082031, + "learning_rate": 2.8845147663698556e-05, + "loss": 63.2747, + "step": 104690 + }, + { + "epoch": 0.42300124839909986, + "grad_norm": 972.2019653320312, + "learning_rate": 2.8842642993204856e-05, + "loss": 77.3634, + "step": 104700 + }, + { + "epoch": 0.4230416496644675, + "grad_norm": 1102.3114013671875, + "learning_rate": 2.884013815031928e-05, + "loss": 105.688, + "step": 104710 + }, + { + "epoch": 0.42308205092983514, + "grad_norm": 625.1220092773438, + "learning_rate": 2.8837633135090656e-05, + "loss": 63.0207, + "step": 104720 + }, + { + "epoch": 0.4231224521952028, + "grad_norm": 1619.56201171875, + "learning_rate": 2.8835127947567828e-05, + "loss": 88.7588, + "step": 104730 + }, + { + "epoch": 0.42316285346057037, + "grad_norm": 1547.9202880859375, + 
"learning_rate": 2.883262258779963e-05, + "loss": 97.9992, + "step": 104740 + }, + { + "epoch": 0.423203254725938, + "grad_norm": 4228.17138671875, + "learning_rate": 2.8830117055834908e-05, + "loss": 152.9501, + "step": 104750 + }, + { + "epoch": 0.42324365599130565, + "grad_norm": 591.4442138671875, + "learning_rate": 2.8827611351722505e-05, + "loss": 57.0487, + "step": 104760 + }, + { + "epoch": 0.4232840572566733, + "grad_norm": 751.7659912109375, + "learning_rate": 2.8825105475511283e-05, + "loss": 118.1129, + "step": 104770 + }, + { + "epoch": 0.4233244585220409, + "grad_norm": 745.6287231445312, + "learning_rate": 2.8822599427250074e-05, + "loss": 122.8834, + "step": 104780 + }, + { + "epoch": 0.42336485978740857, + "grad_norm": 623.7366943359375, + "learning_rate": 2.8820093206987754e-05, + "loss": 127.6994, + "step": 104790 + }, + { + "epoch": 0.42340526105277615, + "grad_norm": 947.1618041992188, + "learning_rate": 2.8817586814773174e-05, + "loss": 82.8127, + "step": 104800 + }, + { + "epoch": 0.4234456623181438, + "grad_norm": 1158.8897705078125, + "learning_rate": 2.8815080250655203e-05, + "loss": 97.0987, + "step": 104810 + }, + { + "epoch": 0.42348606358351143, + "grad_norm": 660.35791015625, + "learning_rate": 2.88125735146827e-05, + "loss": 72.7118, + "step": 104820 + }, + { + "epoch": 0.42352646484887907, + "grad_norm": 725.7471313476562, + "learning_rate": 2.881006660690454e-05, + "loss": 79.5109, + "step": 104830 + }, + { + "epoch": 0.4235668661142467, + "grad_norm": 230.72335815429688, + "learning_rate": 2.8807559527369594e-05, + "loss": 93.6292, + "step": 104840 + }, + { + "epoch": 0.42360726737961435, + "grad_norm": 720.2030639648438, + "learning_rate": 2.8805052276126746e-05, + "loss": 81.7023, + "step": 104850 + }, + { + "epoch": 0.42364766864498193, + "grad_norm": 1370.3331298828125, + "learning_rate": 2.8802544853224864e-05, + "loss": 82.4065, + "step": 104860 + }, + { + "epoch": 0.4236880699103496, + "grad_norm": 606.8391723632812, + "learning_rate": 2.880003725871284e-05, + "loss": 69.6314, + "step": 104870 + }, + { + "epoch": 0.4237284711757172, + "grad_norm": 553.3970947265625, + "learning_rate": 2.8797529492639566e-05, + "loss": 54.3541, + "step": 104880 + }, + { + "epoch": 0.42376887244108485, + "grad_norm": 684.4129028320312, + "learning_rate": 2.879502155505392e-05, + "loss": 101.9425, + "step": 104890 + }, + { + "epoch": 0.4238092737064525, + "grad_norm": 467.1248779296875, + "learning_rate": 2.87925134460048e-05, + "loss": 53.0952, + "step": 104900 + }, + { + "epoch": 0.42384967497182013, + "grad_norm": 1087.8912353515625, + "learning_rate": 2.8790005165541105e-05, + "loss": 77.1562, + "step": 104910 + }, + { + "epoch": 0.4238900762371878, + "grad_norm": 1449.2415771484375, + "learning_rate": 2.878749671371173e-05, + "loss": 92.6296, + "step": 104920 + }, + { + "epoch": 0.42393047750255536, + "grad_norm": 486.91485595703125, + "learning_rate": 2.8784988090565593e-05, + "loss": 52.421, + "step": 104930 + }, + { + "epoch": 0.423970878767923, + "grad_norm": 999.2219848632812, + "learning_rate": 2.878247929615158e-05, + "loss": 89.9152, + "step": 104940 + }, + { + "epoch": 0.42401128003329064, + "grad_norm": 842.1345825195312, + "learning_rate": 2.8779970330518616e-05, + "loss": 87.8745, + "step": 104950 + }, + { + "epoch": 0.4240516812986583, + "grad_norm": 632.5797119140625, + "learning_rate": 2.8777461193715605e-05, + "loss": 88.4344, + "step": 104960 + }, + { + "epoch": 0.4240920825640259, + "grad_norm": 577.797607421875, + "learning_rate": 
2.8774951885791478e-05, + "loss": 86.0273, + "step": 104970 + }, + { + "epoch": 0.42413248382939356, + "grad_norm": 782.6690063476562, + "learning_rate": 2.877244240679514e-05, + "loss": 101.0026, + "step": 104980 + }, + { + "epoch": 0.42417288509476114, + "grad_norm": 472.5457763671875, + "learning_rate": 2.8769932756775523e-05, + "loss": 95.2088, + "step": 104990 + }, + { + "epoch": 0.4242132863601288, + "grad_norm": 371.9153747558594, + "learning_rate": 2.876742293578155e-05, + "loss": 47.5205, + "step": 105000 + }, + { + "epoch": 0.4242536876254964, + "grad_norm": 391.036376953125, + "learning_rate": 2.8764912943862155e-05, + "loss": 96.5551, + "step": 105010 + }, + { + "epoch": 0.42429408889086406, + "grad_norm": 926.7314453125, + "learning_rate": 2.876240278106627e-05, + "loss": 73.8786, + "step": 105020 + }, + { + "epoch": 0.4243344901562317, + "grad_norm": 768.184326171875, + "learning_rate": 2.875989244744283e-05, + "loss": 90.7892, + "step": 105030 + }, + { + "epoch": 0.42437489142159934, + "grad_norm": 1199.2076416015625, + "learning_rate": 2.8757381943040776e-05, + "loss": 63.7623, + "step": 105040 + }, + { + "epoch": 0.424415292686967, + "grad_norm": 1205.672119140625, + "learning_rate": 2.8754871267909056e-05, + "loss": 84.2197, + "step": 105050 + }, + { + "epoch": 0.42445569395233457, + "grad_norm": 729.1465454101562, + "learning_rate": 2.875236042209661e-05, + "loss": 72.6348, + "step": 105060 + }, + { + "epoch": 0.4244960952177022, + "grad_norm": 1134.208984375, + "learning_rate": 2.8749849405652397e-05, + "loss": 73.9928, + "step": 105070 + }, + { + "epoch": 0.42453649648306985, + "grad_norm": 695.9851684570312, + "learning_rate": 2.874733821862536e-05, + "loss": 39.1328, + "step": 105080 + }, + { + "epoch": 0.4245768977484375, + "grad_norm": 602.19189453125, + "learning_rate": 2.8744826861064462e-05, + "loss": 96.754, + "step": 105090 + }, + { + "epoch": 0.4246172990138051, + "grad_norm": 541.1510009765625, + "learning_rate": 2.874231533301866e-05, + "loss": 65.045, + "step": 105100 + }, + { + "epoch": 0.42465770027917277, + "grad_norm": 607.4556274414062, + "learning_rate": 2.873980363453692e-05, + "loss": 83.118, + "step": 105110 + }, + { + "epoch": 0.42469810154454035, + "grad_norm": 1163.156005859375, + "learning_rate": 2.8737291765668208e-05, + "loss": 88.6641, + "step": 105120 + }, + { + "epoch": 0.424738502809908, + "grad_norm": 502.86846923828125, + "learning_rate": 2.8734779726461498e-05, + "loss": 64.7135, + "step": 105130 + }, + { + "epoch": 0.42477890407527563, + "grad_norm": 645.7200927734375, + "learning_rate": 2.8732267516965762e-05, + "loss": 71.9255, + "step": 105140 + }, + { + "epoch": 0.42481930534064327, + "grad_norm": 382.31610107421875, + "learning_rate": 2.872975513722998e-05, + "loss": 81.2682, + "step": 105150 + }, + { + "epoch": 0.4248597066060109, + "grad_norm": 573.1731567382812, + "learning_rate": 2.8727242587303115e-05, + "loss": 65.0699, + "step": 105160 + }, + { + "epoch": 0.42490010787137855, + "grad_norm": 669.7529907226562, + "learning_rate": 2.8724729867234167e-05, + "loss": 100.1225, + "step": 105170 + }, + { + "epoch": 0.42494050913674614, + "grad_norm": 970.8921508789062, + "learning_rate": 2.872221697707212e-05, + "loss": 66.9502, + "step": 105180 + }, + { + "epoch": 0.4249809104021138, + "grad_norm": 346.18353271484375, + "learning_rate": 2.8719703916865966e-05, + "loss": 85.0796, + "step": 105190 + }, + { + "epoch": 0.4250213116674814, + "grad_norm": 545.74072265625, + "learning_rate": 2.8717190686664683e-05, + "loss": 68.1977, + 
"step": 105200 + }, + { + "epoch": 0.42506171293284906, + "grad_norm": 756.230224609375, + "learning_rate": 2.8714677286517285e-05, + "loss": 79.9863, + "step": 105210 + }, + { + "epoch": 0.4251021141982167, + "grad_norm": 612.4761352539062, + "learning_rate": 2.871216371647277e-05, + "loss": 123.8906, + "step": 105220 + }, + { + "epoch": 0.42514251546358434, + "grad_norm": 576.6854248046875, + "learning_rate": 2.870964997658014e-05, + "loss": 89.8964, + "step": 105230 + }, + { + "epoch": 0.425182916728952, + "grad_norm": 947.3173828125, + "learning_rate": 2.8707136066888396e-05, + "loss": 71.5738, + "step": 105240 + }, + { + "epoch": 0.42522331799431956, + "grad_norm": 1179.4605712890625, + "learning_rate": 2.8704621987446555e-05, + "loss": 85.7555, + "step": 105250 + }, + { + "epoch": 0.4252637192596872, + "grad_norm": 952.653564453125, + "learning_rate": 2.8702107738303624e-05, + "loss": 88.4532, + "step": 105260 + }, + { + "epoch": 0.42530412052505484, + "grad_norm": 947.9247436523438, + "learning_rate": 2.8699593319508623e-05, + "loss": 69.0782, + "step": 105270 + }, + { + "epoch": 0.4253445217904225, + "grad_norm": 490.212890625, + "learning_rate": 2.8697078731110566e-05, + "loss": 90.2751, + "step": 105280 + }, + { + "epoch": 0.4253849230557901, + "grad_norm": 791.9286499023438, + "learning_rate": 2.8694563973158484e-05, + "loss": 78.2643, + "step": 105290 + }, + { + "epoch": 0.42542532432115776, + "grad_norm": 1562.5616455078125, + "learning_rate": 2.869204904570141e-05, + "loss": 62.4062, + "step": 105300 + }, + { + "epoch": 0.42546572558652535, + "grad_norm": 929.8128662109375, + "learning_rate": 2.8689533948788354e-05, + "loss": 72.5294, + "step": 105310 + }, + { + "epoch": 0.425506126851893, + "grad_norm": 280.5260009765625, + "learning_rate": 2.868701868246837e-05, + "loss": 87.6952, + "step": 105320 + }, + { + "epoch": 0.4255465281172606, + "grad_norm": 628.096923828125, + "learning_rate": 2.8684503246790477e-05, + "loss": 94.9132, + "step": 105330 + }, + { + "epoch": 0.42558692938262827, + "grad_norm": 696.5733032226562, + "learning_rate": 2.8681987641803727e-05, + "loss": 94.9036, + "step": 105340 + }, + { + "epoch": 0.4256273306479959, + "grad_norm": 457.8122253417969, + "learning_rate": 2.867947186755715e-05, + "loss": 114.9449, + "step": 105350 + }, + { + "epoch": 0.42566773191336355, + "grad_norm": 613.4500732421875, + "learning_rate": 2.8676955924099807e-05, + "loss": 90.427, + "step": 105360 + }, + { + "epoch": 0.4257081331787312, + "grad_norm": 426.1357116699219, + "learning_rate": 2.867443981148074e-05, + "loss": 115.3307, + "step": 105370 + }, + { + "epoch": 0.42574853444409877, + "grad_norm": 839.4988403320312, + "learning_rate": 2.8671923529749003e-05, + "loss": 88.3484, + "step": 105380 + }, + { + "epoch": 0.4257889357094664, + "grad_norm": 1038.85400390625, + "learning_rate": 2.866940707895365e-05, + "loss": 80.4299, + "step": 105390 + }, + { + "epoch": 0.42582933697483405, + "grad_norm": 492.3866271972656, + "learning_rate": 2.8666890459143748e-05, + "loss": 50.8908, + "step": 105400 + }, + { + "epoch": 0.4258697382402017, + "grad_norm": 776.3677368164062, + "learning_rate": 2.866437367036835e-05, + "loss": 94.6155, + "step": 105410 + }, + { + "epoch": 0.42591013950556933, + "grad_norm": 720.854248046875, + "learning_rate": 2.866185671267653e-05, + "loss": 62.2939, + "step": 105420 + }, + { + "epoch": 0.42595054077093697, + "grad_norm": 1156.625, + "learning_rate": 2.8659339586117352e-05, + "loss": 81.3519, + "step": 105430 + }, + { + "epoch": 
0.42599094203630455, + "grad_norm": 1286.5545654296875, + "learning_rate": 2.8656822290739885e-05, + "loss": 84.2254, + "step": 105440 + }, + { + "epoch": 0.4260313433016722, + "grad_norm": 956.8025512695312, + "learning_rate": 2.8654304826593214e-05, + "loss": 75.5316, + "step": 105450 + }, + { + "epoch": 0.42607174456703983, + "grad_norm": 684.2816772460938, + "learning_rate": 2.865178719372642e-05, + "loss": 86.6578, + "step": 105460 + }, + { + "epoch": 0.4261121458324075, + "grad_norm": 523.485595703125, + "learning_rate": 2.8649269392188576e-05, + "loss": 68.9588, + "step": 105470 + }, + { + "epoch": 0.4261525470977751, + "grad_norm": 1049.9564208984375, + "learning_rate": 2.8646751422028773e-05, + "loss": 54.7253, + "step": 105480 + }, + { + "epoch": 0.42619294836314275, + "grad_norm": 366.2418212890625, + "learning_rate": 2.8644233283296105e-05, + "loss": 67.3569, + "step": 105490 + }, + { + "epoch": 0.42623334962851034, + "grad_norm": 800.6529541015625, + "learning_rate": 2.8641714976039652e-05, + "loss": 92.946, + "step": 105500 + }, + { + "epoch": 0.426273750893878, + "grad_norm": 606.2694702148438, + "learning_rate": 2.8639196500308515e-05, + "loss": 48.8487, + "step": 105510 + }, + { + "epoch": 0.4263141521592456, + "grad_norm": 688.59326171875, + "learning_rate": 2.86366778561518e-05, + "loss": 69.7746, + "step": 105520 + }, + { + "epoch": 0.42635455342461326, + "grad_norm": 838.3773803710938, + "learning_rate": 2.8634159043618597e-05, + "loss": 79.8206, + "step": 105530 + }, + { + "epoch": 0.4263949546899809, + "grad_norm": 610.3615112304688, + "learning_rate": 2.863164006275802e-05, + "loss": 116.2035, + "step": 105540 + }, + { + "epoch": 0.42643535595534854, + "grad_norm": 221.18971252441406, + "learning_rate": 2.8629120913619184e-05, + "loss": 105.1783, + "step": 105550 + }, + { + "epoch": 0.4264757572207162, + "grad_norm": 520.5387573242188, + "learning_rate": 2.862660159625119e-05, + "loss": 66.084, + "step": 105560 + }, + { + "epoch": 0.42651615848608376, + "grad_norm": 842.1646728515625, + "learning_rate": 2.862408211070315e-05, + "loss": 104.8879, + "step": 105570 + }, + { + "epoch": 0.4265565597514514, + "grad_norm": 547.8723754882812, + "learning_rate": 2.8621562457024192e-05, + "loss": 67.6587, + "step": 105580 + }, + { + "epoch": 0.42659696101681904, + "grad_norm": 730.6173706054688, + "learning_rate": 2.861904263526344e-05, + "loss": 94.5424, + "step": 105590 + }, + { + "epoch": 0.4266373622821867, + "grad_norm": 884.0079345703125, + "learning_rate": 2.8616522645470012e-05, + "loss": 74.2302, + "step": 105600 + }, + { + "epoch": 0.4266777635475543, + "grad_norm": 1033.256103515625, + "learning_rate": 2.861400248769304e-05, + "loss": 83.1527, + "step": 105610 + }, + { + "epoch": 0.42671816481292196, + "grad_norm": 581.5377807617188, + "learning_rate": 2.861148216198165e-05, + "loss": 72.2787, + "step": 105620 + }, + { + "epoch": 0.42675856607828955, + "grad_norm": 1120.4049072265625, + "learning_rate": 2.8608961668384988e-05, + "loss": 65.2196, + "step": 105630 + }, + { + "epoch": 0.4267989673436572, + "grad_norm": 456.2352600097656, + "learning_rate": 2.860644100695218e-05, + "loss": 62.3308, + "step": 105640 + }, + { + "epoch": 0.4268393686090248, + "grad_norm": 1237.6044921875, + "learning_rate": 2.8603920177732375e-05, + "loss": 106.1678, + "step": 105650 + }, + { + "epoch": 0.42687976987439247, + "grad_norm": 592.0515747070312, + "learning_rate": 2.8601399180774718e-05, + "loss": 91.6578, + "step": 105660 + }, + { + "epoch": 0.4269201711397601, + "grad_norm": 
849.7825317382812, + "learning_rate": 2.859887801612836e-05, + "loss": 78.115, + "step": 105670 + }, + { + "epoch": 0.42696057240512775, + "grad_norm": 716.6973876953125, + "learning_rate": 2.859635668384244e-05, + "loss": 84.1173, + "step": 105680 + }, + { + "epoch": 0.4270009736704954, + "grad_norm": 341.4538269042969, + "learning_rate": 2.8593835183966123e-05, + "loss": 88.0672, + "step": 105690 + }, + { + "epoch": 0.42704137493586297, + "grad_norm": 682.5006103515625, + "learning_rate": 2.8591313516548566e-05, + "loss": 96.7673, + "step": 105700 + }, + { + "epoch": 0.4270817762012306, + "grad_norm": 1058.537353515625, + "learning_rate": 2.858879168163893e-05, + "loss": 88.7436, + "step": 105710 + }, + { + "epoch": 0.42712217746659825, + "grad_norm": 1126.7076416015625, + "learning_rate": 2.858626967928638e-05, + "loss": 102.0898, + "step": 105720 + }, + { + "epoch": 0.4271625787319659, + "grad_norm": 679.111328125, + "learning_rate": 2.8583747509540077e-05, + "loss": 121.8257, + "step": 105730 + }, + { + "epoch": 0.42720297999733353, + "grad_norm": 460.3736877441406, + "learning_rate": 2.85812251724492e-05, + "loss": 56.929, + "step": 105740 + }, + { + "epoch": 0.42724338126270117, + "grad_norm": 916.8309326171875, + "learning_rate": 2.8578702668062922e-05, + "loss": 89.9145, + "step": 105750 + }, + { + "epoch": 0.42728378252806876, + "grad_norm": 542.6611938476562, + "learning_rate": 2.8576179996430418e-05, + "loss": 99.035, + "step": 105760 + }, + { + "epoch": 0.4273241837934364, + "grad_norm": 628.58154296875, + "learning_rate": 2.8573657157600867e-05, + "loss": 73.1292, + "step": 105770 + }, + { + "epoch": 0.42736458505880404, + "grad_norm": 1041.963623046875, + "learning_rate": 2.8571134151623456e-05, + "loss": 75.0053, + "step": 105780 + }, + { + "epoch": 0.4274049863241717, + "grad_norm": 1357.6065673828125, + "learning_rate": 2.8568610978547372e-05, + "loss": 101.7172, + "step": 105790 + }, + { + "epoch": 0.4274453875895393, + "grad_norm": 1541.330078125, + "learning_rate": 2.856608763842181e-05, + "loss": 113.1603, + "step": 105800 + }, + { + "epoch": 0.42748578885490696, + "grad_norm": 601.6787109375, + "learning_rate": 2.8563564131295955e-05, + "loss": 93.4789, + "step": 105810 + }, + { + "epoch": 0.42752619012027454, + "grad_norm": 685.3384399414062, + "learning_rate": 2.8561040457219006e-05, + "loss": 117.5979, + "step": 105820 + }, + { + "epoch": 0.4275665913856422, + "grad_norm": 628.1370239257812, + "learning_rate": 2.8558516616240174e-05, + "loss": 99.6421, + "step": 105830 + }, + { + "epoch": 0.4276069926510098, + "grad_norm": 702.2412109375, + "learning_rate": 2.855599260840865e-05, + "loss": 74.0526, + "step": 105840 + }, + { + "epoch": 0.42764739391637746, + "grad_norm": 909.5553588867188, + "learning_rate": 2.8553468433773646e-05, + "loss": 89.7569, + "step": 105850 + }, + { + "epoch": 0.4276877951817451, + "grad_norm": 651.8424682617188, + "learning_rate": 2.8550944092384364e-05, + "loss": 72.0429, + "step": 105860 + }, + { + "epoch": 0.42772819644711274, + "grad_norm": 1025.9189453125, + "learning_rate": 2.8548419584290033e-05, + "loss": 112.0891, + "step": 105870 + }, + { + "epoch": 0.4277685977124804, + "grad_norm": 498.83013916015625, + "learning_rate": 2.854589490953986e-05, + "loss": 75.4983, + "step": 105880 + }, + { + "epoch": 0.42780899897784797, + "grad_norm": 319.6015319824219, + "learning_rate": 2.8543370068183062e-05, + "loss": 65.8425, + "step": 105890 + }, + { + "epoch": 0.4278494002432156, + "grad_norm": 562.4129638671875, + "learning_rate": 
2.8540845060268867e-05, + "loss": 72.4294, + "step": 105900 + }, + { + "epoch": 0.42788980150858325, + "grad_norm": 206.02456665039062, + "learning_rate": 2.8538319885846507e-05, + "loss": 79.9377, + "step": 105910 + }, + { + "epoch": 0.4279302027739509, + "grad_norm": 588.7818603515625, + "learning_rate": 2.8535794544965202e-05, + "loss": 77.8221, + "step": 105920 + }, + { + "epoch": 0.4279706040393185, + "grad_norm": 646.3232421875, + "learning_rate": 2.8533269037674184e-05, + "loss": 67.5873, + "step": 105930 + }, + { + "epoch": 0.42801100530468617, + "grad_norm": 1006.4345703125, + "learning_rate": 2.8530743364022698e-05, + "loss": 90.0275, + "step": 105940 + }, + { + "epoch": 0.42805140657005375, + "grad_norm": 3153.241943359375, + "learning_rate": 2.8528217524059978e-05, + "loss": 121.906, + "step": 105950 + }, + { + "epoch": 0.4280918078354214, + "grad_norm": 350.9641418457031, + "learning_rate": 2.852569151783526e-05, + "loss": 67.7539, + "step": 105960 + }, + { + "epoch": 0.42813220910078903, + "grad_norm": 565.3098754882812, + "learning_rate": 2.85231653453978e-05, + "loss": 83.0627, + "step": 105970 + }, + { + "epoch": 0.42817261036615667, + "grad_norm": 1315.7275390625, + "learning_rate": 2.8520639006796842e-05, + "loss": 84.4946, + "step": 105980 + }, + { + "epoch": 0.4282130116315243, + "grad_norm": 203.90594482421875, + "learning_rate": 2.8518112502081643e-05, + "loss": 95.6775, + "step": 105990 + }, + { + "epoch": 0.42825341289689195, + "grad_norm": 945.8977661132812, + "learning_rate": 2.8515585831301456e-05, + "loss": 63.4765, + "step": 106000 + }, + { + "epoch": 0.4282938141622596, + "grad_norm": 738.0753784179688, + "learning_rate": 2.851305899450554e-05, + "loss": 53.6151, + "step": 106010 + }, + { + "epoch": 0.4283342154276272, + "grad_norm": 204.49246215820312, + "learning_rate": 2.851053199174315e-05, + "loss": 68.7102, + "step": 106020 + }, + { + "epoch": 0.4283746166929948, + "grad_norm": 1157.87109375, + "learning_rate": 2.850800482306356e-05, + "loss": 85.4013, + "step": 106030 + }, + { + "epoch": 0.42841501795836245, + "grad_norm": 806.1469116210938, + "learning_rate": 2.8505477488516038e-05, + "loss": 68.8984, + "step": 106040 + }, + { + "epoch": 0.4284554192237301, + "grad_norm": 949.0098876953125, + "learning_rate": 2.8502949988149854e-05, + "loss": 85.1295, + "step": 106050 + }, + { + "epoch": 0.42849582048909773, + "grad_norm": 1247.1748046875, + "learning_rate": 2.850042232201428e-05, + "loss": 94.6982, + "step": 106060 + }, + { + "epoch": 0.4285362217544654, + "grad_norm": 1140.5665283203125, + "learning_rate": 2.8497894490158594e-05, + "loss": 73.0619, + "step": 106070 + }, + { + "epoch": 0.42857662301983296, + "grad_norm": 386.7445983886719, + "learning_rate": 2.8495366492632084e-05, + "loss": 119.2286, + "step": 106080 + }, + { + "epoch": 0.4286170242852006, + "grad_norm": 373.41644287109375, + "learning_rate": 2.8492838329484026e-05, + "loss": 68.7465, + "step": 106090 + }, + { + "epoch": 0.42865742555056824, + "grad_norm": 946.2130737304688, + "learning_rate": 2.849031000076372e-05, + "loss": 75.3913, + "step": 106100 + }, + { + "epoch": 0.4286978268159359, + "grad_norm": 519.6054077148438, + "learning_rate": 2.848778150652044e-05, + "loss": 81.5239, + "step": 106110 + }, + { + "epoch": 0.4287382280813035, + "grad_norm": 1362.129638671875, + "learning_rate": 2.84852528468035e-05, + "loss": 87.1364, + "step": 106120 + }, + { + "epoch": 0.42877862934667116, + "grad_norm": 528.8798828125, + "learning_rate": 2.8482724021662183e-05, + "loss": 61.4114, + 
"step": 106130 + }, + { + "epoch": 0.42881903061203874, + "grad_norm": 1209.891845703125, + "learning_rate": 2.8480195031145793e-05, + "loss": 96.5934, + "step": 106140 + }, + { + "epoch": 0.4288594318774064, + "grad_norm": 728.5469360351562, + "learning_rate": 2.8477665875303632e-05, + "loss": 107.1393, + "step": 106150 + }, + { + "epoch": 0.428899833142774, + "grad_norm": 1458.7572021484375, + "learning_rate": 2.8475136554185017e-05, + "loss": 107.2309, + "step": 106160 + }, + { + "epoch": 0.42894023440814166, + "grad_norm": 644.18701171875, + "learning_rate": 2.8472607067839243e-05, + "loss": 96.9709, + "step": 106170 + }, + { + "epoch": 0.4289806356735093, + "grad_norm": 256.7289733886719, + "learning_rate": 2.847007741631564e-05, + "loss": 111.6828, + "step": 106180 + }, + { + "epoch": 0.42902103693887694, + "grad_norm": 678.3931884765625, + "learning_rate": 2.8467547599663517e-05, + "loss": 117.622, + "step": 106190 + }, + { + "epoch": 0.4290614382042446, + "grad_norm": 535.7477416992188, + "learning_rate": 2.8465017617932195e-05, + "loss": 86.2309, + "step": 106200 + }, + { + "epoch": 0.42910183946961217, + "grad_norm": 882.1333618164062, + "learning_rate": 2.8462487471170986e-05, + "loss": 96.5252, + "step": 106210 + }, + { + "epoch": 0.4291422407349798, + "grad_norm": 351.4818420410156, + "learning_rate": 2.8459957159429233e-05, + "loss": 72.2769, + "step": 106220 + }, + { + "epoch": 0.42918264200034745, + "grad_norm": 553.3538818359375, + "learning_rate": 2.845742668275626e-05, + "loss": 76.8763, + "step": 106230 + }, + { + "epoch": 0.4292230432657151, + "grad_norm": 986.651611328125, + "learning_rate": 2.8454896041201404e-05, + "loss": 98.3256, + "step": 106240 + }, + { + "epoch": 0.4292634445310827, + "grad_norm": 581.5587768554688, + "learning_rate": 2.8452365234813992e-05, + "loss": 53.4478, + "step": 106250 + }, + { + "epoch": 0.42930384579645037, + "grad_norm": 824.5560913085938, + "learning_rate": 2.8449834263643365e-05, + "loss": 55.7812, + "step": 106260 + }, + { + "epoch": 0.42934424706181795, + "grad_norm": 1489.8837890625, + "learning_rate": 2.8447303127738877e-05, + "loss": 97.3654, + "step": 106270 + }, + { + "epoch": 0.4293846483271856, + "grad_norm": 442.5685729980469, + "learning_rate": 2.8444771827149865e-05, + "loss": 64.1154, + "step": 106280 + }, + { + "epoch": 0.42942504959255323, + "grad_norm": 1242.5076904296875, + "learning_rate": 2.8442240361925672e-05, + "loss": 79.1097, + "step": 106290 + }, + { + "epoch": 0.42946545085792087, + "grad_norm": 858.5222778320312, + "learning_rate": 2.8439708732115662e-05, + "loss": 130.7564, + "step": 106300 + }, + { + "epoch": 0.4295058521232885, + "grad_norm": 741.3097534179688, + "learning_rate": 2.8437176937769177e-05, + "loss": 78.2928, + "step": 106310 + }, + { + "epoch": 0.42954625338865615, + "grad_norm": 938.9823608398438, + "learning_rate": 2.8434644978935595e-05, + "loss": 102.8918, + "step": 106320 + }, + { + "epoch": 0.42958665465402374, + "grad_norm": 1010.37890625, + "learning_rate": 2.843211285566426e-05, + "loss": 119.6175, + "step": 106330 + }, + { + "epoch": 0.4296270559193914, + "grad_norm": 950.3468627929688, + "learning_rate": 2.842958056800454e-05, + "loss": 86.0591, + "step": 106340 + }, + { + "epoch": 0.429667457184759, + "grad_norm": 1339.6387939453125, + "learning_rate": 2.842704811600582e-05, + "loss": 83.8761, + "step": 106350 + }, + { + "epoch": 0.42970785845012666, + "grad_norm": 725.3245239257812, + "learning_rate": 2.8424515499717455e-05, + "loss": 90.9035, + "step": 106360 + }, + { + 
"epoch": 0.4297482597154943, + "grad_norm": 1592.190673828125, + "learning_rate": 2.8421982719188812e-05, + "loss": 86.4083, + "step": 106370 + }, + { + "epoch": 0.42978866098086194, + "grad_norm": 666.9827270507812, + "learning_rate": 2.8419449774469286e-05, + "loss": 56.2159, + "step": 106380 + }, + { + "epoch": 0.4298290622462296, + "grad_norm": 542.9052124023438, + "learning_rate": 2.8416916665608254e-05, + "loss": 75.6399, + "step": 106390 + }, + { + "epoch": 0.42986946351159716, + "grad_norm": 489.49395751953125, + "learning_rate": 2.84143833926551e-05, + "loss": 80.1778, + "step": 106400 + }, + { + "epoch": 0.4299098647769648, + "grad_norm": 664.9818725585938, + "learning_rate": 2.8411849955659202e-05, + "loss": 63.793, + "step": 106410 + }, + { + "epoch": 0.42995026604233244, + "grad_norm": 612.2920532226562, + "learning_rate": 2.840931635466996e-05, + "loss": 103.1132, + "step": 106420 + }, + { + "epoch": 0.4299906673077001, + "grad_norm": 601.6674194335938, + "learning_rate": 2.840678258973677e-05, + "loss": 88.7059, + "step": 106430 + }, + { + "epoch": 0.4300310685730677, + "grad_norm": 771.5123291015625, + "learning_rate": 2.8404248660909027e-05, + "loss": 70.0657, + "step": 106440 + }, + { + "epoch": 0.43007146983843536, + "grad_norm": 1929.0484619140625, + "learning_rate": 2.8401714568236133e-05, + "loss": 92.251, + "step": 106450 + }, + { + "epoch": 0.43011187110380295, + "grad_norm": 786.8155517578125, + "learning_rate": 2.8399180311767477e-05, + "loss": 82.6276, + "step": 106460 + }, + { + "epoch": 0.4301522723691706, + "grad_norm": 725.14208984375, + "learning_rate": 2.8396645891552482e-05, + "loss": 71.3501, + "step": 106470 + }, + { + "epoch": 0.4301926736345382, + "grad_norm": 486.4023132324219, + "learning_rate": 2.8394111307640554e-05, + "loss": 103.8624, + "step": 106480 + }, + { + "epoch": 0.43023307489990587, + "grad_norm": 1235.3631591796875, + "learning_rate": 2.8391576560081104e-05, + "loss": 112.2743, + "step": 106490 + }, + { + "epoch": 0.4302734761652735, + "grad_norm": 766.71875, + "learning_rate": 2.8389041648923546e-05, + "loss": 74.7645, + "step": 106500 + }, + { + "epoch": 0.43031387743064115, + "grad_norm": 523.7938842773438, + "learning_rate": 2.83865065742173e-05, + "loss": 76.7925, + "step": 106510 + }, + { + "epoch": 0.4303542786960088, + "grad_norm": 528.7904663085938, + "learning_rate": 2.83839713360118e-05, + "loss": 119.3603, + "step": 106520 + }, + { + "epoch": 0.43039467996137637, + "grad_norm": 1029.6571044921875, + "learning_rate": 2.838143593435646e-05, + "loss": 54.9923, + "step": 106530 + }, + { + "epoch": 0.430435081226744, + "grad_norm": 544.0681762695312, + "learning_rate": 2.8378900369300707e-05, + "loss": 80.2927, + "step": 106540 + }, + { + "epoch": 0.43047548249211165, + "grad_norm": 752.5253295898438, + "learning_rate": 2.837636464089397e-05, + "loss": 61.2584, + "step": 106550 + }, + { + "epoch": 0.4305158837574793, + "grad_norm": 658.489013671875, + "learning_rate": 2.8373828749185706e-05, + "loss": 75.0012, + "step": 106560 + }, + { + "epoch": 0.43055628502284693, + "grad_norm": 612.064208984375, + "learning_rate": 2.8371292694225332e-05, + "loss": 74.4021, + "step": 106570 + }, + { + "epoch": 0.43059668628821457, + "grad_norm": 487.0283508300781, + "learning_rate": 2.83687564760623e-05, + "loss": 57.538, + "step": 106580 + }, + { + "epoch": 0.43063708755358215, + "grad_norm": 837.4032592773438, + "learning_rate": 2.8366220094746047e-05, + "loss": 145.5711, + "step": 106590 + }, + { + "epoch": 0.4306774888189498, + 
"grad_norm": 457.0571594238281, + "learning_rate": 2.8363683550326028e-05, + "loss": 104.4991, + "step": 106600 + }, + { + "epoch": 0.43071789008431743, + "grad_norm": 1261.18798828125, + "learning_rate": 2.83611468428517e-05, + "loss": 98.8511, + "step": 106610 + }, + { + "epoch": 0.4307582913496851, + "grad_norm": 1095.70068359375, + "learning_rate": 2.8358609972372506e-05, + "loss": 80.9313, + "step": 106620 + }, + { + "epoch": 0.4307986926150527, + "grad_norm": 739.7699584960938, + "learning_rate": 2.8356072938937904e-05, + "loss": 91.9183, + "step": 106630 + }, + { + "epoch": 0.43083909388042035, + "grad_norm": 916.5511474609375, + "learning_rate": 2.8353535742597357e-05, + "loss": 69.7805, + "step": 106640 + }, + { + "epoch": 0.43087949514578794, + "grad_norm": 782.9662475585938, + "learning_rate": 2.8350998383400337e-05, + "loss": 75.993, + "step": 106650 + }, + { + "epoch": 0.4309198964111556, + "grad_norm": 834.4674682617188, + "learning_rate": 2.8348460861396297e-05, + "loss": 71.7479, + "step": 106660 + }, + { + "epoch": 0.4309602976765232, + "grad_norm": 427.20416259765625, + "learning_rate": 2.8345923176634717e-05, + "loss": 49.6967, + "step": 106670 + }, + { + "epoch": 0.43100069894189086, + "grad_norm": 825.9140625, + "learning_rate": 2.8343385329165066e-05, + "loss": 74.9985, + "step": 106680 + }, + { + "epoch": 0.4310411002072585, + "grad_norm": 884.0493774414062, + "learning_rate": 2.8340847319036826e-05, + "loss": 92.0689, + "step": 106690 + }, + { + "epoch": 0.43108150147262614, + "grad_norm": 756.5949096679688, + "learning_rate": 2.8338309146299474e-05, + "loss": 62.8186, + "step": 106700 + }, + { + "epoch": 0.4311219027379938, + "grad_norm": 753.9669799804688, + "learning_rate": 2.833577081100249e-05, + "loss": 90.2922, + "step": 106710 + }, + { + "epoch": 0.43116230400336136, + "grad_norm": 1190.60400390625, + "learning_rate": 2.833323231319536e-05, + "loss": 83.0139, + "step": 106720 + }, + { + "epoch": 0.431202705268729, + "grad_norm": 611.068115234375, + "learning_rate": 2.8330693652927582e-05, + "loss": 87.9259, + "step": 106730 + }, + { + "epoch": 0.43124310653409664, + "grad_norm": 408.4233703613281, + "learning_rate": 2.8328154830248635e-05, + "loss": 70.5007, + "step": 106740 + }, + { + "epoch": 0.4312835077994643, + "grad_norm": 683.3032836914062, + "learning_rate": 2.8325615845208028e-05, + "loss": 67.7571, + "step": 106750 + }, + { + "epoch": 0.4313239090648319, + "grad_norm": 885.6742553710938, + "learning_rate": 2.8323076697855247e-05, + "loss": 103.1438, + "step": 106760 + }, + { + "epoch": 0.43136431033019956, + "grad_norm": 455.7897033691406, + "learning_rate": 2.832053738823981e-05, + "loss": 66.9647, + "step": 106770 + }, + { + "epoch": 0.43140471159556715, + "grad_norm": 1922.479736328125, + "learning_rate": 2.8317997916411208e-05, + "loss": 87.1029, + "step": 106780 + }, + { + "epoch": 0.4314451128609348, + "grad_norm": 159.78404235839844, + "learning_rate": 2.8315458282418954e-05, + "loss": 49.9864, + "step": 106790 + }, + { + "epoch": 0.4314855141263024, + "grad_norm": 2295.392822265625, + "learning_rate": 2.831291848631256e-05, + "loss": 116.9219, + "step": 106800 + }, + { + "epoch": 0.43152591539167007, + "grad_norm": 856.0524291992188, + "learning_rate": 2.831037852814154e-05, + "loss": 72.1212, + "step": 106810 + }, + { + "epoch": 0.4315663166570377, + "grad_norm": 555.040771484375, + "learning_rate": 2.8307838407955412e-05, + "loss": 68.4156, + "step": 106820 + }, + { + "epoch": 0.43160671792240535, + "grad_norm": 598.0029907226562, + 
"learning_rate": 2.8305298125803698e-05, + "loss": 45.5126, + "step": 106830 + }, + { + "epoch": 0.431647119187773, + "grad_norm": 919.4389038085938, + "learning_rate": 2.830275768173592e-05, + "loss": 113.3273, + "step": 106840 + }, + { + "epoch": 0.43168752045314057, + "grad_norm": 349.8041076660156, + "learning_rate": 2.830021707580161e-05, + "loss": 97.6073, + "step": 106850 + }, + { + "epoch": 0.4317279217185082, + "grad_norm": 641.8746948242188, + "learning_rate": 2.8297676308050287e-05, + "loss": 69.4519, + "step": 106860 + }, + { + "epoch": 0.43176832298387585, + "grad_norm": 810.9525756835938, + "learning_rate": 2.82951353785315e-05, + "loss": 95.4369, + "step": 106870 + }, + { + "epoch": 0.4318087242492435, + "grad_norm": 953.5210571289062, + "learning_rate": 2.8292594287294776e-05, + "loss": 63.0826, + "step": 106880 + }, + { + "epoch": 0.43184912551461113, + "grad_norm": 1016.9251098632812, + "learning_rate": 2.8290053034389663e-05, + "loss": 76.375, + "step": 106890 + }, + { + "epoch": 0.43188952677997877, + "grad_norm": 330.61175537109375, + "learning_rate": 2.8287511619865687e-05, + "loss": 64.0187, + "step": 106900 + }, + { + "epoch": 0.43192992804534636, + "grad_norm": 334.85955810546875, + "learning_rate": 2.8284970043772405e-05, + "loss": 85.4728, + "step": 106910 + }, + { + "epoch": 0.431970329310714, + "grad_norm": 687.7796020507812, + "learning_rate": 2.8282428306159376e-05, + "loss": 59.1694, + "step": 106920 + }, + { + "epoch": 0.43201073057608164, + "grad_norm": 799.5068969726562, + "learning_rate": 2.827988640707614e-05, + "loss": 111.0711, + "step": 106930 + }, + { + "epoch": 0.4320511318414493, + "grad_norm": 2236.964599609375, + "learning_rate": 2.8277344346572256e-05, + "loss": 61.3413, + "step": 106940 + }, + { + "epoch": 0.4320915331068169, + "grad_norm": 609.0693969726562, + "learning_rate": 2.8274802124697278e-05, + "loss": 75.215, + "step": 106950 + }, + { + "epoch": 0.43213193437218456, + "grad_norm": 1423.853759765625, + "learning_rate": 2.8272259741500775e-05, + "loss": 69.6925, + "step": 106960 + }, + { + "epoch": 0.43217233563755214, + "grad_norm": 359.5812683105469, + "learning_rate": 2.8269717197032313e-05, + "loss": 69.6581, + "step": 106970 + }, + { + "epoch": 0.4322127369029198, + "grad_norm": 754.1324462890625, + "learning_rate": 2.8267174491341456e-05, + "loss": 62.4732, + "step": 106980 + }, + { + "epoch": 0.4322531381682874, + "grad_norm": 1542.2135009765625, + "learning_rate": 2.8264631624477766e-05, + "loss": 59.3229, + "step": 106990 + }, + { + "epoch": 0.43229353943365506, + "grad_norm": 788.8021240234375, + "learning_rate": 2.826208859649084e-05, + "loss": 88.6588, + "step": 107000 + }, + { + "epoch": 0.4323339406990227, + "grad_norm": 357.5472106933594, + "learning_rate": 2.8259545407430236e-05, + "loss": 55.5036, + "step": 107010 + }, + { + "epoch": 0.43237434196439034, + "grad_norm": 769.533203125, + "learning_rate": 2.8257002057345543e-05, + "loss": 65.7196, + "step": 107020 + }, + { + "epoch": 0.432414743229758, + "grad_norm": 583.9786987304688, + "learning_rate": 2.8254458546286344e-05, + "loss": 87.4067, + "step": 107030 + }, + { + "epoch": 0.43245514449512557, + "grad_norm": 777.4867553710938, + "learning_rate": 2.8251914874302232e-05, + "loss": 106.3548, + "step": 107040 + }, + { + "epoch": 0.4324955457604932, + "grad_norm": 1056.5625, + "learning_rate": 2.8249371041442786e-05, + "loss": 104.7058, + "step": 107050 + }, + { + "epoch": 0.43253594702586085, + "grad_norm": 963.8824462890625, + "learning_rate": 
2.8246827047757607e-05, + "loss": 62.784, + "step": 107060 + }, + { + "epoch": 0.4325763482912285, + "grad_norm": 434.55975341796875, + "learning_rate": 2.8244282893296285e-05, + "loss": 77.3973, + "step": 107070 + }, + { + "epoch": 0.4326167495565961, + "grad_norm": 502.359375, + "learning_rate": 2.824173857810842e-05, + "loss": 76.9723, + "step": 107080 + }, + { + "epoch": 0.43265715082196377, + "grad_norm": 606.4881591796875, + "learning_rate": 2.823919410224363e-05, + "loss": 78.7204, + "step": 107090 + }, + { + "epoch": 0.43269755208733135, + "grad_norm": 618.1749877929688, + "learning_rate": 2.82366494657515e-05, + "loss": 59.227, + "step": 107100 + }, + { + "epoch": 0.432737953352699, + "grad_norm": 1007.1826171875, + "learning_rate": 2.823410466868165e-05, + "loss": 81.1199, + "step": 107110 + }, + { + "epoch": 0.43277835461806663, + "grad_norm": 713.4971313476562, + "learning_rate": 2.823155971108369e-05, + "loss": 73.0199, + "step": 107120 + }, + { + "epoch": 0.43281875588343427, + "grad_norm": 709.822021484375, + "learning_rate": 2.8229014593007243e-05, + "loss": 80.2951, + "step": 107130 + }, + { + "epoch": 0.4328591571488019, + "grad_norm": 610.1932983398438, + "learning_rate": 2.8226469314501917e-05, + "loss": 81.2443, + "step": 107140 + }, + { + "epoch": 0.43289955841416955, + "grad_norm": 535.1950073242188, + "learning_rate": 2.822392387561733e-05, + "loss": 57.7687, + "step": 107150 + }, + { + "epoch": 0.4329399596795372, + "grad_norm": 1208.4085693359375, + "learning_rate": 2.8221378276403114e-05, + "loss": 114.7107, + "step": 107160 + }, + { + "epoch": 0.4329803609449048, + "grad_norm": 306.8943176269531, + "learning_rate": 2.82188325169089e-05, + "loss": 98.25, + "step": 107170 + }, + { + "epoch": 0.4330207622102724, + "grad_norm": 1510.7523193359375, + "learning_rate": 2.821628659718431e-05, + "loss": 122.7309, + "step": 107180 + }, + { + "epoch": 0.43306116347564005, + "grad_norm": 379.21527099609375, + "learning_rate": 2.8213740517278986e-05, + "loss": 80.0865, + "step": 107190 + }, + { + "epoch": 0.4331015647410077, + "grad_norm": 1009.6148071289062, + "learning_rate": 2.8211194277242563e-05, + "loss": 80.2057, + "step": 107200 + }, + { + "epoch": 0.43314196600637533, + "grad_norm": 330.6834411621094, + "learning_rate": 2.8208647877124682e-05, + "loss": 108.7839, + "step": 107210 + }, + { + "epoch": 0.433182367271743, + "grad_norm": 386.667724609375, + "learning_rate": 2.820610131697498e-05, + "loss": 51.0327, + "step": 107220 + }, + { + "epoch": 0.43322276853711056, + "grad_norm": 829.5942993164062, + "learning_rate": 2.820355459684311e-05, + "loss": 98.6291, + "step": 107230 + }, + { + "epoch": 0.4332631698024782, + "grad_norm": 1189.6932373046875, + "learning_rate": 2.8201007716778717e-05, + "loss": 94.7846, + "step": 107240 + }, + { + "epoch": 0.43330357106784584, + "grad_norm": 257.6290283203125, + "learning_rate": 2.8198460676831456e-05, + "loss": 73.4907, + "step": 107250 + }, + { + "epoch": 0.4333439723332135, + "grad_norm": 826.18798828125, + "learning_rate": 2.8195913477050986e-05, + "loss": 70.6357, + "step": 107260 + }, + { + "epoch": 0.4333843735985811, + "grad_norm": 889.875, + "learning_rate": 2.8193366117486963e-05, + "loss": 90.5421, + "step": 107270 + }, + { + "epoch": 0.43342477486394876, + "grad_norm": 900.7783203125, + "learning_rate": 2.819081859818905e-05, + "loss": 118.4531, + "step": 107280 + }, + { + "epoch": 0.43346517612931634, + "grad_norm": 533.2224731445312, + "learning_rate": 2.8188270919206907e-05, + "loss": 112.5925, + "step": 
107290 + }, + { + "epoch": 0.433505577394684, + "grad_norm": 745.5182495117188, + "learning_rate": 2.8185723080590218e-05, + "loss": 100.7749, + "step": 107300 + }, + { + "epoch": 0.4335459786600516, + "grad_norm": 595.0962524414062, + "learning_rate": 2.8183175082388635e-05, + "loss": 52.0306, + "step": 107310 + }, + { + "epoch": 0.43358637992541926, + "grad_norm": 663.384521484375, + "learning_rate": 2.8180626924651838e-05, + "loss": 83.7182, + "step": 107320 + }, + { + "epoch": 0.4336267811907869, + "grad_norm": 1314.6748046875, + "learning_rate": 2.8178078607429512e-05, + "loss": 93.7533, + "step": 107330 + }, + { + "epoch": 0.43366718245615454, + "grad_norm": 576.7864379882812, + "learning_rate": 2.8175530130771337e-05, + "loss": 107.6419, + "step": 107340 + }, + { + "epoch": 0.4337075837215222, + "grad_norm": 1463.8431396484375, + "learning_rate": 2.8172981494726985e-05, + "loss": 99.15, + "step": 107350 + }, + { + "epoch": 0.43374798498688977, + "grad_norm": 640.4923706054688, + "learning_rate": 2.8170432699346154e-05, + "loss": 75.2909, + "step": 107360 + }, + { + "epoch": 0.4337883862522574, + "grad_norm": 885.6763916015625, + "learning_rate": 2.8167883744678528e-05, + "loss": 45.8474, + "step": 107370 + }, + { + "epoch": 0.43382878751762505, + "grad_norm": 693.8721313476562, + "learning_rate": 2.8165334630773807e-05, + "loss": 71.1077, + "step": 107380 + }, + { + "epoch": 0.4338691887829927, + "grad_norm": 448.39739990234375, + "learning_rate": 2.8162785357681685e-05, + "loss": 83.3123, + "step": 107390 + }, + { + "epoch": 0.4339095900483603, + "grad_norm": 965.5805053710938, + "learning_rate": 2.8160235925451858e-05, + "loss": 124.5494, + "step": 107400 + }, + { + "epoch": 0.43394999131372797, + "grad_norm": 705.1239624023438, + "learning_rate": 2.815768633413403e-05, + "loss": 78.9388, + "step": 107410 + }, + { + "epoch": 0.43399039257909555, + "grad_norm": 703.5751342773438, + "learning_rate": 2.815513658377791e-05, + "loss": 71.407, + "step": 107420 + }, + { + "epoch": 0.4340307938444632, + "grad_norm": 314.4859619140625, + "learning_rate": 2.81525866744332e-05, + "loss": 70.1588, + "step": 107430 + }, + { + "epoch": 0.43407119510983083, + "grad_norm": 375.1404724121094, + "learning_rate": 2.8150036606149616e-05, + "loss": 70.9072, + "step": 107440 + }, + { + "epoch": 0.43411159637519847, + "grad_norm": 963.3777465820312, + "learning_rate": 2.8147486378976872e-05, + "loss": 98.4338, + "step": 107450 + }, + { + "epoch": 0.4341519976405661, + "grad_norm": 618.4844360351562, + "learning_rate": 2.8144935992964694e-05, + "loss": 73.0281, + "step": 107460 + }, + { + "epoch": 0.43419239890593375, + "grad_norm": 1008.6240234375, + "learning_rate": 2.8142385448162787e-05, + "loss": 105.7056, + "step": 107470 + }, + { + "epoch": 0.4342328001713014, + "grad_norm": 694.2526245117188, + "learning_rate": 2.8139834744620888e-05, + "loss": 58.6165, + "step": 107480 + }, + { + "epoch": 0.434273201436669, + "grad_norm": 1245.9080810546875, + "learning_rate": 2.813728388238872e-05, + "loss": 85.5535, + "step": 107490 + }, + { + "epoch": 0.4343136027020366, + "grad_norm": 933.8460083007812, + "learning_rate": 2.813473286151601e-05, + "loss": 83.5241, + "step": 107500 + }, + { + "epoch": 0.43435400396740426, + "grad_norm": 630.2044067382812, + "learning_rate": 2.8132181682052494e-05, + "loss": 124.668, + "step": 107510 + }, + { + "epoch": 0.4343944052327719, + "grad_norm": 807.12109375, + "learning_rate": 2.812963034404791e-05, + "loss": 88.8253, + "step": 107520 + }, + { + "epoch": 
0.43443480649813954, + "grad_norm": 600.8772583007812, + "learning_rate": 2.8127078847551996e-05, + "loss": 96.734, + "step": 107530 + }, + { + "epoch": 0.4344752077635072, + "grad_norm": 595.0741577148438, + "learning_rate": 2.81245271926145e-05, + "loss": 61.5609, + "step": 107540 + }, + { + "epoch": 0.43451560902887476, + "grad_norm": 838.9129638671875, + "learning_rate": 2.8121975379285163e-05, + "loss": 76.4262, + "step": 107550 + }, + { + "epoch": 0.4345560102942424, + "grad_norm": 664.7395629882812, + "learning_rate": 2.811942340761373e-05, + "loss": 94.5822, + "step": 107560 + }, + { + "epoch": 0.43459641155961004, + "grad_norm": 2000.36328125, + "learning_rate": 2.8116871277649966e-05, + "loss": 93.2256, + "step": 107570 + }, + { + "epoch": 0.4346368128249777, + "grad_norm": 599.0641479492188, + "learning_rate": 2.8114318989443613e-05, + "loss": 59.2331, + "step": 107580 + }, + { + "epoch": 0.4346772140903453, + "grad_norm": 506.9762878417969, + "learning_rate": 2.8111766543044435e-05, + "loss": 67.2332, + "step": 107590 + }, + { + "epoch": 0.43471761535571296, + "grad_norm": 771.1796264648438, + "learning_rate": 2.810921393850219e-05, + "loss": 70.9977, + "step": 107600 + }, + { + "epoch": 0.43475801662108055, + "grad_norm": 473.7170104980469, + "learning_rate": 2.8106661175866642e-05, + "loss": 116.8734, + "step": 107610 + }, + { + "epoch": 0.4347984178864482, + "grad_norm": 612.1751708984375, + "learning_rate": 2.8104108255187564e-05, + "loss": 115.9909, + "step": 107620 + }, + { + "epoch": 0.4348388191518158, + "grad_norm": 894.1708374023438, + "learning_rate": 2.8101555176514726e-05, + "loss": 65.8631, + "step": 107630 + }, + { + "epoch": 0.43487922041718347, + "grad_norm": 720.2703857421875, + "learning_rate": 2.809900193989789e-05, + "loss": 77.7572, + "step": 107640 + }, + { + "epoch": 0.4349196216825511, + "grad_norm": 910.765380859375, + "learning_rate": 2.809644854538685e-05, + "loss": 119.2577, + "step": 107650 + }, + { + "epoch": 0.43496002294791875, + "grad_norm": 736.33935546875, + "learning_rate": 2.8093894993031377e-05, + "loss": 62.7203, + "step": 107660 + }, + { + "epoch": 0.4350004242132864, + "grad_norm": 626.4576416015625, + "learning_rate": 2.8091341282881254e-05, + "loss": 53.8266, + "step": 107670 + }, + { + "epoch": 0.43504082547865397, + "grad_norm": 931.9013061523438, + "learning_rate": 2.808878741498626e-05, + "loss": 100.8897, + "step": 107680 + }, + { + "epoch": 0.4350812267440216, + "grad_norm": 1107.9371337890625, + "learning_rate": 2.8086233389396197e-05, + "loss": 72.258, + "step": 107690 + }, + { + "epoch": 0.43512162800938925, + "grad_norm": 741.6502075195312, + "learning_rate": 2.8083679206160846e-05, + "loss": 71.2549, + "step": 107700 + }, + { + "epoch": 0.4351620292747569, + "grad_norm": 1175.9466552734375, + "learning_rate": 2.808112486533001e-05, + "loss": 97.9334, + "step": 107710 + }, + { + "epoch": 0.43520243054012453, + "grad_norm": 813.3326416015625, + "learning_rate": 2.8078570366953487e-05, + "loss": 121.6968, + "step": 107720 + }, + { + "epoch": 0.43524283180549217, + "grad_norm": 559.6058349609375, + "learning_rate": 2.8076015711081074e-05, + "loss": 136.7451, + "step": 107730 + }, + { + "epoch": 0.43528323307085975, + "grad_norm": 996.3305053710938, + "learning_rate": 2.8073460897762576e-05, + "loss": 86.4125, + "step": 107740 + }, + { + "epoch": 0.4353236343362274, + "grad_norm": 1108.4769287109375, + "learning_rate": 2.8070905927047806e-05, + "loss": 119.043, + "step": 107750 + }, + { + "epoch": 0.43536403560159503, + 
"grad_norm": 654.6348266601562, + "learning_rate": 2.8068350798986562e-05, + "loss": 90.7337, + "step": 107760 + }, + { + "epoch": 0.4354044368669627, + "grad_norm": 782.865478515625, + "learning_rate": 2.8065795513628668e-05, + "loss": 64.837, + "step": 107770 + }, + { + "epoch": 0.4354448381323303, + "grad_norm": 866.9967041015625, + "learning_rate": 2.806324007102394e-05, + "loss": 54.7211, + "step": 107780 + }, + { + "epoch": 0.43548523939769795, + "grad_norm": 406.1822509765625, + "learning_rate": 2.8060684471222196e-05, + "loss": 81.0616, + "step": 107790 + }, + { + "epoch": 0.4355256406630656, + "grad_norm": 604.3024291992188, + "learning_rate": 2.8058128714273257e-05, + "loss": 61.5869, + "step": 107800 + }, + { + "epoch": 0.4355660419284332, + "grad_norm": 1208.31494140625, + "learning_rate": 2.8055572800226943e-05, + "loss": 124.1106, + "step": 107810 + }, + { + "epoch": 0.4356064431938008, + "grad_norm": 857.28173828125, + "learning_rate": 2.80530167291331e-05, + "loss": 53.3853, + "step": 107820 + }, + { + "epoch": 0.43564684445916846, + "grad_norm": 562.5675048828125, + "learning_rate": 2.8050460501041553e-05, + "loss": 48.0067, + "step": 107830 + }, + { + "epoch": 0.4356872457245361, + "grad_norm": 638.8025512695312, + "learning_rate": 2.8047904116002125e-05, + "loss": 95.4937, + "step": 107840 + }, + { + "epoch": 0.43572764698990374, + "grad_norm": 1078.258544921875, + "learning_rate": 2.8045347574064667e-05, + "loss": 53.2465, + "step": 107850 + }, + { + "epoch": 0.4357680482552714, + "grad_norm": 1286.33056640625, + "learning_rate": 2.804279087527901e-05, + "loss": 74.6375, + "step": 107860 + }, + { + "epoch": 0.43580844952063896, + "grad_norm": 654.0866088867188, + "learning_rate": 2.8040234019695013e-05, + "loss": 50.2335, + "step": 107870 + }, + { + "epoch": 0.4358488507860066, + "grad_norm": 316.4346923828125, + "learning_rate": 2.8037677007362507e-05, + "loss": 83.3004, + "step": 107880 + }, + { + "epoch": 0.43588925205137424, + "grad_norm": 1253.59716796875, + "learning_rate": 2.803511983833135e-05, + "loss": 92.2982, + "step": 107890 + }, + { + "epoch": 0.4359296533167419, + "grad_norm": 938.1961059570312, + "learning_rate": 2.8032562512651403e-05, + "loss": 78.5556, + "step": 107900 + }, + { + "epoch": 0.4359700545821095, + "grad_norm": 2347.408203125, + "learning_rate": 2.8030005030372506e-05, + "loss": 91.2869, + "step": 107910 + }, + { + "epoch": 0.43601045584747716, + "grad_norm": 731.7193603515625, + "learning_rate": 2.802744739154453e-05, + "loss": 66.0418, + "step": 107920 + }, + { + "epoch": 0.43605085711284475, + "grad_norm": 1642.9217529296875, + "learning_rate": 2.8024889596217337e-05, + "loss": 64.0873, + "step": 107930 + }, + { + "epoch": 0.4360912583782124, + "grad_norm": 423.9770202636719, + "learning_rate": 2.8022331644440782e-05, + "loss": 85.8013, + "step": 107940 + }, + { + "epoch": 0.43613165964358, + "grad_norm": 619.2984008789062, + "learning_rate": 2.801977353626475e-05, + "loss": 46.368, + "step": 107950 + }, + { + "epoch": 0.43617206090894767, + "grad_norm": 864.2037353515625, + "learning_rate": 2.80172152717391e-05, + "loss": 70.0626, + "step": 107960 + }, + { + "epoch": 0.4362124621743153, + "grad_norm": 786.6764526367188, + "learning_rate": 2.8014656850913708e-05, + "loss": 110.1384, + "step": 107970 + }, + { + "epoch": 0.43625286343968295, + "grad_norm": 1661.3404541015625, + "learning_rate": 2.8012098273838457e-05, + "loss": 87.8968, + "step": 107980 + }, + { + "epoch": 0.4362932647050506, + "grad_norm": 1234.216064453125, + 
"learning_rate": 2.800953954056323e-05, + "loss": 82.2677, + "step": 107990 + }, + { + "epoch": 0.43633366597041817, + "grad_norm": 670.7018432617188, + "learning_rate": 2.8006980651137906e-05, + "loss": 53.9576, + "step": 108000 + }, + { + "epoch": 0.4363740672357858, + "grad_norm": 336.9872131347656, + "learning_rate": 2.8004421605612365e-05, + "loss": 80.0805, + "step": 108010 + }, + { + "epoch": 0.43641446850115345, + "grad_norm": 849.7322998046875, + "learning_rate": 2.8001862404036505e-05, + "loss": 82.6923, + "step": 108020 + }, + { + "epoch": 0.4364548697665211, + "grad_norm": 683.2806396484375, + "learning_rate": 2.799930304646022e-05, + "loss": 112.5007, + "step": 108030 + }, + { + "epoch": 0.43649527103188873, + "grad_norm": 872.87890625, + "learning_rate": 2.7996743532933405e-05, + "loss": 95.8545, + "step": 108040 + }, + { + "epoch": 0.43653567229725637, + "grad_norm": 1158.8154296875, + "learning_rate": 2.7994183863505957e-05, + "loss": 54.8587, + "step": 108050 + }, + { + "epoch": 0.43657607356262396, + "grad_norm": 923.2055053710938, + "learning_rate": 2.7991624038227773e-05, + "loss": 93.3421, + "step": 108060 + }, + { + "epoch": 0.4366164748279916, + "grad_norm": 400.946533203125, + "learning_rate": 2.7989064057148772e-05, + "loss": 57.4966, + "step": 108070 + }, + { + "epoch": 0.43665687609335924, + "grad_norm": 956.4223022460938, + "learning_rate": 2.798650392031885e-05, + "loss": 76.5426, + "step": 108080 + }, + { + "epoch": 0.4366972773587269, + "grad_norm": 1203.708740234375, + "learning_rate": 2.7983943627787928e-05, + "loss": 100.3521, + "step": 108090 + }, + { + "epoch": 0.4367376786240945, + "grad_norm": 626.91455078125, + "learning_rate": 2.798138317960591e-05, + "loss": 63.034, + "step": 108100 + }, + { + "epoch": 0.43677807988946216, + "grad_norm": 883.8171997070312, + "learning_rate": 2.7978822575822712e-05, + "loss": 50.722, + "step": 108110 + }, + { + "epoch": 0.4368184811548298, + "grad_norm": 425.65655517578125, + "learning_rate": 2.797626181648827e-05, + "loss": 108.9107, + "step": 108120 + }, + { + "epoch": 0.4368588824201974, + "grad_norm": 277.50445556640625, + "learning_rate": 2.797370090165249e-05, + "loss": 81.454, + "step": 108130 + }, + { + "epoch": 0.436899283685565, + "grad_norm": 295.593017578125, + "learning_rate": 2.797113983136531e-05, + "loss": 119.9071, + "step": 108140 + }, + { + "epoch": 0.43693968495093266, + "grad_norm": 523.1868286132812, + "learning_rate": 2.7968578605676654e-05, + "loss": 94.8842, + "step": 108150 + }, + { + "epoch": 0.4369800862163003, + "grad_norm": 636.720703125, + "learning_rate": 2.796601722463646e-05, + "loss": 69.3963, + "step": 108160 + }, + { + "epoch": 0.43702048748166794, + "grad_norm": 1723.0177001953125, + "learning_rate": 2.7963455688294658e-05, + "loss": 70.219, + "step": 108170 + }, + { + "epoch": 0.4370608887470356, + "grad_norm": 430.1444091796875, + "learning_rate": 2.7960893996701178e-05, + "loss": 72.436, + "step": 108180 + }, + { + "epoch": 0.43710129001240317, + "grad_norm": 503.9712829589844, + "learning_rate": 2.795833214990598e-05, + "loss": 121.9395, + "step": 108190 + }, + { + "epoch": 0.4371416912777708, + "grad_norm": 433.45806884765625, + "learning_rate": 2.7955770147959e-05, + "loss": 78.9574, + "step": 108200 + }, + { + "epoch": 0.43718209254313845, + "grad_norm": 483.24847412109375, + "learning_rate": 2.7953207990910178e-05, + "loss": 78.8691, + "step": 108210 + }, + { + "epoch": 0.4372224938085061, + "grad_norm": 651.658203125, + "learning_rate": 2.795064567880948e-05, + "loss": 
63.9857, + "step": 108220 + }, + { + "epoch": 0.4372628950738737, + "grad_norm": 626.5777587890625, + "learning_rate": 2.7948083211706843e-05, + "loss": 61.122, + "step": 108230 + }, + { + "epoch": 0.43730329633924137, + "grad_norm": 1407.1153564453125, + "learning_rate": 2.7945520589652235e-05, + "loss": 57.6615, + "step": 108240 + }, + { + "epoch": 0.43734369760460895, + "grad_norm": 783.1539306640625, + "learning_rate": 2.7942957812695613e-05, + "loss": 54.7539, + "step": 108250 + }, + { + "epoch": 0.4373840988699766, + "grad_norm": 502.4456787109375, + "learning_rate": 2.7940394880886945e-05, + "loss": 157.2069, + "step": 108260 + }, + { + "epoch": 0.43742450013534423, + "grad_norm": 395.5521545410156, + "learning_rate": 2.793783179427618e-05, + "loss": 102.431, + "step": 108270 + }, + { + "epoch": 0.43746490140071187, + "grad_norm": 1254.617919921875, + "learning_rate": 2.793526855291331e-05, + "loss": 99.9106, + "step": 108280 + }, + { + "epoch": 0.4375053026660795, + "grad_norm": 890.6553955078125, + "learning_rate": 2.793270515684828e-05, + "loss": 79.6673, + "step": 108290 + }, + { + "epoch": 0.43754570393144715, + "grad_norm": 594.8746948242188, + "learning_rate": 2.7930141606131086e-05, + "loss": 94.8744, + "step": 108300 + }, + { + "epoch": 0.4375861051968148, + "grad_norm": 459.41357421875, + "learning_rate": 2.7927577900811696e-05, + "loss": 69.3133, + "step": 108310 + }, + { + "epoch": 0.4376265064621824, + "grad_norm": 652.1686401367188, + "learning_rate": 2.79250140409401e-05, + "loss": 89.3923, + "step": 108320 + }, + { + "epoch": 0.43766690772755, + "grad_norm": 616.8619995117188, + "learning_rate": 2.792245002656627e-05, + "loss": 94.2853, + "step": 108330 + }, + { + "epoch": 0.43770730899291765, + "grad_norm": 399.5962829589844, + "learning_rate": 2.7919885857740202e-05, + "loss": 60.5942, + "step": 108340 + }, + { + "epoch": 0.4377477102582853, + "grad_norm": 1849.211669921875, + "learning_rate": 2.7917321534511876e-05, + "loss": 93.1326, + "step": 108350 + }, + { + "epoch": 0.43778811152365293, + "grad_norm": 1559.8529052734375, + "learning_rate": 2.7914757056931295e-05, + "loss": 96.9053, + "step": 108360 + }, + { + "epoch": 0.4378285127890206, + "grad_norm": 489.99072265625, + "learning_rate": 2.7912192425048446e-05, + "loss": 76.0344, + "step": 108370 + }, + { + "epoch": 0.43786891405438816, + "grad_norm": 919.3154907226562, + "learning_rate": 2.790962763891334e-05, + "loss": 112.377, + "step": 108380 + }, + { + "epoch": 0.4379093153197558, + "grad_norm": 652.8154907226562, + "learning_rate": 2.790706269857596e-05, + "loss": 66.7694, + "step": 108390 + }, + { + "epoch": 0.43794971658512344, + "grad_norm": 756.871826171875, + "learning_rate": 2.7904497604086333e-05, + "loss": 98.575, + "step": 108400 + }, + { + "epoch": 0.4379901178504911, + "grad_norm": 842.2221069335938, + "learning_rate": 2.790193235549445e-05, + "loss": 72.5448, + "step": 108410 + }, + { + "epoch": 0.4380305191158587, + "grad_norm": 1011.0355834960938, + "learning_rate": 2.7899366952850323e-05, + "loss": 91.7917, + "step": 108420 + }, + { + "epoch": 0.43807092038122636, + "grad_norm": 531.28955078125, + "learning_rate": 2.7896801396203984e-05, + "loss": 64.9336, + "step": 108430 + }, + { + "epoch": 0.438111321646594, + "grad_norm": 346.8134765625, + "learning_rate": 2.7894235685605426e-05, + "loss": 69.1069, + "step": 108440 + }, + { + "epoch": 0.4381517229119616, + "grad_norm": 966.1568603515625, + "learning_rate": 2.789166982110468e-05, + "loss": 94.3737, + "step": 108450 + }, + { + "epoch": 
0.4381921241773292, + "grad_norm": 422.4350891113281, + "learning_rate": 2.7889103802751768e-05, + "loss": 84.1492, + "step": 108460 + }, + { + "epoch": 0.43823252544269686, + "grad_norm": 1151.854736328125, + "learning_rate": 2.7886537630596716e-05, + "loss": 106.8925, + "step": 108470 + }, + { + "epoch": 0.4382729267080645, + "grad_norm": 1446.5206298828125, + "learning_rate": 2.7883971304689555e-05, + "loss": 119.098, + "step": 108480 + }, + { + "epoch": 0.43831332797343214, + "grad_norm": 619.822021484375, + "learning_rate": 2.7881404825080314e-05, + "loss": 72.8643, + "step": 108490 + }, + { + "epoch": 0.4383537292387998, + "grad_norm": 734.1318969726562, + "learning_rate": 2.7878838191819026e-05, + "loss": 77.4192, + "step": 108500 + }, + { + "epoch": 0.43839413050416737, + "grad_norm": 595.5697631835938, + "learning_rate": 2.7876271404955732e-05, + "loss": 64.548, + "step": 108510 + }, + { + "epoch": 0.438434531769535, + "grad_norm": 716.731689453125, + "learning_rate": 2.7873704464540473e-05, + "loss": 94.7044, + "step": 108520 + }, + { + "epoch": 0.43847493303490265, + "grad_norm": 884.2084350585938, + "learning_rate": 2.7871137370623294e-05, + "loss": 112.1121, + "step": 108530 + }, + { + "epoch": 0.4385153343002703, + "grad_norm": 716.5902099609375, + "learning_rate": 2.786857012325423e-05, + "loss": 90.9307, + "step": 108540 + }, + { + "epoch": 0.4385557355656379, + "grad_norm": 937.4422607421875, + "learning_rate": 2.786600272248335e-05, + "loss": 88.2, + "step": 108550 + }, + { + "epoch": 0.43859613683100557, + "grad_norm": 913.8324584960938, + "learning_rate": 2.7863435168360698e-05, + "loss": 69.2476, + "step": 108560 + }, + { + "epoch": 0.43863653809637315, + "grad_norm": 603.038818359375, + "learning_rate": 2.7860867460936322e-05, + "loss": 96.1983, + "step": 108570 + }, + { + "epoch": 0.4386769393617408, + "grad_norm": 987.2791748046875, + "learning_rate": 2.7858299600260287e-05, + "loss": 67.2604, + "step": 108580 + }, + { + "epoch": 0.43871734062710843, + "grad_norm": 692.4564819335938, + "learning_rate": 2.785573158638266e-05, + "loss": 89.5952, + "step": 108590 + }, + { + "epoch": 0.43875774189247607, + "grad_norm": 426.4747009277344, + "learning_rate": 2.7853163419353505e-05, + "loss": 67.1179, + "step": 108600 + }, + { + "epoch": 0.4387981431578437, + "grad_norm": 352.0201110839844, + "learning_rate": 2.785059509922288e-05, + "loss": 60.9063, + "step": 108610 + }, + { + "epoch": 0.43883854442321135, + "grad_norm": 657.58544921875, + "learning_rate": 2.7848026626040865e-05, + "loss": 74.2122, + "step": 108620 + }, + { + "epoch": 0.438878945688579, + "grad_norm": 529.6314697265625, + "learning_rate": 2.784545799985753e-05, + "loss": 65.3618, + "step": 108630 + }, + { + "epoch": 0.4389193469539466, + "grad_norm": 493.81463623046875, + "learning_rate": 2.784288922072295e-05, + "loss": 73.1972, + "step": 108640 + }, + { + "epoch": 0.4389597482193142, + "grad_norm": 450.31109619140625, + "learning_rate": 2.784032028868721e-05, + "loss": 71.1544, + "step": 108650 + }, + { + "epoch": 0.43900014948468186, + "grad_norm": 1029.6627197265625, + "learning_rate": 2.783775120380039e-05, + "loss": 108.6687, + "step": 108660 + }, + { + "epoch": 0.4390405507500495, + "grad_norm": 3695.950439453125, + "learning_rate": 2.7835181966112568e-05, + "loss": 196.319, + "step": 108670 + }, + { + "epoch": 0.43908095201541714, + "grad_norm": 785.7459106445312, + "learning_rate": 2.7832612575673848e-05, + "loss": 72.4891, + "step": 108680 + }, + { + "epoch": 0.4391213532807848, + "grad_norm": 
1487.4971923828125, + "learning_rate": 2.7830043032534316e-05, + "loss": 76.8986, + "step": 108690 + }, + { + "epoch": 0.43916175454615236, + "grad_norm": 495.9024353027344, + "learning_rate": 2.7827473336744054e-05, + "loss": 71.6, + "step": 108700 + }, + { + "epoch": 0.43920215581152, + "grad_norm": 498.5118713378906, + "learning_rate": 2.782490348835317e-05, + "loss": 67.856, + "step": 108710 + }, + { + "epoch": 0.43924255707688764, + "grad_norm": 443.5705871582031, + "learning_rate": 2.7822333487411767e-05, + "loss": 81.3446, + "step": 108720 + }, + { + "epoch": 0.4392829583422553, + "grad_norm": 302.3796691894531, + "learning_rate": 2.781976333396995e-05, + "loss": 87.0051, + "step": 108730 + }, + { + "epoch": 0.4393233596076229, + "grad_norm": 569.6698608398438, + "learning_rate": 2.7817193028077817e-05, + "loss": 71.0941, + "step": 108740 + }, + { + "epoch": 0.43936376087299056, + "grad_norm": 667.3056640625, + "learning_rate": 2.7814622569785474e-05, + "loss": 89.3329, + "step": 108750 + }, + { + "epoch": 0.4394041621383582, + "grad_norm": 823.868408203125, + "learning_rate": 2.781205195914305e-05, + "loss": 74.7757, + "step": 108760 + }, + { + "epoch": 0.4394445634037258, + "grad_norm": 542.7114868164062, + "learning_rate": 2.780948119620065e-05, + "loss": 98.8863, + "step": 108770 + }, + { + "epoch": 0.4394849646690934, + "grad_norm": 881.0919189453125, + "learning_rate": 2.78069102810084e-05, + "loss": 88.8362, + "step": 108780 + }, + { + "epoch": 0.43952536593446107, + "grad_norm": 964.8372802734375, + "learning_rate": 2.78043392136164e-05, + "loss": 139.2094, + "step": 108790 + }, + { + "epoch": 0.4395657671998287, + "grad_norm": 585.7197875976562, + "learning_rate": 2.7801767994074796e-05, + "loss": 76.9554, + "step": 108800 + }, + { + "epoch": 0.43960616846519635, + "grad_norm": 709.2533569335938, + "learning_rate": 2.779919662243371e-05, + "loss": 111.8875, + "step": 108810 + }, + { + "epoch": 0.439646569730564, + "grad_norm": 849.9103393554688, + "learning_rate": 2.779662509874327e-05, + "loss": 72.6983, + "step": 108820 + }, + { + "epoch": 0.43968697099593157, + "grad_norm": 735.4912109375, + "learning_rate": 2.7794053423053608e-05, + "loss": 91.5318, + "step": 108830 + }, + { + "epoch": 0.4397273722612992, + "grad_norm": 1785.129638671875, + "learning_rate": 2.7791481595414862e-05, + "loss": 139.555, + "step": 108840 + }, + { + "epoch": 0.43976777352666685, + "grad_norm": 499.39556884765625, + "learning_rate": 2.7788909615877174e-05, + "loss": 65.6219, + "step": 108850 + }, + { + "epoch": 0.4398081747920345, + "grad_norm": 433.9969177246094, + "learning_rate": 2.7786337484490686e-05, + "loss": 76.728, + "step": 108860 + }, + { + "epoch": 0.43984857605740213, + "grad_norm": 902.6666259765625, + "learning_rate": 2.778376520130553e-05, + "loss": 78.6021, + "step": 108870 + }, + { + "epoch": 0.43988897732276977, + "grad_norm": 519.9874267578125, + "learning_rate": 2.778119276637187e-05, + "loss": 83.8157, + "step": 108880 + }, + { + "epoch": 0.43992937858813735, + "grad_norm": 1055.5556640625, + "learning_rate": 2.777862017973985e-05, + "loss": 107.6471, + "step": 108890 + }, + { + "epoch": 0.439969779853505, + "grad_norm": 475.3175964355469, + "learning_rate": 2.777604744145962e-05, + "loss": 98.8644, + "step": 108900 + }, + { + "epoch": 0.44001018111887263, + "grad_norm": 624.224365234375, + "learning_rate": 2.7773474551581353e-05, + "loss": 94.1712, + "step": 108910 + }, + { + "epoch": 0.4400505823842403, + "grad_norm": 364.4097900390625, + "learning_rate": 
2.777090151015519e-05, + "loss": 72.5706, + "step": 108920 + }, + { + "epoch": 0.4400909836496079, + "grad_norm": 583.73876953125, + "learning_rate": 2.7768328317231304e-05, + "loss": 75.2094, + "step": 108930 + }, + { + "epoch": 0.44013138491497555, + "grad_norm": 1022.2356567382812, + "learning_rate": 2.7765754972859853e-05, + "loss": 79.2135, + "step": 108940 + }, + { + "epoch": 0.4401717861803432, + "grad_norm": 1350.6619873046875, + "learning_rate": 2.7763181477091016e-05, + "loss": 89.1546, + "step": 108950 + }, + { + "epoch": 0.4402121874457108, + "grad_norm": 607.9484252929688, + "learning_rate": 2.7760607829974956e-05, + "loss": 97.6613, + "step": 108960 + }, + { + "epoch": 0.4402525887110784, + "grad_norm": 578.354248046875, + "learning_rate": 2.7758034031561858e-05, + "loss": 64.3891, + "step": 108970 + }, + { + "epoch": 0.44029298997644606, + "grad_norm": 641.7802734375, + "learning_rate": 2.7755460081901885e-05, + "loss": 127.9871, + "step": 108980 + }, + { + "epoch": 0.4403333912418137, + "grad_norm": 788.435302734375, + "learning_rate": 2.7752885981045224e-05, + "loss": 79.1936, + "step": 108990 + }, + { + "epoch": 0.44037379250718134, + "grad_norm": 992.8035888671875, + "learning_rate": 2.7750311729042062e-05, + "loss": 72.593, + "step": 109000 + }, + { + "epoch": 0.440414193772549, + "grad_norm": 833.2522583007812, + "learning_rate": 2.7747737325942584e-05, + "loss": 76.8035, + "step": 109010 + }, + { + "epoch": 0.44045459503791656, + "grad_norm": 521.5999755859375, + "learning_rate": 2.7745162771796973e-05, + "loss": 78.0496, + "step": 109020 + }, + { + "epoch": 0.4404949963032842, + "grad_norm": 626.26708984375, + "learning_rate": 2.7742588066655436e-05, + "loss": 100.8856, + "step": 109030 + }, + { + "epoch": 0.44053539756865184, + "grad_norm": 184.10321044921875, + "learning_rate": 2.7740013210568153e-05, + "loss": 86.7372, + "step": 109040 + }, + { + "epoch": 0.4405757988340195, + "grad_norm": 655.500732421875, + "learning_rate": 2.7737438203585328e-05, + "loss": 85.4363, + "step": 109050 + }, + { + "epoch": 0.4406162000993871, + "grad_norm": 1103.606689453125, + "learning_rate": 2.773486304575716e-05, + "loss": 85.8745, + "step": 109060 + }, + { + "epoch": 0.44065660136475476, + "grad_norm": 946.7453002929688, + "learning_rate": 2.7732287737133858e-05, + "loss": 86.7427, + "step": 109070 + }, + { + "epoch": 0.4406970026301224, + "grad_norm": 543.5090942382812, + "learning_rate": 2.772971227776562e-05, + "loss": 77.6052, + "step": 109080 + }, + { + "epoch": 0.44073740389549, + "grad_norm": 741.3240966796875, + "learning_rate": 2.7727136667702668e-05, + "loss": 98.4621, + "step": 109090 + }, + { + "epoch": 0.4407778051608576, + "grad_norm": 843.0729370117188, + "learning_rate": 2.7724560906995202e-05, + "loss": 55.1862, + "step": 109100 + }, + { + "epoch": 0.44081820642622527, + "grad_norm": 1131.4190673828125, + "learning_rate": 2.7721984995693446e-05, + "loss": 55.9475, + "step": 109110 + }, + { + "epoch": 0.4408586076915929, + "grad_norm": 1414.1094970703125, + "learning_rate": 2.7719408933847624e-05, + "loss": 139.6697, + "step": 109120 + }, + { + "epoch": 0.44089900895696055, + "grad_norm": 655.0340576171875, + "learning_rate": 2.7716832721507946e-05, + "loss": 95.1755, + "step": 109130 + }, + { + "epoch": 0.4409394102223282, + "grad_norm": 519.7282104492188, + "learning_rate": 2.771425635872464e-05, + "loss": 65.5409, + "step": 109140 + }, + { + "epoch": 0.44097981148769577, + "grad_norm": 645.0565185546875, + "learning_rate": 2.7711679845547936e-05, + "loss": 
81.251, + "step": 109150 + }, + { + "epoch": 0.4410202127530634, + "grad_norm": 622.3805541992188, + "learning_rate": 2.7709103182028058e-05, + "loss": 89.9577, + "step": 109160 + }, + { + "epoch": 0.44106061401843105, + "grad_norm": 1175.214111328125, + "learning_rate": 2.770652636821525e-05, + "loss": 93.497, + "step": 109170 + }, + { + "epoch": 0.4411010152837987, + "grad_norm": 826.6397094726562, + "learning_rate": 2.7703949404159743e-05, + "loss": 87.8858, + "step": 109180 + }, + { + "epoch": 0.44114141654916633, + "grad_norm": 1030.1534423828125, + "learning_rate": 2.7701372289911774e-05, + "loss": 71.4686, + "step": 109190 + }, + { + "epoch": 0.44118181781453397, + "grad_norm": 977.8969116210938, + "learning_rate": 2.7698795025521593e-05, + "loss": 73.5299, + "step": 109200 + }, + { + "epoch": 0.44122221907990156, + "grad_norm": 1343.9517822265625, + "learning_rate": 2.769621761103943e-05, + "loss": 90.9531, + "step": 109210 + }, + { + "epoch": 0.4412626203452692, + "grad_norm": 332.86407470703125, + "learning_rate": 2.7693640046515554e-05, + "loss": 67.6184, + "step": 109220 + }, + { + "epoch": 0.44130302161063684, + "grad_norm": 2748.17236328125, + "learning_rate": 2.7691062332000194e-05, + "loss": 172.8129, + "step": 109230 + }, + { + "epoch": 0.4413434228760045, + "grad_norm": 749.7548217773438, + "learning_rate": 2.768848446754362e-05, + "loss": 73.3734, + "step": 109240 + }, + { + "epoch": 0.4413838241413721, + "grad_norm": 516.8180541992188, + "learning_rate": 2.7685906453196073e-05, + "loss": 52.6356, + "step": 109250 + }, + { + "epoch": 0.44142422540673976, + "grad_norm": 252.4339599609375, + "learning_rate": 2.7683328289007838e-05, + "loss": 70.1862, + "step": 109260 + }, + { + "epoch": 0.4414646266721074, + "grad_norm": 882.948486328125, + "learning_rate": 2.768074997502915e-05, + "loss": 77.9139, + "step": 109270 + }, + { + "epoch": 0.441505027937475, + "grad_norm": 449.2543029785156, + "learning_rate": 2.7678171511310293e-05, + "loss": 93.5709, + "step": 109280 + }, + { + "epoch": 0.4415454292028426, + "grad_norm": 600.88671875, + "learning_rate": 2.767559289790153e-05, + "loss": 90.1475, + "step": 109290 + }, + { + "epoch": 0.44158583046821026, + "grad_norm": 704.863525390625, + "learning_rate": 2.767301413485313e-05, + "loss": 62.1784, + "step": 109300 + }, + { + "epoch": 0.4416262317335779, + "grad_norm": 868.3765258789062, + "learning_rate": 2.767043522221537e-05, + "loss": 64.0232, + "step": 109310 + }, + { + "epoch": 0.44166663299894554, + "grad_norm": 1021.1484375, + "learning_rate": 2.7667856160038523e-05, + "loss": 70.6936, + "step": 109320 + }, + { + "epoch": 0.4417070342643132, + "grad_norm": 666.2997436523438, + "learning_rate": 2.7665276948372876e-05, + "loss": 82.2566, + "step": 109330 + }, + { + "epoch": 0.44174743552968077, + "grad_norm": 642.2588500976562, + "learning_rate": 2.766269758726871e-05, + "loss": 110.6013, + "step": 109340 + }, + { + "epoch": 0.4417878367950484, + "grad_norm": 625.8738403320312, + "learning_rate": 2.7660118076776305e-05, + "loss": 87.0626, + "step": 109350 + }, + { + "epoch": 0.44182823806041605, + "grad_norm": 226.11949157714844, + "learning_rate": 2.7657538416945952e-05, + "loss": 89.4131, + "step": 109360 + }, + { + "epoch": 0.4418686393257837, + "grad_norm": 991.2916259765625, + "learning_rate": 2.7654958607827957e-05, + "loss": 103.4511, + "step": 109370 + }, + { + "epoch": 0.4419090405911513, + "grad_norm": 831.37353515625, + "learning_rate": 2.7652378649472588e-05, + "loss": 81.0368, + "step": 109380 + }, + { + 
"epoch": 0.44194944185651897, + "grad_norm": 465.68243408203125, + "learning_rate": 2.7649798541930174e-05, + "loss": 67.1264, + "step": 109390 + }, + { + "epoch": 0.44198984312188655, + "grad_norm": 802.1065063476562, + "learning_rate": 2.7647218285250984e-05, + "loss": 121.6818, + "step": 109400 + }, + { + "epoch": 0.4420302443872542, + "grad_norm": 628.7476196289062, + "learning_rate": 2.764463787948534e-05, + "loss": 79.9854, + "step": 109410 + }, + { + "epoch": 0.44207064565262183, + "grad_norm": 903.51318359375, + "learning_rate": 2.764205732468355e-05, + "loss": 87.7884, + "step": 109420 + }, + { + "epoch": 0.44211104691798947, + "grad_norm": 697.433349609375, + "learning_rate": 2.7639476620895912e-05, + "loss": 104.0829, + "step": 109430 + }, + { + "epoch": 0.4421514481833571, + "grad_norm": 1253.65185546875, + "learning_rate": 2.763689576817275e-05, + "loss": 92.1124, + "step": 109440 + }, + { + "epoch": 0.44219184944872475, + "grad_norm": 1166.527587890625, + "learning_rate": 2.763431476656437e-05, + "loss": 89.3992, + "step": 109450 + }, + { + "epoch": 0.4422322507140924, + "grad_norm": 445.5728759765625, + "learning_rate": 2.7631733616121095e-05, + "loss": 48.1036, + "step": 109460 + }, + { + "epoch": 0.44227265197946, + "grad_norm": 813.0160522460938, + "learning_rate": 2.7629152316893245e-05, + "loss": 69.1355, + "step": 109470 + }, + { + "epoch": 0.4423130532448276, + "grad_norm": 367.3717041015625, + "learning_rate": 2.7626570868931143e-05, + "loss": 54.6717, + "step": 109480 + }, + { + "epoch": 0.44235345451019525, + "grad_norm": 812.6731567382812, + "learning_rate": 2.7623989272285112e-05, + "loss": 85.385, + "step": 109490 + }, + { + "epoch": 0.4423938557755629, + "grad_norm": 621.633056640625, + "learning_rate": 2.7621407527005487e-05, + "loss": 82.6446, + "step": 109500 + }, + { + "epoch": 0.44243425704093053, + "grad_norm": 844.3721923828125, + "learning_rate": 2.7618825633142595e-05, + "loss": 158.5291, + "step": 109510 + }, + { + "epoch": 0.4424746583062982, + "grad_norm": 282.8899841308594, + "learning_rate": 2.761624359074678e-05, + "loss": 75.1209, + "step": 109520 + }, + { + "epoch": 0.44251505957166576, + "grad_norm": 638.0318603515625, + "learning_rate": 2.7613661399868367e-05, + "loss": 91.3835, + "step": 109530 + }, + { + "epoch": 0.4425554608370334, + "grad_norm": 750.9769287109375, + "learning_rate": 2.7611079060557716e-05, + "loss": 58.7886, + "step": 109540 + }, + { + "epoch": 0.44259586210240104, + "grad_norm": 1098.7945556640625, + "learning_rate": 2.7608496572865154e-05, + "loss": 61.7845, + "step": 109550 + }, + { + "epoch": 0.4426362633677687, + "grad_norm": 1119.239501953125, + "learning_rate": 2.760591393684104e-05, + "loss": 130.1093, + "step": 109560 + }, + { + "epoch": 0.4426766646331363, + "grad_norm": 733.471435546875, + "learning_rate": 2.760333115253571e-05, + "loss": 86.5297, + "step": 109570 + }, + { + "epoch": 0.44271706589850396, + "grad_norm": 1132.9801025390625, + "learning_rate": 2.760074821999953e-05, + "loss": 114.7573, + "step": 109580 + }, + { + "epoch": 0.4427574671638716, + "grad_norm": 1218.995361328125, + "learning_rate": 2.759816513928285e-05, + "loss": 70.2196, + "step": 109590 + }, + { + "epoch": 0.4427978684292392, + "grad_norm": 783.8511962890625, + "learning_rate": 2.759558191043603e-05, + "loss": 75.6858, + "step": 109600 + }, + { + "epoch": 0.4428382696946068, + "grad_norm": 1332.5771484375, + "learning_rate": 2.759299853350942e-05, + "loss": 92.3504, + "step": 109610 + }, + { + "epoch": 0.44287867095997446, + 
"grad_norm": 741.8960571289062, + "learning_rate": 2.7590415008553403e-05, + "loss": 83.8413, + "step": 109620 + }, + { + "epoch": 0.4429190722253421, + "grad_norm": 944.0001220703125, + "learning_rate": 2.7587831335618342e-05, + "loss": 92.4441, + "step": 109630 + }, + { + "epoch": 0.44295947349070974, + "grad_norm": 579.3040771484375, + "learning_rate": 2.7585247514754594e-05, + "loss": 76.6605, + "step": 109640 + }, + { + "epoch": 0.4429998747560774, + "grad_norm": 771.0035400390625, + "learning_rate": 2.7582663546012544e-05, + "loss": 92.0734, + "step": 109650 + }, + { + "epoch": 0.44304027602144497, + "grad_norm": 1856.6160888671875, + "learning_rate": 2.7580079429442563e-05, + "loss": 92.0013, + "step": 109660 + }, + { + "epoch": 0.4430806772868126, + "grad_norm": 635.4410400390625, + "learning_rate": 2.7577495165095034e-05, + "loss": 60.5259, + "step": 109670 + }, + { + "epoch": 0.44312107855218025, + "grad_norm": 555.914794921875, + "learning_rate": 2.7574910753020333e-05, + "loss": 73.6342, + "step": 109680 + }, + { + "epoch": 0.4431614798175479, + "grad_norm": 789.3749389648438, + "learning_rate": 2.7572326193268847e-05, + "loss": 63.5361, + "step": 109690 + }, + { + "epoch": 0.4432018810829155, + "grad_norm": 1766.4774169921875, + "learning_rate": 2.7569741485890964e-05, + "loss": 65.6252, + "step": 109700 + }, + { + "epoch": 0.44324228234828317, + "grad_norm": 608.0762939453125, + "learning_rate": 2.7567156630937074e-05, + "loss": 105.9469, + "step": 109710 + }, + { + "epoch": 0.44328268361365075, + "grad_norm": 722.127685546875, + "learning_rate": 2.7564571628457567e-05, + "loss": 102.7167, + "step": 109720 + }, + { + "epoch": 0.4433230848790184, + "grad_norm": 946.6883544921875, + "learning_rate": 2.756198647850285e-05, + "loss": 90.6655, + "step": 109730 + }, + { + "epoch": 0.44336348614438603, + "grad_norm": 961.5009765625, + "learning_rate": 2.7559401181123307e-05, + "loss": 61.2359, + "step": 109740 + }, + { + "epoch": 0.44340388740975367, + "grad_norm": 427.78802490234375, + "learning_rate": 2.7556815736369348e-05, + "loss": 92.277, + "step": 109750 + }, + { + "epoch": 0.4434442886751213, + "grad_norm": 1302.704833984375, + "learning_rate": 2.755423014429137e-05, + "loss": 66.595, + "step": 109760 + }, + { + "epoch": 0.44348468994048895, + "grad_norm": 504.1219787597656, + "learning_rate": 2.755164440493979e-05, + "loss": 80.7675, + "step": 109770 + }, + { + "epoch": 0.4435250912058566, + "grad_norm": 489.0342712402344, + "learning_rate": 2.7549058518365016e-05, + "loss": 70.9, + "step": 109780 + }, + { + "epoch": 0.4435654924712242, + "grad_norm": 888.829833984375, + "learning_rate": 2.7546472484617458e-05, + "loss": 87.3321, + "step": 109790 + }, + { + "epoch": 0.4436058937365918, + "grad_norm": 1036.823974609375, + "learning_rate": 2.7543886303747537e-05, + "loss": 116.7449, + "step": 109800 + }, + { + "epoch": 0.44364629500195946, + "grad_norm": 984.7528076171875, + "learning_rate": 2.7541299975805666e-05, + "loss": 58.075, + "step": 109810 + }, + { + "epoch": 0.4436866962673271, + "grad_norm": 362.28973388671875, + "learning_rate": 2.753871350084227e-05, + "loss": 94.5942, + "step": 109820 + }, + { + "epoch": 0.44372709753269474, + "grad_norm": 1080.3843994140625, + "learning_rate": 2.7536126878907773e-05, + "loss": 111.9223, + "step": 109830 + }, + { + "epoch": 0.4437674987980624, + "grad_norm": 752.4998168945312, + "learning_rate": 2.7533540110052605e-05, + "loss": 78.8717, + "step": 109840 + }, + { + "epoch": 0.44380790006342996, + "grad_norm": 
1175.2918701171875, + "learning_rate": 2.753095319432719e-05, + "loss": 83.3798, + "step": 109850 + }, + { + "epoch": 0.4438483013287976, + "grad_norm": 1005.5425415039062, + "learning_rate": 2.7528366131781965e-05, + "loss": 77.8289, + "step": 109860 + }, + { + "epoch": 0.44388870259416524, + "grad_norm": 1633.837158203125, + "learning_rate": 2.7525778922467367e-05, + "loss": 76.5729, + "step": 109870 + }, + { + "epoch": 0.4439291038595329, + "grad_norm": 1501.1688232421875, + "learning_rate": 2.7523191566433832e-05, + "loss": 100.9618, + "step": 109880 + }, + { + "epoch": 0.4439695051249005, + "grad_norm": 640.9383544921875, + "learning_rate": 2.752060406373181e-05, + "loss": 72.4927, + "step": 109890 + }, + { + "epoch": 0.44400990639026816, + "grad_norm": 627.4410400390625, + "learning_rate": 2.7518016414411737e-05, + "loss": 56.4709, + "step": 109900 + }, + { + "epoch": 0.4440503076556358, + "grad_norm": 1390.52294921875, + "learning_rate": 2.7515428618524065e-05, + "loss": 101.998, + "step": 109910 + }, + { + "epoch": 0.4440907089210034, + "grad_norm": 1165.104736328125, + "learning_rate": 2.7512840676119244e-05, + "loss": 79.9594, + "step": 109920 + }, + { + "epoch": 0.444131110186371, + "grad_norm": 363.34857177734375, + "learning_rate": 2.7510252587247724e-05, + "loss": 80.352, + "step": 109930 + }, + { + "epoch": 0.44417151145173867, + "grad_norm": 596.8103637695312, + "learning_rate": 2.750766435195996e-05, + "loss": 82.8144, + "step": 109940 + }, + { + "epoch": 0.4442119127171063, + "grad_norm": 1359.5137939453125, + "learning_rate": 2.750507597030642e-05, + "loss": 78.4705, + "step": 109950 + }, + { + "epoch": 0.44425231398247395, + "grad_norm": 932.8899536132812, + "learning_rate": 2.7502487442337557e-05, + "loss": 125.4763, + "step": 109960 + }, + { + "epoch": 0.4442927152478416, + "grad_norm": 568.8868408203125, + "learning_rate": 2.7499898768103837e-05, + "loss": 94.6074, + "step": 109970 + }, + { + "epoch": 0.44433311651320917, + "grad_norm": 600.7315673828125, + "learning_rate": 2.749730994765573e-05, + "loss": 87.9768, + "step": 109980 + }, + { + "epoch": 0.4443735177785768, + "grad_norm": 1457.211181640625, + "learning_rate": 2.7494720981043715e-05, + "loss": 85.2442, + "step": 109990 + }, + { + "epoch": 0.44441391904394445, + "grad_norm": 959.6185913085938, + "learning_rate": 2.7492131868318247e-05, + "loss": 108.0483, + "step": 110000 + }, + { + "epoch": 0.4444543203093121, + "grad_norm": 859.4281005859375, + "learning_rate": 2.7489542609529808e-05, + "loss": 76.8492, + "step": 110010 + }, + { + "epoch": 0.44449472157467973, + "grad_norm": 1576.571533203125, + "learning_rate": 2.7486953204728884e-05, + "loss": 80.6798, + "step": 110020 + }, + { + "epoch": 0.44453512284004737, + "grad_norm": 457.47705078125, + "learning_rate": 2.748436365396596e-05, + "loss": 89.4201, + "step": 110030 + }, + { + "epoch": 0.44457552410541495, + "grad_norm": 889.1416015625, + "learning_rate": 2.74817739572915e-05, + "loss": 82.6285, + "step": 110040 + }, + { + "epoch": 0.4446159253707826, + "grad_norm": 526.1444702148438, + "learning_rate": 2.747918411475601e-05, + "loss": 72.9643, + "step": 110050 + }, + { + "epoch": 0.44465632663615023, + "grad_norm": 1117.876708984375, + "learning_rate": 2.7476594126409978e-05, + "loss": 64.469, + "step": 110060 + }, + { + "epoch": 0.4446967279015179, + "grad_norm": 723.9794921875, + "learning_rate": 2.747400399230389e-05, + "loss": 79.5318, + "step": 110070 + }, + { + "epoch": 0.4447371291668855, + "grad_norm": 630.9938354492188, + "learning_rate": 
2.7471413712488253e-05, + "loss": 82.0176, + "step": 110080 + }, + { + "epoch": 0.44477753043225315, + "grad_norm": 1575.54931640625, + "learning_rate": 2.7468823287013557e-05, + "loss": 98.7812, + "step": 110090 + }, + { + "epoch": 0.4448179316976208, + "grad_norm": 907.8284301757812, + "learning_rate": 2.74662327159303e-05, + "loss": 55.3915, + "step": 110100 + }, + { + "epoch": 0.4448583329629884, + "grad_norm": 887.6716918945312, + "learning_rate": 2.7463641999289e-05, + "loss": 91.9406, + "step": 110110 + }, + { + "epoch": 0.444898734228356, + "grad_norm": 1239.335693359375, + "learning_rate": 2.746105113714015e-05, + "loss": 83.7483, + "step": 110120 + }, + { + "epoch": 0.44493913549372366, + "grad_norm": 458.3565368652344, + "learning_rate": 2.7458460129534267e-05, + "loss": 48.8211, + "step": 110130 + }, + { + "epoch": 0.4449795367590913, + "grad_norm": 848.4712524414062, + "learning_rate": 2.7455868976521867e-05, + "loss": 69.4526, + "step": 110140 + }, + { + "epoch": 0.44501993802445894, + "grad_norm": 423.1230773925781, + "learning_rate": 2.7453277678153464e-05, + "loss": 75.0877, + "step": 110150 + }, + { + "epoch": 0.4450603392898266, + "grad_norm": 718.173583984375, + "learning_rate": 2.7450686234479577e-05, + "loss": 145.7876, + "step": 110160 + }, + { + "epoch": 0.44510074055519416, + "grad_norm": 710.3490600585938, + "learning_rate": 2.744809464555072e-05, + "loss": 112.6615, + "step": 110170 + }, + { + "epoch": 0.4451411418205618, + "grad_norm": 738.2438354492188, + "learning_rate": 2.7445502911417425e-05, + "loss": 101.4297, + "step": 110180 + }, + { + "epoch": 0.44518154308592944, + "grad_norm": 578.36328125, + "learning_rate": 2.744291103213022e-05, + "loss": 77.5273, + "step": 110190 + }, + { + "epoch": 0.4452219443512971, + "grad_norm": 980.9275512695312, + "learning_rate": 2.7440319007739632e-05, + "loss": 82.1859, + "step": 110200 + }, + { + "epoch": 0.4452623456166647, + "grad_norm": 819.7158813476562, + "learning_rate": 2.7437726838296193e-05, + "loss": 72.8286, + "step": 110210 + }, + { + "epoch": 0.44530274688203236, + "grad_norm": 557.6326904296875, + "learning_rate": 2.743513452385044e-05, + "loss": 99.1977, + "step": 110220 + }, + { + "epoch": 0.4453431481474, + "grad_norm": 664.4640502929688, + "learning_rate": 2.7432542064452906e-05, + "loss": 105.8836, + "step": 110230 + }, + { + "epoch": 0.4453835494127676, + "grad_norm": 1193.81689453125, + "learning_rate": 2.742994946015415e-05, + "loss": 97.8003, + "step": 110240 + }, + { + "epoch": 0.4454239506781352, + "grad_norm": 1252.8304443359375, + "learning_rate": 2.74273567110047e-05, + "loss": 89.9474, + "step": 110250 + }, + { + "epoch": 0.44546435194350287, + "grad_norm": 999.4140014648438, + "learning_rate": 2.7424763817055104e-05, + "loss": 79.0366, + "step": 110260 + }, + { + "epoch": 0.4455047532088705, + "grad_norm": 910.2816162109375, + "learning_rate": 2.7422170778355917e-05, + "loss": 104.7332, + "step": 110270 + }, + { + "epoch": 0.44554515447423815, + "grad_norm": 909.5275268554688, + "learning_rate": 2.741957759495769e-05, + "loss": 55.6495, + "step": 110280 + }, + { + "epoch": 0.4455855557396058, + "grad_norm": 806.0700073242188, + "learning_rate": 2.7416984266910973e-05, + "loss": 98.3352, + "step": 110290 + }, + { + "epoch": 0.44562595700497337, + "grad_norm": 879.367431640625, + "learning_rate": 2.741439079426633e-05, + "loss": 86.5804, + "step": 110300 + }, + { + "epoch": 0.445666358270341, + "grad_norm": 983.2825927734375, + "learning_rate": 2.7411797177074327e-05, + "loss": 101.4151, + 
"step": 110310 + }, + { + "epoch": 0.44570675953570865, + "grad_norm": 672.8036499023438, + "learning_rate": 2.7409203415385523e-05, + "loss": 76.5019, + "step": 110320 + }, + { + "epoch": 0.4457471608010763, + "grad_norm": 835.3341674804688, + "learning_rate": 2.7406609509250483e-05, + "loss": 109.891, + "step": 110330 + }, + { + "epoch": 0.44578756206644393, + "grad_norm": 1360.91845703125, + "learning_rate": 2.740401545871977e-05, + "loss": 174.6724, + "step": 110340 + }, + { + "epoch": 0.44582796333181157, + "grad_norm": 806.0589599609375, + "learning_rate": 2.740142126384397e-05, + "loss": 84.488, + "step": 110350 + }, + { + "epoch": 0.44586836459717916, + "grad_norm": 570.8064575195312, + "learning_rate": 2.739882692467365e-05, + "loss": 71.9228, + "step": 110360 + }, + { + "epoch": 0.4459087658625468, + "grad_norm": 255.3467559814453, + "learning_rate": 2.7396232441259393e-05, + "loss": 61.1575, + "step": 110370 + }, + { + "epoch": 0.44594916712791444, + "grad_norm": 1424.6134033203125, + "learning_rate": 2.739363781365177e-05, + "loss": 85.0408, + "step": 110380 + }, + { + "epoch": 0.4459895683932821, + "grad_norm": 580.8385009765625, + "learning_rate": 2.7391043041901375e-05, + "loss": 66.3685, + "step": 110390 + }, + { + "epoch": 0.4460299696586497, + "grad_norm": 861.6211547851562, + "learning_rate": 2.7388448126058792e-05, + "loss": 80.9481, + "step": 110400 + }, + { + "epoch": 0.44607037092401736, + "grad_norm": 657.4696044921875, + "learning_rate": 2.738585306617461e-05, + "loss": 100.182, + "step": 110410 + }, + { + "epoch": 0.446110772189385, + "grad_norm": 1438.278564453125, + "learning_rate": 2.738325786229942e-05, + "loss": 89.821, + "step": 110420 + }, + { + "epoch": 0.4461511734547526, + "grad_norm": 518.69482421875, + "learning_rate": 2.7380662514483814e-05, + "loss": 92.8553, + "step": 110430 + }, + { + "epoch": 0.4461915747201202, + "grad_norm": 959.0108032226562, + "learning_rate": 2.7378067022778398e-05, + "loss": 49.5757, + "step": 110440 + }, + { + "epoch": 0.44623197598548786, + "grad_norm": 850.2694702148438, + "learning_rate": 2.7375471387233753e-05, + "loss": 120.2829, + "step": 110450 + }, + { + "epoch": 0.4462723772508555, + "grad_norm": 443.8161926269531, + "learning_rate": 2.7372875607900508e-05, + "loss": 55.4089, + "step": 110460 + }, + { + "epoch": 0.44631277851622314, + "grad_norm": 306.594970703125, + "learning_rate": 2.7370279684829255e-05, + "loss": 59.8429, + "step": 110470 + }, + { + "epoch": 0.4463531797815908, + "grad_norm": 805.4652709960938, + "learning_rate": 2.7367683618070606e-05, + "loss": 70.3112, + "step": 110480 + }, + { + "epoch": 0.44639358104695837, + "grad_norm": 706.4088745117188, + "learning_rate": 2.7365087407675166e-05, + "loss": 50.734, + "step": 110490 + }, + { + "epoch": 0.446433982312326, + "grad_norm": 573.681640625, + "learning_rate": 2.7362491053693564e-05, + "loss": 77.3149, + "step": 110500 + }, + { + "epoch": 0.44647438357769365, + "grad_norm": 634.3530883789062, + "learning_rate": 2.7359894556176404e-05, + "loss": 94.6407, + "step": 110510 + }, + { + "epoch": 0.4465147848430613, + "grad_norm": 713.2092895507812, + "learning_rate": 2.735729791517431e-05, + "loss": 64.083, + "step": 110520 + }, + { + "epoch": 0.4465551861084289, + "grad_norm": 540.277099609375, + "learning_rate": 2.735470113073791e-05, + "loss": 128.1044, + "step": 110530 + }, + { + "epoch": 0.44659558737379657, + "grad_norm": 1032.994384765625, + "learning_rate": 2.7352104202917814e-05, + "loss": 78.2797, + "step": 110540 + }, + { + "epoch": 
0.4466359886391642, + "grad_norm": 1041.9434814453125, + "learning_rate": 2.734950713176467e-05, + "loss": 146.9043, + "step": 110550 + }, + { + "epoch": 0.4466763899045318, + "grad_norm": 523.1193237304688, + "learning_rate": 2.7346909917329098e-05, + "loss": 47.434, + "step": 110560 + }, + { + "epoch": 0.44671679116989943, + "grad_norm": 524.615478515625, + "learning_rate": 2.7344312559661737e-05, + "loss": 72.3804, + "step": 110570 + }, + { + "epoch": 0.44675719243526707, + "grad_norm": 350.77978515625, + "learning_rate": 2.7341715058813218e-05, + "loss": 61.7473, + "step": 110580 + }, + { + "epoch": 0.4467975937006347, + "grad_norm": 1046.4366455078125, + "learning_rate": 2.733911741483419e-05, + "loss": 107.5656, + "step": 110590 + }, + { + "epoch": 0.44683799496600235, + "grad_norm": 472.0415344238281, + "learning_rate": 2.7336519627775288e-05, + "loss": 62.5765, + "step": 110600 + }, + { + "epoch": 0.44687839623137, + "grad_norm": 737.67041015625, + "learning_rate": 2.7333921697687154e-05, + "loss": 97.1129, + "step": 110610 + }, + { + "epoch": 0.4469187974967376, + "grad_norm": 591.0413208007812, + "learning_rate": 2.7331323624620442e-05, + "loss": 81.6282, + "step": 110620 + }, + { + "epoch": 0.4469591987621052, + "grad_norm": 1301.3326416015625, + "learning_rate": 2.7328725408625804e-05, + "loss": 123.7314, + "step": 110630 + }, + { + "epoch": 0.44699960002747285, + "grad_norm": 630.206298828125, + "learning_rate": 2.7326127049753888e-05, + "loss": 101.6768, + "step": 110640 + }, + { + "epoch": 0.4470400012928405, + "grad_norm": 887.99365234375, + "learning_rate": 2.7323528548055355e-05, + "loss": 98.8928, + "step": 110650 + }, + { + "epoch": 0.44708040255820813, + "grad_norm": 1279.0919189453125, + "learning_rate": 2.732092990358086e-05, + "loss": 164.0209, + "step": 110660 + }, + { + "epoch": 0.4471208038235758, + "grad_norm": 942.41455078125, + "learning_rate": 2.7318331116381077e-05, + "loss": 87.3545, + "step": 110670 + }, + { + "epoch": 0.44716120508894336, + "grad_norm": 268.3739318847656, + "learning_rate": 2.7315732186506654e-05, + "loss": 64.1143, + "step": 110680 + }, + { + "epoch": 0.447201606354311, + "grad_norm": 1016.048828125, + "learning_rate": 2.7313133114008268e-05, + "loss": 66.699, + "step": 110690 + }, + { + "epoch": 0.44724200761967864, + "grad_norm": 1185.6982421875, + "learning_rate": 2.731053389893658e-05, + "loss": 98.5653, + "step": 110700 + }, + { + "epoch": 0.4472824088850463, + "grad_norm": 696.7045288085938, + "learning_rate": 2.7307934541342276e-05, + "loss": 82.9786, + "step": 110710 + }, + { + "epoch": 0.4473228101504139, + "grad_norm": 1079.7420654296875, + "learning_rate": 2.7305335041276024e-05, + "loss": 87.7584, + "step": 110720 + }, + { + "epoch": 0.44736321141578156, + "grad_norm": 307.4581298828125, + "learning_rate": 2.7302735398788507e-05, + "loss": 100.2085, + "step": 110730 + }, + { + "epoch": 0.4474036126811492, + "grad_norm": 999.6603393554688, + "learning_rate": 2.7300135613930404e-05, + "loss": 85.7632, + "step": 110740 + }, + { + "epoch": 0.4474440139465168, + "grad_norm": 431.947021484375, + "learning_rate": 2.7297535686752392e-05, + "loss": 68.5617, + "step": 110750 + }, + { + "epoch": 0.4474844152118844, + "grad_norm": 518.1717529296875, + "learning_rate": 2.729493561730517e-05, + "loss": 72.8301, + "step": 110760 + }, + { + "epoch": 0.44752481647725206, + "grad_norm": 1103.127197265625, + "learning_rate": 2.729233540563943e-05, + "loss": 100.6588, + "step": 110770 + }, + { + "epoch": 0.4475652177426197, + "grad_norm": 
439.6687927246094, + "learning_rate": 2.7289735051805846e-05, + "loss": 86.8078, + "step": 110780 + }, + { + "epoch": 0.44760561900798734, + "grad_norm": 1083.612548828125, + "learning_rate": 2.7287134555855127e-05, + "loss": 107.1022, + "step": 110790 + }, + { + "epoch": 0.447646020273355, + "grad_norm": 743.3803100585938, + "learning_rate": 2.728453391783797e-05, + "loss": 60.4662, + "step": 110800 + }, + { + "epoch": 0.44768642153872257, + "grad_norm": 368.5544738769531, + "learning_rate": 2.7281933137805068e-05, + "loss": 70.9766, + "step": 110810 + }, + { + "epoch": 0.4477268228040902, + "grad_norm": 536.345458984375, + "learning_rate": 2.727933221580713e-05, + "loss": 78.8305, + "step": 110820 + }, + { + "epoch": 0.44776722406945785, + "grad_norm": 604.1950073242188, + "learning_rate": 2.727673115189487e-05, + "loss": 88.7669, + "step": 110830 + }, + { + "epoch": 0.4478076253348255, + "grad_norm": 631.6414184570312, + "learning_rate": 2.727412994611899e-05, + "loss": 82.928, + "step": 110840 + }, + { + "epoch": 0.4478480266001931, + "grad_norm": 885.3919067382812, + "learning_rate": 2.72715285985302e-05, + "loss": 72.9106, + "step": 110850 + }, + { + "epoch": 0.44788842786556077, + "grad_norm": 1172.4149169921875, + "learning_rate": 2.7268927109179216e-05, + "loss": 84.244, + "step": 110860 + }, + { + "epoch": 0.4479288291309284, + "grad_norm": 858.4666137695312, + "learning_rate": 2.7266325478116755e-05, + "loss": 82.8366, + "step": 110870 + }, + { + "epoch": 0.447969230396296, + "grad_norm": 1023.9484252929688, + "learning_rate": 2.7263723705393535e-05, + "loss": 67.9586, + "step": 110880 + }, + { + "epoch": 0.44800963166166363, + "grad_norm": 741.8781127929688, + "learning_rate": 2.726112179106029e-05, + "loss": 74.4154, + "step": 110890 + }, + { + "epoch": 0.44805003292703127, + "grad_norm": 594.3267822265625, + "learning_rate": 2.7258519735167727e-05, + "loss": 80.1729, + "step": 110900 + }, + { + "epoch": 0.4480904341923989, + "grad_norm": 647.2243041992188, + "learning_rate": 2.7255917537766593e-05, + "loss": 101.7151, + "step": 110910 + }, + { + "epoch": 0.44813083545776655, + "grad_norm": 602.187744140625, + "learning_rate": 2.7253315198907605e-05, + "loss": 103.1495, + "step": 110920 + }, + { + "epoch": 0.4481712367231342, + "grad_norm": 1121.0169677734375, + "learning_rate": 2.725071271864151e-05, + "loss": 167.1037, + "step": 110930 + }, + { + "epoch": 0.4482116379885018, + "grad_norm": 448.618408203125, + "learning_rate": 2.7248110097019033e-05, + "loss": 81.8193, + "step": 110940 + }, + { + "epoch": 0.4482520392538694, + "grad_norm": 689.898681640625, + "learning_rate": 2.7245507334090917e-05, + "loss": 101.4988, + "step": 110950 + }, + { + "epoch": 0.44829244051923706, + "grad_norm": 1051.905029296875, + "learning_rate": 2.7242904429907907e-05, + "loss": 69.6487, + "step": 110960 + }, + { + "epoch": 0.4483328417846047, + "grad_norm": 923.610595703125, + "learning_rate": 2.7240301384520747e-05, + "loss": 68.7283, + "step": 110970 + }, + { + "epoch": 0.44837324304997234, + "grad_norm": 563.29345703125, + "learning_rate": 2.723769819798018e-05, + "loss": 60.2192, + "step": 110980 + }, + { + "epoch": 0.44841364431534, + "grad_norm": 1732.873046875, + "learning_rate": 2.723509487033696e-05, + "loss": 80.3361, + "step": 110990 + }, + { + "epoch": 0.44845404558070756, + "grad_norm": 337.2344055175781, + "learning_rate": 2.7232491401641844e-05, + "loss": 100.4508, + "step": 111000 + }, + { + "epoch": 0.4484944468460752, + "grad_norm": 595.6773681640625, + "learning_rate": 
2.722988779194559e-05, + "loss": 108.1934, + "step": 111010 + }, + { + "epoch": 0.44853484811144284, + "grad_norm": 574.8885498046875, + "learning_rate": 2.722728404129895e-05, + "loss": 84.0403, + "step": 111020 + }, + { + "epoch": 0.4485752493768105, + "grad_norm": 622.235595703125, + "learning_rate": 2.722468014975268e-05, + "loss": 98.8944, + "step": 111030 + }, + { + "epoch": 0.4486156506421781, + "grad_norm": 692.3932495117188, + "learning_rate": 2.7222076117357552e-05, + "loss": 64.0281, + "step": 111040 + }, + { + "epoch": 0.44865605190754576, + "grad_norm": 891.7725219726562, + "learning_rate": 2.7219471944164336e-05, + "loss": 75.8269, + "step": 111050 + }, + { + "epoch": 0.4486964531729134, + "grad_norm": 576.2487182617188, + "learning_rate": 2.7216867630223792e-05, + "loss": 79.8835, + "step": 111060 + }, + { + "epoch": 0.448736854438281, + "grad_norm": 772.5215454101562, + "learning_rate": 2.7214263175586705e-05, + "loss": 98.8817, + "step": 111070 + }, + { + "epoch": 0.4487772557036486, + "grad_norm": 670.7537841796875, + "learning_rate": 2.7211658580303835e-05, + "loss": 67.3457, + "step": 111080 + }, + { + "epoch": 0.44881765696901627, + "grad_norm": 624.036865234375, + "learning_rate": 2.7209053844425978e-05, + "loss": 65.5105, + "step": 111090 + }, + { + "epoch": 0.4488580582343839, + "grad_norm": 668.5901489257812, + "learning_rate": 2.7206448968003898e-05, + "loss": 70.1418, + "step": 111100 + }, + { + "epoch": 0.44889845949975155, + "grad_norm": 855.1578369140625, + "learning_rate": 2.7203843951088383e-05, + "loss": 77.0337, + "step": 111110 + }, + { + "epoch": 0.4489388607651192, + "grad_norm": 845.0704956054688, + "learning_rate": 2.7201238793730228e-05, + "loss": 65.9357, + "step": 111120 + }, + { + "epoch": 0.44897926203048677, + "grad_norm": 1071.5892333984375, + "learning_rate": 2.7198633495980214e-05, + "loss": 68.0374, + "step": 111130 + }, + { + "epoch": 0.4490196632958544, + "grad_norm": 647.1695556640625, + "learning_rate": 2.7196028057889134e-05, + "loss": 75.3528, + "step": 111140 + }, + { + "epoch": 0.44906006456122205, + "grad_norm": 285.20135498046875, + "learning_rate": 2.7193422479507777e-05, + "loss": 41.196, + "step": 111150 + }, + { + "epoch": 0.4491004658265897, + "grad_norm": 587.009765625, + "learning_rate": 2.7190816760886946e-05, + "loss": 76.5525, + "step": 111160 + }, + { + "epoch": 0.44914086709195733, + "grad_norm": 770.2496337890625, + "learning_rate": 2.7188210902077447e-05, + "loss": 81.1933, + "step": 111170 + }, + { + "epoch": 0.44918126835732497, + "grad_norm": 539.3468017578125, + "learning_rate": 2.718560490313007e-05, + "loss": 43.7219, + "step": 111180 + }, + { + "epoch": 0.4492216696226926, + "grad_norm": 777.7388916015625, + "learning_rate": 2.718299876409563e-05, + "loss": 108.3768, + "step": 111190 + }, + { + "epoch": 0.4492620708880602, + "grad_norm": 466.24005126953125, + "learning_rate": 2.7180392485024926e-05, + "loss": 71.8974, + "step": 111200 + }, + { + "epoch": 0.44930247215342783, + "grad_norm": 2576.03369140625, + "learning_rate": 2.717778606596878e-05, + "loss": 77.8036, + "step": 111210 + }, + { + "epoch": 0.4493428734187955, + "grad_norm": 349.8440246582031, + "learning_rate": 2.7175179506977997e-05, + "loss": 56.9487, + "step": 111220 + }, + { + "epoch": 0.4493832746841631, + "grad_norm": 1509.8602294921875, + "learning_rate": 2.7172572808103392e-05, + "loss": 141.7231, + "step": 111230 + }, + { + "epoch": 0.44942367594953075, + "grad_norm": 1008.7871704101562, + "learning_rate": 2.7169965969395788e-05, + 
"loss": 57.5224, + "step": 111240 + }, + { + "epoch": 0.4494640772148984, + "grad_norm": 1029.047119140625, + "learning_rate": 2.7167358990906e-05, + "loss": 69.1403, + "step": 111250 + }, + { + "epoch": 0.449504478480266, + "grad_norm": 197.4867401123047, + "learning_rate": 2.7164751872684876e-05, + "loss": 78.6234, + "step": 111260 + }, + { + "epoch": 0.4495448797456336, + "grad_norm": 835.0730590820312, + "learning_rate": 2.7162144614783214e-05, + "loss": 67.6819, + "step": 111270 + }, + { + "epoch": 0.44958528101100126, + "grad_norm": 420.2360534667969, + "learning_rate": 2.715953721725186e-05, + "loss": 62.3657, + "step": 111280 + }, + { + "epoch": 0.4496256822763689, + "grad_norm": 405.85809326171875, + "learning_rate": 2.715692968014164e-05, + "loss": 62.1703, + "step": 111290 + }, + { + "epoch": 0.44966608354173654, + "grad_norm": 787.0912475585938, + "learning_rate": 2.715432200350339e-05, + "loss": 94.2912, + "step": 111300 + }, + { + "epoch": 0.4497064848071042, + "grad_norm": 582.2385864257812, + "learning_rate": 2.7151714187387948e-05, + "loss": 138.7918, + "step": 111310 + }, + { + "epoch": 0.44974688607247176, + "grad_norm": 717.2625732421875, + "learning_rate": 2.714910623184616e-05, + "loss": 110.1915, + "step": 111320 + }, + { + "epoch": 0.4497872873378394, + "grad_norm": 865.0787353515625, + "learning_rate": 2.714649813692886e-05, + "loss": 73.3408, + "step": 111330 + }, + { + "epoch": 0.44982768860320704, + "grad_norm": 760.9541625976562, + "learning_rate": 2.7143889902686904e-05, + "loss": 74.7243, + "step": 111340 + }, + { + "epoch": 0.4498680898685747, + "grad_norm": 390.6227111816406, + "learning_rate": 2.7141281529171134e-05, + "loss": 59.0301, + "step": 111350 + }, + { + "epoch": 0.4499084911339423, + "grad_norm": 404.87078857421875, + "learning_rate": 2.713867301643241e-05, + "loss": 74.5373, + "step": 111360 + }, + { + "epoch": 0.44994889239930996, + "grad_norm": 1134.2117919921875, + "learning_rate": 2.713606436452157e-05, + "loss": 73.6906, + "step": 111370 + }, + { + "epoch": 0.4499892936646776, + "grad_norm": 494.1714172363281, + "learning_rate": 2.713345557348949e-05, + "loss": 115.8314, + "step": 111380 + }, + { + "epoch": 0.4500296949300452, + "grad_norm": 647.5482788085938, + "learning_rate": 2.713084664338702e-05, + "loss": 72.281, + "step": 111390 + }, + { + "epoch": 0.4500700961954128, + "grad_norm": 569.9494018554688, + "learning_rate": 2.7128237574265014e-05, + "loss": 87.0812, + "step": 111400 + }, + { + "epoch": 0.45011049746078047, + "grad_norm": 1380.7606201171875, + "learning_rate": 2.7125628366174356e-05, + "loss": 98.1593, + "step": 111410 + }, + { + "epoch": 0.4501508987261481, + "grad_norm": 699.2203979492188, + "learning_rate": 2.7123019019165906e-05, + "loss": 65.4075, + "step": 111420 + }, + { + "epoch": 0.45019129999151575, + "grad_norm": 1272.3875732421875, + "learning_rate": 2.7120409533290524e-05, + "loss": 140.5978, + "step": 111430 + }, + { + "epoch": 0.4502317012568834, + "grad_norm": 977.0611572265625, + "learning_rate": 2.7117799908599097e-05, + "loss": 62.0545, + "step": 111440 + }, + { + "epoch": 0.45027210252225097, + "grad_norm": 382.8816833496094, + "learning_rate": 2.71151901451425e-05, + "loss": 58.2071, + "step": 111450 + }, + { + "epoch": 0.4503125037876186, + "grad_norm": 1130.7744140625, + "learning_rate": 2.7112580242971608e-05, + "loss": 73.3821, + "step": 111460 + }, + { + "epoch": 0.45035290505298625, + "grad_norm": 427.85748291015625, + "learning_rate": 2.71099702021373e-05, + "loss": 88.9347, + "step": 111470 + 
}, + { + "epoch": 0.4503933063183539, + "grad_norm": 1024.6610107421875, + "learning_rate": 2.710736002269046e-05, + "loss": 68.4678, + "step": 111480 + }, + { + "epoch": 0.45043370758372153, + "grad_norm": 1001.8733520507812, + "learning_rate": 2.7104749704681975e-05, + "loss": 78.0787, + "step": 111490 + }, + { + "epoch": 0.45047410884908917, + "grad_norm": 730.2445678710938, + "learning_rate": 2.7102139248162743e-05, + "loss": 104.2587, + "step": 111500 + }, + { + "epoch": 0.4505145101144568, + "grad_norm": 674.4457397460938, + "learning_rate": 2.709952865318365e-05, + "loss": 71.353, + "step": 111510 + }, + { + "epoch": 0.4505549113798244, + "grad_norm": 633.5430297851562, + "learning_rate": 2.7096917919795586e-05, + "loss": 104.459, + "step": 111520 + }, + { + "epoch": 0.45059531264519204, + "grad_norm": 1657.76318359375, + "learning_rate": 2.709430704804946e-05, + "loss": 113.2853, + "step": 111530 + }, + { + "epoch": 0.4506357139105597, + "grad_norm": 2573.68701171875, + "learning_rate": 2.709169603799616e-05, + "loss": 69.4011, + "step": 111540 + }, + { + "epoch": 0.4506761151759273, + "grad_norm": 679.908203125, + "learning_rate": 2.7089084889686597e-05, + "loss": 77.9138, + "step": 111550 + }, + { + "epoch": 0.45071651644129496, + "grad_norm": 608.4434814453125, + "learning_rate": 2.7086473603171676e-05, + "loss": 79.0985, + "step": 111560 + }, + { + "epoch": 0.4507569177066626, + "grad_norm": 493.4070739746094, + "learning_rate": 2.7083862178502296e-05, + "loss": 62.4198, + "step": 111570 + }, + { + "epoch": 0.4507973189720302, + "grad_norm": 360.2536926269531, + "learning_rate": 2.7081250615729385e-05, + "loss": 118.5692, + "step": 111580 + }, + { + "epoch": 0.4508377202373978, + "grad_norm": 433.3017272949219, + "learning_rate": 2.707863891490384e-05, + "loss": 58.87, + "step": 111590 + }, + { + "epoch": 0.45087812150276546, + "grad_norm": 549.46484375, + "learning_rate": 2.707602707607659e-05, + "loss": 64.3202, + "step": 111600 + }, + { + "epoch": 0.4509185227681331, + "grad_norm": 1106.81298828125, + "learning_rate": 2.7073415099298545e-05, + "loss": 76.6456, + "step": 111610 + }, + { + "epoch": 0.45095892403350074, + "grad_norm": 853.4287719726562, + "learning_rate": 2.7070802984620636e-05, + "loss": 69.0355, + "step": 111620 + }, + { + "epoch": 0.4509993252988684, + "grad_norm": 937.868408203125, + "learning_rate": 2.7068190732093787e-05, + "loss": 113.476, + "step": 111630 + }, + { + "epoch": 0.45103972656423597, + "grad_norm": 457.1081237792969, + "learning_rate": 2.706557834176891e-05, + "loss": 59.7334, + "step": 111640 + }, + { + "epoch": 0.4510801278296036, + "grad_norm": 1302.5484619140625, + "learning_rate": 2.706296581369695e-05, + "loss": 101.9465, + "step": 111650 + }, + { + "epoch": 0.45112052909497125, + "grad_norm": 1241.7603759765625, + "learning_rate": 2.7060353147928837e-05, + "loss": 98.9128, + "step": 111660 + }, + { + "epoch": 0.4511609303603389, + "grad_norm": 598.4838256835938, + "learning_rate": 2.7057740344515503e-05, + "loss": 53.8062, + "step": 111670 + }, + { + "epoch": 0.4512013316257065, + "grad_norm": 1021.545166015625, + "learning_rate": 2.7055127403507887e-05, + "loss": 45.6801, + "step": 111680 + }, + { + "epoch": 0.45124173289107417, + "grad_norm": 1179.3560791015625, + "learning_rate": 2.7052514324956923e-05, + "loss": 94.888, + "step": 111690 + }, + { + "epoch": 0.4512821341564418, + "grad_norm": 869.30712890625, + "learning_rate": 2.7049901108913573e-05, + "loss": 66.8841, + "step": 111700 + }, + { + "epoch": 0.4513225354218094, + 
"grad_norm": 819.3466796875, + "learning_rate": 2.7047287755428765e-05, + "loss": 70.6838, + "step": 111710 + }, + { + "epoch": 0.45136293668717703, + "grad_norm": 410.7763366699219, + "learning_rate": 2.7044674264553463e-05, + "loss": 58.7589, + "step": 111720 + }, + { + "epoch": 0.45140333795254467, + "grad_norm": 746.4517822265625, + "learning_rate": 2.7042060636338598e-05, + "loss": 126.9735, + "step": 111730 + }, + { + "epoch": 0.4514437392179123, + "grad_norm": 440.8060302734375, + "learning_rate": 2.703944687083514e-05, + "loss": 56.8863, + "step": 111740 + }, + { + "epoch": 0.45148414048327995, + "grad_norm": 533.440673828125, + "learning_rate": 2.7036832968094036e-05, + "loss": 66.5473, + "step": 111750 + }, + { + "epoch": 0.4515245417486476, + "grad_norm": 929.2205200195312, + "learning_rate": 2.7034218928166258e-05, + "loss": 92.4864, + "step": 111760 + }, + { + "epoch": 0.4515649430140152, + "grad_norm": 829.3902587890625, + "learning_rate": 2.7031604751102757e-05, + "loss": 60.2454, + "step": 111770 + }, + { + "epoch": 0.4516053442793828, + "grad_norm": 554.6519165039062, + "learning_rate": 2.70289904369545e-05, + "loss": 58.745, + "step": 111780 + }, + { + "epoch": 0.45164574554475045, + "grad_norm": 1243.239013671875, + "learning_rate": 2.702637598577246e-05, + "loss": 71.2572, + "step": 111790 + }, + { + "epoch": 0.4516861468101181, + "grad_norm": 952.412353515625, + "learning_rate": 2.7023761397607603e-05, + "loss": 94.7561, + "step": 111800 + }, + { + "epoch": 0.45172654807548573, + "grad_norm": 691.4224853515625, + "learning_rate": 2.7021146672510896e-05, + "loss": 58.2043, + "step": 111810 + }, + { + "epoch": 0.4517669493408534, + "grad_norm": 876.7526245117188, + "learning_rate": 2.7018531810533322e-05, + "loss": 86.6898, + "step": 111820 + }, + { + "epoch": 0.451807350606221, + "grad_norm": 568.8126831054688, + "learning_rate": 2.7015916811725863e-05, + "loss": 76.7317, + "step": 111830 + }, + { + "epoch": 0.4518477518715886, + "grad_norm": 650.3406982421875, + "learning_rate": 2.7013301676139483e-05, + "loss": 72.8726, + "step": 111840 + }, + { + "epoch": 0.45188815313695624, + "grad_norm": 681.595947265625, + "learning_rate": 2.701068640382518e-05, + "loss": 74.344, + "step": 111850 + }, + { + "epoch": 0.4519285544023239, + "grad_norm": 518.1609497070312, + "learning_rate": 2.700807099483394e-05, + "loss": 151.6998, + "step": 111860 + }, + { + "epoch": 0.4519689556676915, + "grad_norm": 592.99169921875, + "learning_rate": 2.7005455449216745e-05, + "loss": 77.2571, + "step": 111870 + }, + { + "epoch": 0.45200935693305916, + "grad_norm": 421.22845458984375, + "learning_rate": 2.700283976702459e-05, + "loss": 70.4197, + "step": 111880 + }, + { + "epoch": 0.4520497581984268, + "grad_norm": 567.314208984375, + "learning_rate": 2.700022394830847e-05, + "loss": 77.4611, + "step": 111890 + }, + { + "epoch": 0.4520901594637944, + "grad_norm": 554.619384765625, + "learning_rate": 2.699760799311938e-05, + "loss": 95.7468, + "step": 111900 + }, + { + "epoch": 0.452130560729162, + "grad_norm": 761.4104614257812, + "learning_rate": 2.699499190150832e-05, + "loss": 94.3435, + "step": 111910 + }, + { + "epoch": 0.45217096199452966, + "grad_norm": 929.3544921875, + "learning_rate": 2.699237567352629e-05, + "loss": 117.6628, + "step": 111920 + }, + { + "epoch": 0.4522113632598973, + "grad_norm": 1529.741455078125, + "learning_rate": 2.698975930922429e-05, + "loss": 79.1271, + "step": 111930 + }, + { + "epoch": 0.45225176452526494, + "grad_norm": 1833.4913330078125, + "learning_rate": 
2.6987142808653342e-05, + "loss": 106.9303, + "step": 111940 + }, + { + "epoch": 0.4522921657906326, + "grad_norm": 330.6051025390625, + "learning_rate": 2.698452617186445e-05, + "loss": 79.6267, + "step": 111950 + }, + { + "epoch": 0.45233256705600017, + "grad_norm": 909.468994140625, + "learning_rate": 2.6981909398908616e-05, + "loss": 69.9654, + "step": 111960 + }, + { + "epoch": 0.4523729683213678, + "grad_norm": 621.5585327148438, + "learning_rate": 2.697929248983687e-05, + "loss": 60.3358, + "step": 111970 + }, + { + "epoch": 0.45241336958673545, + "grad_norm": 390.74908447265625, + "learning_rate": 2.6976675444700223e-05, + "loss": 51.8026, + "step": 111980 + }, + { + "epoch": 0.4524537708521031, + "grad_norm": 587.7196655273438, + "learning_rate": 2.6974058263549695e-05, + "loss": 58.3623, + "step": 111990 + }, + { + "epoch": 0.4524941721174707, + "grad_norm": 651.2232055664062, + "learning_rate": 2.6971440946436306e-05, + "loss": 97.0935, + "step": 112000 + }, + { + "epoch": 0.45253457338283837, + "grad_norm": 617.6880493164062, + "learning_rate": 2.6968823493411093e-05, + "loss": 42.0092, + "step": 112010 + }, + { + "epoch": 0.452574974648206, + "grad_norm": 1021.8900146484375, + "learning_rate": 2.6966205904525075e-05, + "loss": 107.5359, + "step": 112020 + }, + { + "epoch": 0.4526153759135736, + "grad_norm": 1098.7264404296875, + "learning_rate": 2.6963588179829287e-05, + "loss": 95.865, + "step": 112030 + }, + { + "epoch": 0.45265577717894123, + "grad_norm": 3340.5634765625, + "learning_rate": 2.6960970319374765e-05, + "loss": 133.4831, + "step": 112040 + }, + { + "epoch": 0.45269617844430887, + "grad_norm": 347.992919921875, + "learning_rate": 2.6958352323212538e-05, + "loss": 69.175, + "step": 112050 + }, + { + "epoch": 0.4527365797096765, + "grad_norm": 485.7565612792969, + "learning_rate": 2.6955734191393657e-05, + "loss": 71.4841, + "step": 112060 + }, + { + "epoch": 0.45277698097504415, + "grad_norm": 711.3272705078125, + "learning_rate": 2.6953115923969155e-05, + "loss": 75.9042, + "step": 112070 + }, + { + "epoch": 0.4528173822404118, + "grad_norm": 981.9896850585938, + "learning_rate": 2.6950497520990075e-05, + "loss": 66.0978, + "step": 112080 + }, + { + "epoch": 0.4528577835057794, + "grad_norm": 1040.938720703125, + "learning_rate": 2.6947878982507468e-05, + "loss": 80.6278, + "step": 112090 + }, + { + "epoch": 0.452898184771147, + "grad_norm": 261.05108642578125, + "learning_rate": 2.694526030857238e-05, + "loss": 82.9424, + "step": 112100 + }, + { + "epoch": 0.45293858603651466, + "grad_norm": 1109.604736328125, + "learning_rate": 2.6942641499235877e-05, + "loss": 75.6132, + "step": 112110 + }, + { + "epoch": 0.4529789873018823, + "grad_norm": 628.9629516601562, + "learning_rate": 2.6940022554548994e-05, + "loss": 65.8465, + "step": 112120 + }, + { + "epoch": 0.45301938856724994, + "grad_norm": 3754.19140625, + "learning_rate": 2.69374034745628e-05, + "loss": 73.0601, + "step": 112130 + }, + { + "epoch": 0.4530597898326176, + "grad_norm": 589.1320190429688, + "learning_rate": 2.6934784259328357e-05, + "loss": 68.8124, + "step": 112140 + }, + { + "epoch": 0.4531001910979852, + "grad_norm": 599.7484130859375, + "learning_rate": 2.6932164908896728e-05, + "loss": 90.5521, + "step": 112150 + }, + { + "epoch": 0.4531405923633528, + "grad_norm": 1004.9889526367188, + "learning_rate": 2.692954542331897e-05, + "loss": 87.9781, + "step": 112160 + }, + { + "epoch": 0.45318099362872044, + "grad_norm": 870.6629638671875, + "learning_rate": 2.6926925802646154e-05, + "loss": 
67.7651, + "step": 112170 + }, + { + "epoch": 0.4532213948940881, + "grad_norm": 372.28216552734375, + "learning_rate": 2.692430604692935e-05, + "loss": 80.7835, + "step": 112180 + }, + { + "epoch": 0.4532617961594557, + "grad_norm": 1295.7413330078125, + "learning_rate": 2.6921686156219644e-05, + "loss": 53.6498, + "step": 112190 + }, + { + "epoch": 0.45330219742482336, + "grad_norm": 666.787109375, + "learning_rate": 2.6919066130568097e-05, + "loss": 118.4207, + "step": 112200 + }, + { + "epoch": 0.453342598690191, + "grad_norm": 356.92840576171875, + "learning_rate": 2.69164459700258e-05, + "loss": 54.7656, + "step": 112210 + }, + { + "epoch": 0.4533829999555586, + "grad_norm": 965.26904296875, + "learning_rate": 2.6913825674643817e-05, + "loss": 74.3733, + "step": 112220 + }, + { + "epoch": 0.4534234012209262, + "grad_norm": 783.9791870117188, + "learning_rate": 2.6911205244473256e-05, + "loss": 92.6115, + "step": 112230 + }, + { + "epoch": 0.45346380248629387, + "grad_norm": 817.1396484375, + "learning_rate": 2.6908584679565187e-05, + "loss": 53.9894, + "step": 112240 + }, + { + "epoch": 0.4535042037516615, + "grad_norm": 404.9577941894531, + "learning_rate": 2.6905963979970694e-05, + "loss": 74.1203, + "step": 112250 + }, + { + "epoch": 0.45354460501702915, + "grad_norm": 398.9828796386719, + "learning_rate": 2.6903343145740887e-05, + "loss": 87.6008, + "step": 112260 + }, + { + "epoch": 0.4535850062823968, + "grad_norm": 598.596435546875, + "learning_rate": 2.6900722176926844e-05, + "loss": 82.758, + "step": 112270 + }, + { + "epoch": 0.45362540754776437, + "grad_norm": 871.5032958984375, + "learning_rate": 2.6898101073579677e-05, + "loss": 73.7763, + "step": 112280 + }, + { + "epoch": 0.453665808813132, + "grad_norm": 759.2030029296875, + "learning_rate": 2.6895479835750475e-05, + "loss": 75.5295, + "step": 112290 + }, + { + "epoch": 0.45370621007849965, + "grad_norm": 338.97503662109375, + "learning_rate": 2.689285846349034e-05, + "loss": 61.5387, + "step": 112300 + }, + { + "epoch": 0.4537466113438673, + "grad_norm": 591.1249389648438, + "learning_rate": 2.689023695685039e-05, + "loss": 82.8471, + "step": 112310 + }, + { + "epoch": 0.45378701260923493, + "grad_norm": 1498.0521240234375, + "learning_rate": 2.6887615315881718e-05, + "loss": 82.0202, + "step": 112320 + }, + { + "epoch": 0.45382741387460257, + "grad_norm": 296.4162292480469, + "learning_rate": 2.6884993540635436e-05, + "loss": 62.2502, + "step": 112330 + }, + { + "epoch": 0.4538678151399702, + "grad_norm": 751.862060546875, + "learning_rate": 2.6882371631162666e-05, + "loss": 75.1184, + "step": 112340 + }, + { + "epoch": 0.4539082164053378, + "grad_norm": 1389.797119140625, + "learning_rate": 2.687974958751451e-05, + "loss": 74.3811, + "step": 112350 + }, + { + "epoch": 0.45394861767070543, + "grad_norm": 319.79278564453125, + "learning_rate": 2.6877127409742106e-05, + "loss": 49.5658, + "step": 112360 + }, + { + "epoch": 0.4539890189360731, + "grad_norm": 1041.8232421875, + "learning_rate": 2.6874505097896553e-05, + "loss": 54.5847, + "step": 112370 + }, + { + "epoch": 0.4540294202014407, + "grad_norm": 498.2091064453125, + "learning_rate": 2.687188265202898e-05, + "loss": 79.0955, + "step": 112380 + }, + { + "epoch": 0.45406982146680835, + "grad_norm": 364.1988525390625, + "learning_rate": 2.6869260072190525e-05, + "loss": 81.7981, + "step": 112390 + }, + { + "epoch": 0.454110222732176, + "grad_norm": 1017.3944702148438, + "learning_rate": 2.6866637358432308e-05, + "loss": 89.5289, + "step": 112400 + }, + { + 
"epoch": 0.4541506239975436, + "grad_norm": 326.36767578125, + "learning_rate": 2.686401451080546e-05, + "loss": 121.068, + "step": 112410 + }, + { + "epoch": 0.4541910252629112, + "grad_norm": 546.8922729492188, + "learning_rate": 2.6861391529361113e-05, + "loss": 69.2025, + "step": 112420 + }, + { + "epoch": 0.45423142652827886, + "grad_norm": 877.6389770507812, + "learning_rate": 2.6858768414150404e-05, + "loss": 85.2959, + "step": 112430 + }, + { + "epoch": 0.4542718277936465, + "grad_norm": 558.3817749023438, + "learning_rate": 2.6856145165224485e-05, + "loss": 78.8211, + "step": 112440 + }, + { + "epoch": 0.45431222905901414, + "grad_norm": 952.12060546875, + "learning_rate": 2.6853521782634473e-05, + "loss": 52.3497, + "step": 112450 + }, + { + "epoch": 0.4543526303243818, + "grad_norm": 567.7320556640625, + "learning_rate": 2.685089826643153e-05, + "loss": 69.1512, + "step": 112460 + }, + { + "epoch": 0.45439303158974936, + "grad_norm": 628.2738037109375, + "learning_rate": 2.6848274616666797e-05, + "loss": 74.7546, + "step": 112470 + }, + { + "epoch": 0.454433432855117, + "grad_norm": 1422.66650390625, + "learning_rate": 2.6845650833391427e-05, + "loss": 104.1378, + "step": 112480 + }, + { + "epoch": 0.45447383412048464, + "grad_norm": 897.3776245117188, + "learning_rate": 2.684302691665657e-05, + "loss": 104.8609, + "step": 112490 + }, + { + "epoch": 0.4545142353858523, + "grad_norm": 324.0787658691406, + "learning_rate": 2.684040286651338e-05, + "loss": 52.8043, + "step": 112500 + }, + { + "epoch": 0.4545546366512199, + "grad_norm": 1034.5660400390625, + "learning_rate": 2.683777868301301e-05, + "loss": 73.0455, + "step": 112510 + }, + { + "epoch": 0.45459503791658756, + "grad_norm": 601.3583984375, + "learning_rate": 2.6835154366206632e-05, + "loss": 58.2055, + "step": 112520 + }, + { + "epoch": 0.4546354391819552, + "grad_norm": 593.420166015625, + "learning_rate": 2.6832529916145395e-05, + "loss": 92.5754, + "step": 112530 + }, + { + "epoch": 0.4546758404473228, + "grad_norm": 957.552001953125, + "learning_rate": 2.6829905332880464e-05, + "loss": 58.4162, + "step": 112540 + }, + { + "epoch": 0.4547162417126904, + "grad_norm": 567.3688354492188, + "learning_rate": 2.6827280616463022e-05, + "loss": 100.6487, + "step": 112550 + }, + { + "epoch": 0.45475664297805807, + "grad_norm": 743.3108520507812, + "learning_rate": 2.6824655766944228e-05, + "loss": 70.0816, + "step": 112560 + }, + { + "epoch": 0.4547970442434257, + "grad_norm": 357.12982177734375, + "learning_rate": 2.6822030784375254e-05, + "loss": 51.7822, + "step": 112570 + }, + { + "epoch": 0.45483744550879335, + "grad_norm": 524.3549194335938, + "learning_rate": 2.6819405668807284e-05, + "loss": 43.197, + "step": 112580 + }, + { + "epoch": 0.454877846774161, + "grad_norm": 1208.1800537109375, + "learning_rate": 2.6816780420291483e-05, + "loss": 73.3852, + "step": 112590 + }, + { + "epoch": 0.45491824803952857, + "grad_norm": 1156.74462890625, + "learning_rate": 2.681415503887904e-05, + "loss": 101.1923, + "step": 112600 + }, + { + "epoch": 0.4549586493048962, + "grad_norm": 960.2762451171875, + "learning_rate": 2.6811529524621133e-05, + "loss": 90.7265, + "step": 112610 + }, + { + "epoch": 0.45499905057026385, + "grad_norm": 793.8656005859375, + "learning_rate": 2.680890387756895e-05, + "loss": 78.3631, + "step": 112620 + }, + { + "epoch": 0.4550394518356315, + "grad_norm": 598.03271484375, + "learning_rate": 2.6806278097773685e-05, + "loss": 110.3126, + "step": 112630 + }, + { + "epoch": 0.45507985310099913, + 
"grad_norm": 731.1873779296875, + "learning_rate": 2.6803652185286524e-05, + "loss": 60.77, + "step": 112640 + }, + { + "epoch": 0.45512025436636677, + "grad_norm": 988.82275390625, + "learning_rate": 2.680102614015866e-05, + "loss": 110.9561, + "step": 112650 + }, + { + "epoch": 0.4551606556317344, + "grad_norm": 457.52435302734375, + "learning_rate": 2.6798399962441296e-05, + "loss": 54.9066, + "step": 112660 + }, + { + "epoch": 0.455201056897102, + "grad_norm": 807.8894653320312, + "learning_rate": 2.6795773652185616e-05, + "loss": 78.0789, + "step": 112670 + }, + { + "epoch": 0.45524145816246964, + "grad_norm": 737.5829467773438, + "learning_rate": 2.6793147209442833e-05, + "loss": 84.757, + "step": 112680 + }, + { + "epoch": 0.4552818594278373, + "grad_norm": 636.5844116210938, + "learning_rate": 2.679052063426415e-05, + "loss": 81.1594, + "step": 112690 + }, + { + "epoch": 0.4553222606932049, + "grad_norm": 890.6803588867188, + "learning_rate": 2.6787893926700762e-05, + "loss": 79.2259, + "step": 112700 + }, + { + "epoch": 0.45536266195857256, + "grad_norm": 1165.771728515625, + "learning_rate": 2.6785267086803898e-05, + "loss": 78.9433, + "step": 112710 + }, + { + "epoch": 0.4554030632239402, + "grad_norm": 848.1123657226562, + "learning_rate": 2.6782640114624757e-05, + "loss": 92.939, + "step": 112720 + }, + { + "epoch": 0.4554434644893078, + "grad_norm": 746.1470336914062, + "learning_rate": 2.6780013010214553e-05, + "loss": 92.2738, + "step": 112730 + }, + { + "epoch": 0.4554838657546754, + "grad_norm": 531.7431030273438, + "learning_rate": 2.6777385773624503e-05, + "loss": 53.2404, + "step": 112740 + }, + { + "epoch": 0.45552426702004306, + "grad_norm": 534.9210815429688, + "learning_rate": 2.6774758404905833e-05, + "loss": 68.7203, + "step": 112750 + }, + { + "epoch": 0.4555646682854107, + "grad_norm": 1020.3621826171875, + "learning_rate": 2.6772130904109754e-05, + "loss": 126.7624, + "step": 112760 + }, + { + "epoch": 0.45560506955077834, + "grad_norm": 294.8741149902344, + "learning_rate": 2.6769503271287502e-05, + "loss": 52.6577, + "step": 112770 + }, + { + "epoch": 0.455645470816146, + "grad_norm": 601.3861083984375, + "learning_rate": 2.6766875506490294e-05, + "loss": 73.956, + "step": 112780 + }, + { + "epoch": 0.45568587208151357, + "grad_norm": 1287.9842529296875, + "learning_rate": 2.6764247609769368e-05, + "loss": 128.2831, + "step": 112790 + }, + { + "epoch": 0.4557262733468812, + "grad_norm": 711.5743408203125, + "learning_rate": 2.676161958117595e-05, + "loss": 58.1624, + "step": 112800 + }, + { + "epoch": 0.45576667461224885, + "grad_norm": 984.993408203125, + "learning_rate": 2.6758991420761277e-05, + "loss": 84.2691, + "step": 112810 + }, + { + "epoch": 0.4558070758776165, + "grad_norm": 484.4772033691406, + "learning_rate": 2.6756363128576585e-05, + "loss": 56.9175, + "step": 112820 + }, + { + "epoch": 0.4558474771429841, + "grad_norm": 717.5982055664062, + "learning_rate": 2.675373470467312e-05, + "loss": 84.357, + "step": 112830 + }, + { + "epoch": 0.45588787840835177, + "grad_norm": 773.6087646484375, + "learning_rate": 2.675110614910212e-05, + "loss": 83.2446, + "step": 112840 + }, + { + "epoch": 0.4559282796737194, + "grad_norm": 2990.2216796875, + "learning_rate": 2.674847746191483e-05, + "loss": 131.7289, + "step": 112850 + }, + { + "epoch": 0.455968680939087, + "grad_norm": 772.2841796875, + "learning_rate": 2.6745848643162493e-05, + "loss": 71.5113, + "step": 112860 + }, + { + "epoch": 0.45600908220445463, + "grad_norm": 521.4644165039062, + 
"learning_rate": 2.6743219692896363e-05, + "loss": 55.004, + "step": 112870 + }, + { + "epoch": 0.45604948346982227, + "grad_norm": 860.3211669921875, + "learning_rate": 2.6740590611167694e-05, + "loss": 54.5938, + "step": 112880 + }, + { + "epoch": 0.4560898847351899, + "grad_norm": 714.7534790039062, + "learning_rate": 2.673796139802775e-05, + "loss": 88.9539, + "step": 112890 + }, + { + "epoch": 0.45613028600055755, + "grad_norm": 707.4707641601562, + "learning_rate": 2.6735332053527768e-05, + "loss": 58.4551, + "step": 112900 + }, + { + "epoch": 0.4561706872659252, + "grad_norm": 317.87322998046875, + "learning_rate": 2.6732702577719023e-05, + "loss": 87.4749, + "step": 112910 + }, + { + "epoch": 0.4562110885312928, + "grad_norm": 954.2454833984375, + "learning_rate": 2.673007297065278e-05, + "loss": 48.6804, + "step": 112920 + }, + { + "epoch": 0.4562514897966604, + "grad_norm": 744.9954223632812, + "learning_rate": 2.6727443232380296e-05, + "loss": 79.9932, + "step": 112930 + }, + { + "epoch": 0.45629189106202805, + "grad_norm": 567.6287841796875, + "learning_rate": 2.6724813362952846e-05, + "loss": 77.3547, + "step": 112940 + }, + { + "epoch": 0.4563322923273957, + "grad_norm": 746.0390625, + "learning_rate": 2.672218336242169e-05, + "loss": 115.2766, + "step": 112950 + }, + { + "epoch": 0.45637269359276333, + "grad_norm": 562.1192016601562, + "learning_rate": 2.671955323083811e-05, + "loss": 69.0507, + "step": 112960 + }, + { + "epoch": 0.456413094858131, + "grad_norm": 562.4815063476562, + "learning_rate": 2.671692296825339e-05, + "loss": 82.2035, + "step": 112970 + }, + { + "epoch": 0.4564534961234986, + "grad_norm": 1962.4112548828125, + "learning_rate": 2.6714292574718784e-05, + "loss": 126.1687, + "step": 112980 + }, + { + "epoch": 0.4564938973888662, + "grad_norm": 421.26824951171875, + "learning_rate": 2.6711662050285595e-05, + "loss": 93.4661, + "step": 112990 + }, + { + "epoch": 0.45653429865423384, + "grad_norm": 1324.8153076171875, + "learning_rate": 2.6709031395005103e-05, + "loss": 96.0904, + "step": 113000 + }, + { + "epoch": 0.4565746999196015, + "grad_norm": 1263.3470458984375, + "learning_rate": 2.6706400608928586e-05, + "loss": 115.7524, + "step": 113010 + }, + { + "epoch": 0.4566151011849691, + "grad_norm": 988.87548828125, + "learning_rate": 2.6703769692107337e-05, + "loss": 73.4984, + "step": 113020 + }, + { + "epoch": 0.45665550245033676, + "grad_norm": 780.6795043945312, + "learning_rate": 2.6701138644592647e-05, + "loss": 76.2612, + "step": 113030 + }, + { + "epoch": 0.4566959037157044, + "grad_norm": 566.6692504882812, + "learning_rate": 2.6698507466435804e-05, + "loss": 42.9877, + "step": 113040 + }, + { + "epoch": 0.456736304981072, + "grad_norm": 1157.4658203125, + "learning_rate": 2.6695876157688116e-05, + "loss": 62.1694, + "step": 113050 + }, + { + "epoch": 0.4567767062464396, + "grad_norm": 835.0228881835938, + "learning_rate": 2.669324471840087e-05, + "loss": 71.8841, + "step": 113060 + }, + { + "epoch": 0.45681710751180726, + "grad_norm": 984.0977783203125, + "learning_rate": 2.669061314862538e-05, + "loss": 72.2181, + "step": 113070 + }, + { + "epoch": 0.4568575087771749, + "grad_norm": 481.43133544921875, + "learning_rate": 2.668798144841293e-05, + "loss": 70.6602, + "step": 113080 + }, + { + "epoch": 0.45689791004254254, + "grad_norm": 943.81103515625, + "learning_rate": 2.668534961781485e-05, + "loss": 131.5536, + "step": 113090 + }, + { + "epoch": 0.4569383113079102, + "grad_norm": 721.571533203125, + "learning_rate": 2.6682717656882434e-05, + 
"loss": 74.491, + "step": 113100 + }, + { + "epoch": 0.45697871257327777, + "grad_norm": 626.635009765625, + "learning_rate": 2.6680085565666994e-05, + "loss": 98.327, + "step": 113110 + }, + { + "epoch": 0.4570191138386454, + "grad_norm": 895.9033813476562, + "learning_rate": 2.6677453344219846e-05, + "loss": 128.4076, + "step": 113120 + }, + { + "epoch": 0.45705951510401305, + "grad_norm": 782.482666015625, + "learning_rate": 2.6674820992592316e-05, + "loss": 66.9401, + "step": 113130 + }, + { + "epoch": 0.4570999163693807, + "grad_norm": 789.6358642578125, + "learning_rate": 2.6672188510835707e-05, + "loss": 95.1378, + "step": 113140 + }, + { + "epoch": 0.4571403176347483, + "grad_norm": 1218.0447998046875, + "learning_rate": 2.6669555899001346e-05, + "loss": 74.5477, + "step": 113150 + }, + { + "epoch": 0.45718071890011597, + "grad_norm": 505.59228515625, + "learning_rate": 2.666692315714056e-05, + "loss": 74.3671, + "step": 113160 + }, + { + "epoch": 0.4572211201654836, + "grad_norm": 742.8641357421875, + "learning_rate": 2.666429028530468e-05, + "loss": 82.7957, + "step": 113170 + }, + { + "epoch": 0.4572615214308512, + "grad_norm": 450.63702392578125, + "learning_rate": 2.6661657283545023e-05, + "loss": 59.0941, + "step": 113180 + }, + { + "epoch": 0.45730192269621883, + "grad_norm": 834.7881469726562, + "learning_rate": 2.6659024151912932e-05, + "loss": 66.6311, + "step": 113190 + }, + { + "epoch": 0.45734232396158647, + "grad_norm": 308.73095703125, + "learning_rate": 2.6656390890459737e-05, + "loss": 73.7243, + "step": 113200 + }, + { + "epoch": 0.4573827252269541, + "grad_norm": 547.4154052734375, + "learning_rate": 2.6653757499236775e-05, + "loss": 116.7067, + "step": 113210 + }, + { + "epoch": 0.45742312649232175, + "grad_norm": 672.879638671875, + "learning_rate": 2.6651123978295382e-05, + "loss": 73.7367, + "step": 113220 + }, + { + "epoch": 0.4574635277576894, + "grad_norm": 898.431640625, + "learning_rate": 2.6648490327686903e-05, + "loss": 109.3304, + "step": 113230 + }, + { + "epoch": 0.457503929023057, + "grad_norm": 804.0678100585938, + "learning_rate": 2.6645856547462684e-05, + "loss": 89.3997, + "step": 113240 + }, + { + "epoch": 0.4575443302884246, + "grad_norm": 1517.988037109375, + "learning_rate": 2.6643222637674066e-05, + "loss": 78.1878, + "step": 113250 + }, + { + "epoch": 0.45758473155379226, + "grad_norm": 777.5194091796875, + "learning_rate": 2.6640588598372406e-05, + "loss": 65.7955, + "step": 113260 + }, + { + "epoch": 0.4576251328191599, + "grad_norm": 1076.205810546875, + "learning_rate": 2.663795442960906e-05, + "loss": 110.9622, + "step": 113270 + }, + { + "epoch": 0.45766553408452754, + "grad_norm": 507.85211181640625, + "learning_rate": 2.6635320131435363e-05, + "loss": 56.1999, + "step": 113280 + }, + { + "epoch": 0.4577059353498952, + "grad_norm": 1180.1060791015625, + "learning_rate": 2.663268570390269e-05, + "loss": 71.7824, + "step": 113290 + }, + { + "epoch": 0.4577463366152628, + "grad_norm": 552.9801635742188, + "learning_rate": 2.6630051147062396e-05, + "loss": 80.579, + "step": 113300 + }, + { + "epoch": 0.4577867378806304, + "grad_norm": 772.8911743164062, + "learning_rate": 2.6627416460965832e-05, + "loss": 77.0672, + "step": 113310 + }, + { + "epoch": 0.45782713914599804, + "grad_norm": 377.945556640625, + "learning_rate": 2.6624781645664377e-05, + "loss": 52.3128, + "step": 113320 + }, + { + "epoch": 0.4578675404113657, + "grad_norm": 1145.2020263671875, + "learning_rate": 2.6622146701209395e-05, + "loss": 88.6449, + "step": 113330 + 
}, + { + "epoch": 0.4579079416767333, + "grad_norm": 872.3106689453125, + "learning_rate": 2.6619511627652256e-05, + "loss": 59.4446, + "step": 113340 + }, + { + "epoch": 0.45794834294210096, + "grad_norm": 592.0942993164062, + "learning_rate": 2.6616876425044328e-05, + "loss": 92.5439, + "step": 113350 + }, + { + "epoch": 0.4579887442074686, + "grad_norm": 525.9798583984375, + "learning_rate": 2.6614241093436986e-05, + "loss": 60.6389, + "step": 113360 + }, + { + "epoch": 0.4580291454728362, + "grad_norm": 733.073486328125, + "learning_rate": 2.661160563288161e-05, + "loss": 58.8831, + "step": 113370 + }, + { + "epoch": 0.4580695467382038, + "grad_norm": 852.8565063476562, + "learning_rate": 2.660897004342958e-05, + "loss": 79.915, + "step": 113380 + }, + { + "epoch": 0.45810994800357147, + "grad_norm": 766.9078369140625, + "learning_rate": 2.660633432513227e-05, + "loss": 106.1164, + "step": 113390 + }, + { + "epoch": 0.4581503492689391, + "grad_norm": 364.3603210449219, + "learning_rate": 2.6603698478041074e-05, + "loss": 73.5211, + "step": 113400 + }, + { + "epoch": 0.45819075053430675, + "grad_norm": 248.873779296875, + "learning_rate": 2.6601062502207374e-05, + "loss": 90.7178, + "step": 113410 + }, + { + "epoch": 0.4582311517996744, + "grad_norm": 1080.5108642578125, + "learning_rate": 2.6598426397682572e-05, + "loss": 105.1127, + "step": 113420 + }, + { + "epoch": 0.45827155306504197, + "grad_norm": 376.14495849609375, + "learning_rate": 2.6595790164518044e-05, + "loss": 101.5988, + "step": 113430 + }, + { + "epoch": 0.4583119543304096, + "grad_norm": 1310.903076171875, + "learning_rate": 2.6593153802765192e-05, + "loss": 78.4677, + "step": 113440 + }, + { + "epoch": 0.45835235559577725, + "grad_norm": 1097.3486328125, + "learning_rate": 2.6590517312475406e-05, + "loss": 88.4737, + "step": 113450 + }, + { + "epoch": 0.4583927568611449, + "grad_norm": 1102.570068359375, + "learning_rate": 2.6587880693700104e-05, + "loss": 90.3104, + "step": 113460 + }, + { + "epoch": 0.45843315812651253, + "grad_norm": 1407.9200439453125, + "learning_rate": 2.658524394649067e-05, + "loss": 87.6553, + "step": 113470 + }, + { + "epoch": 0.45847355939188017, + "grad_norm": 999.8683471679688, + "learning_rate": 2.6582607070898512e-05, + "loss": 81.1545, + "step": 113480 + }, + { + "epoch": 0.4585139606572478, + "grad_norm": 513.2388305664062, + "learning_rate": 2.6579970066975042e-05, + "loss": 65.5917, + "step": 113490 + }, + { + "epoch": 0.4585543619226154, + "grad_norm": 99.1220703125, + "learning_rate": 2.6577332934771667e-05, + "loss": 67.2777, + "step": 113500 + }, + { + "epoch": 0.45859476318798303, + "grad_norm": 872.3470458984375, + "learning_rate": 2.6574695674339803e-05, + "loss": 68.5045, + "step": 113510 + }, + { + "epoch": 0.4586351644533507, + "grad_norm": 1404.48876953125, + "learning_rate": 2.657205828573086e-05, + "loss": 91.6158, + "step": 113520 + }, + { + "epoch": 0.4586755657187183, + "grad_norm": 1375.1158447265625, + "learning_rate": 2.656942076899626e-05, + "loss": 61.0799, + "step": 113530 + }, + { + "epoch": 0.45871596698408595, + "grad_norm": 604.9822387695312, + "learning_rate": 2.6566783124187422e-05, + "loss": 89.283, + "step": 113540 + }, + { + "epoch": 0.4587563682494536, + "grad_norm": 1082.412841796875, + "learning_rate": 2.656414535135576e-05, + "loss": 68.1157, + "step": 113550 + }, + { + "epoch": 0.4587967695148212, + "grad_norm": 943.1004638671875, + "learning_rate": 2.6561507450552704e-05, + "loss": 50.3113, + "step": 113560 + }, + { + "epoch": 0.4588371707801888, 
+ "grad_norm": 560.8616943359375, + "learning_rate": 2.655886942182968e-05, + "loss": 95.1979, + "step": 113570 + }, + { + "epoch": 0.45887757204555646, + "grad_norm": 543.01708984375, + "learning_rate": 2.6556231265238128e-05, + "loss": 68.6648, + "step": 113580 + }, + { + "epoch": 0.4589179733109241, + "grad_norm": 626.4618530273438, + "learning_rate": 2.655359298082947e-05, + "loss": 43.5112, + "step": 113590 + }, + { + "epoch": 0.45895837457629174, + "grad_norm": 952.7501831054688, + "learning_rate": 2.655095456865514e-05, + "loss": 99.0865, + "step": 113600 + }, + { + "epoch": 0.4589987758416594, + "grad_norm": 913.9346313476562, + "learning_rate": 2.6548316028766582e-05, + "loss": 76.3627, + "step": 113610 + }, + { + "epoch": 0.459039177107027, + "grad_norm": 924.994140625, + "learning_rate": 2.654567736121523e-05, + "loss": 89.2226, + "step": 113620 + }, + { + "epoch": 0.4590795783723946, + "grad_norm": 806.6512451171875, + "learning_rate": 2.654303856605253e-05, + "loss": 83.3842, + "step": 113630 + }, + { + "epoch": 0.45911997963776224, + "grad_norm": 1085.2701416015625, + "learning_rate": 2.654039964332992e-05, + "loss": 75.7076, + "step": 113640 + }, + { + "epoch": 0.4591603809031299, + "grad_norm": 637.1243896484375, + "learning_rate": 2.653776059309885e-05, + "loss": 179.8734, + "step": 113650 + }, + { + "epoch": 0.4592007821684975, + "grad_norm": 431.5501708984375, + "learning_rate": 2.653512141541078e-05, + "loss": 60.1172, + "step": 113660 + }, + { + "epoch": 0.45924118343386516, + "grad_norm": 1128.5308837890625, + "learning_rate": 2.6532482110317145e-05, + "loss": 124.6562, + "step": 113670 + }, + { + "epoch": 0.4592815846992328, + "grad_norm": 678.9619750976562, + "learning_rate": 2.6529842677869412e-05, + "loss": 100.2162, + "step": 113680 + }, + { + "epoch": 0.4593219859646004, + "grad_norm": 872.8757934570312, + "learning_rate": 2.6527203118119036e-05, + "loss": 61.9864, + "step": 113690 + }, + { + "epoch": 0.459362387229968, + "grad_norm": 703.1404418945312, + "learning_rate": 2.6524563431117472e-05, + "loss": 49.4995, + "step": 113700 + }, + { + "epoch": 0.45940278849533567, + "grad_norm": 1589.8712158203125, + "learning_rate": 2.6521923616916187e-05, + "loss": 73.0462, + "step": 113710 + }, + { + "epoch": 0.4594431897607033, + "grad_norm": 1035.56787109375, + "learning_rate": 2.6519283675566644e-05, + "loss": 74.8495, + "step": 113720 + }, + { + "epoch": 0.45948359102607095, + "grad_norm": 930.1005249023438, + "learning_rate": 2.6516643607120305e-05, + "loss": 91.2964, + "step": 113730 + }, + { + "epoch": 0.4595239922914386, + "grad_norm": 599.20751953125, + "learning_rate": 2.6514003411628646e-05, + "loss": 71.0429, + "step": 113740 + }, + { + "epoch": 0.45956439355680617, + "grad_norm": 1437.7662353515625, + "learning_rate": 2.6511363089143137e-05, + "loss": 89.5026, + "step": 113750 + }, + { + "epoch": 0.4596047948221738, + "grad_norm": 1174.54736328125, + "learning_rate": 2.650872263971525e-05, + "loss": 78.9539, + "step": 113760 + }, + { + "epoch": 0.45964519608754145, + "grad_norm": 588.1651000976562, + "learning_rate": 2.6506082063396467e-05, + "loss": 103.5036, + "step": 113770 + }, + { + "epoch": 0.4596855973529091, + "grad_norm": 254.2494354248047, + "learning_rate": 2.6503441360238267e-05, + "loss": 76.0184, + "step": 113780 + }, + { + "epoch": 0.45972599861827673, + "grad_norm": 525.8836059570312, + "learning_rate": 2.6500800530292128e-05, + "loss": 67.0347, + "step": 113790 + }, + { + "epoch": 0.45976639988364437, + "grad_norm": 359.4972229003906, + 
"learning_rate": 2.649815957360953e-05, + "loss": 66.4681, + "step": 113800 + }, + { + "epoch": 0.459806801149012, + "grad_norm": 684.4945068359375, + "learning_rate": 2.649551849024197e-05, + "loss": 59.3099, + "step": 113810 + }, + { + "epoch": 0.4598472024143796, + "grad_norm": 204.22955322265625, + "learning_rate": 2.6492877280240934e-05, + "loss": 85.5394, + "step": 113820 + }, + { + "epoch": 0.45988760367974724, + "grad_norm": 329.8168640136719, + "learning_rate": 2.649023594365791e-05, + "loss": 109.4566, + "step": 113830 + }, + { + "epoch": 0.4599280049451149, + "grad_norm": 545.3468017578125, + "learning_rate": 2.64875944805444e-05, + "loss": 77.7081, + "step": 113840 + }, + { + "epoch": 0.4599684062104825, + "grad_norm": 794.8700561523438, + "learning_rate": 2.648495289095189e-05, + "loss": 79.392, + "step": 113850 + }, + { + "epoch": 0.46000880747585016, + "grad_norm": 610.8760986328125, + "learning_rate": 2.6482311174931886e-05, + "loss": 52.8303, + "step": 113860 + }, + { + "epoch": 0.4600492087412178, + "grad_norm": 484.84906005859375, + "learning_rate": 2.647966933253589e-05, + "loss": 95.2181, + "step": 113870 + }, + { + "epoch": 0.4600896100065854, + "grad_norm": 659.332275390625, + "learning_rate": 2.6477027363815407e-05, + "loss": 86.0949, + "step": 113880 + }, + { + "epoch": 0.460130011271953, + "grad_norm": 1217.6085205078125, + "learning_rate": 2.647438526882194e-05, + "loss": 64.04, + "step": 113890 + }, + { + "epoch": 0.46017041253732066, + "grad_norm": 1585.7891845703125, + "learning_rate": 2.6471743047606992e-05, + "loss": 106.7962, + "step": 113900 + }, + { + "epoch": 0.4602108138026883, + "grad_norm": 840.039794921875, + "learning_rate": 2.6469100700222087e-05, + "loss": 146.4675, + "step": 113910 + }, + { + "epoch": 0.46025121506805594, + "grad_norm": 833.8072509765625, + "learning_rate": 2.6466458226718736e-05, + "loss": 123.4206, + "step": 113920 + }, + { + "epoch": 0.4602916163334236, + "grad_norm": 495.6810607910156, + "learning_rate": 2.6463815627148446e-05, + "loss": 62.0684, + "step": 113930 + }, + { + "epoch": 0.4603320175987912, + "grad_norm": 821.72998046875, + "learning_rate": 2.6461172901562743e-05, + "loss": 78.5718, + "step": 113940 + }, + { + "epoch": 0.4603724188641588, + "grad_norm": 471.9949645996094, + "learning_rate": 2.6458530050013155e-05, + "loss": 104.0745, + "step": 113950 + }, + { + "epoch": 0.46041282012952645, + "grad_norm": 1198.97900390625, + "learning_rate": 2.6455887072551197e-05, + "loss": 92.9783, + "step": 113960 + }, + { + "epoch": 0.4604532213948941, + "grad_norm": 598.3755493164062, + "learning_rate": 2.6453243969228387e-05, + "loss": 61.2534, + "step": 113970 + }, + { + "epoch": 0.4604936226602617, + "grad_norm": 724.9803466796875, + "learning_rate": 2.645060074009627e-05, + "loss": 77.952, + "step": 113980 + }, + { + "epoch": 0.46053402392562937, + "grad_norm": 1079.2523193359375, + "learning_rate": 2.6447957385206377e-05, + "loss": 82.2019, + "step": 113990 + }, + { + "epoch": 0.460574425190997, + "grad_norm": 698.1065673828125, + "learning_rate": 2.6445313904610227e-05, + "loss": 65.5211, + "step": 114000 + }, + { + "epoch": 0.4606148264563646, + "grad_norm": 797.7476806640625, + "learning_rate": 2.6442670298359364e-05, + "loss": 81.3968, + "step": 114010 + }, + { + "epoch": 0.46065522772173223, + "grad_norm": 428.3217468261719, + "learning_rate": 2.6440026566505327e-05, + "loss": 59.0565, + "step": 114020 + }, + { + "epoch": 0.46069562898709987, + "grad_norm": 442.43994140625, + "learning_rate": 2.6437382709099662e-05, 
+ "loss": 99.5741, + "step": 114030 + }, + { + "epoch": 0.4607360302524675, + "grad_norm": 419.9986877441406, + "learning_rate": 2.64347387261939e-05, + "loss": 79.7548, + "step": 114040 + }, + { + "epoch": 0.46077643151783515, + "grad_norm": 963.0272827148438, + "learning_rate": 2.64320946178396e-05, + "loss": 153.4569, + "step": 114050 + }, + { + "epoch": 0.4608168327832028, + "grad_norm": 1966.126708984375, + "learning_rate": 2.6429450384088298e-05, + "loss": 100.4458, + "step": 114060 + }, + { + "epoch": 0.4608572340485704, + "grad_norm": 361.90875244140625, + "learning_rate": 2.6426806024991557e-05, + "loss": 57.0643, + "step": 114070 + }, + { + "epoch": 0.460897635313938, + "grad_norm": 724.8544921875, + "learning_rate": 2.642416154060092e-05, + "loss": 68.3037, + "step": 114080 + }, + { + "epoch": 0.46093803657930565, + "grad_norm": 737.5326538085938, + "learning_rate": 2.6421516930967944e-05, + "loss": 67.6592, + "step": 114090 + }, + { + "epoch": 0.4609784378446733, + "grad_norm": 779.3790893554688, + "learning_rate": 2.641887219614419e-05, + "loss": 41.3311, + "step": 114100 + }, + { + "epoch": 0.46101883911004093, + "grad_norm": 723.6954345703125, + "learning_rate": 2.6416227336181224e-05, + "loss": 82.2918, + "step": 114110 + }, + { + "epoch": 0.4610592403754086, + "grad_norm": 532.8678588867188, + "learning_rate": 2.6413582351130598e-05, + "loss": 75.683, + "step": 114120 + }, + { + "epoch": 0.4610996416407762, + "grad_norm": 1178.80908203125, + "learning_rate": 2.6410937241043882e-05, + "loss": 108.3002, + "step": 114130 + }, + { + "epoch": 0.4611400429061438, + "grad_norm": 635.8715209960938, + "learning_rate": 2.6408292005972643e-05, + "loss": 69.5675, + "step": 114140 + }, + { + "epoch": 0.46118044417151144, + "grad_norm": 604.1587524414062, + "learning_rate": 2.6405646645968457e-05, + "loss": 54.293, + "step": 114150 + }, + { + "epoch": 0.4612208454368791, + "grad_norm": 512.0824584960938, + "learning_rate": 2.640300116108289e-05, + "loss": 79.5501, + "step": 114160 + }, + { + "epoch": 0.4612612467022467, + "grad_norm": 398.6564636230469, + "learning_rate": 2.6400355551367517e-05, + "loss": 74.799, + "step": 114170 + }, + { + "epoch": 0.46130164796761436, + "grad_norm": 567.1099853515625, + "learning_rate": 2.6397709816873917e-05, + "loss": 96.645, + "step": 114180 + }, + { + "epoch": 0.461342049232982, + "grad_norm": 381.8641052246094, + "learning_rate": 2.6395063957653674e-05, + "loss": 106.2633, + "step": 114190 + }, + { + "epoch": 0.4613824504983496, + "grad_norm": 1483.6571044921875, + "learning_rate": 2.639241797375836e-05, + "loss": 99.7252, + "step": 114200 + }, + { + "epoch": 0.4614228517637172, + "grad_norm": 912.604736328125, + "learning_rate": 2.638977186523957e-05, + "loss": 60.5856, + "step": 114210 + }, + { + "epoch": 0.46146325302908486, + "grad_norm": 512.1865234375, + "learning_rate": 2.638712563214889e-05, + "loss": 89.3499, + "step": 114220 + }, + { + "epoch": 0.4615036542944525, + "grad_norm": 736.9364624023438, + "learning_rate": 2.638447927453791e-05, + "loss": 83.937, + "step": 114230 + }, + { + "epoch": 0.46154405555982014, + "grad_norm": 753.5262451171875, + "learning_rate": 2.6381832792458214e-05, + "loss": 73.6253, + "step": 114240 + }, + { + "epoch": 0.4615844568251878, + "grad_norm": 1595.89697265625, + "learning_rate": 2.6379186185961403e-05, + "loss": 98.1879, + "step": 114250 + }, + { + "epoch": 0.4616248580905554, + "grad_norm": 1767.184814453125, + "learning_rate": 2.6376539455099074e-05, + "loss": 71.1026, + "step": 114260 + }, + { + 
"epoch": 0.461665259355923, + "grad_norm": 1099.088623046875, + "learning_rate": 2.6373892599922828e-05, + "loss": 106.5376, + "step": 114270 + }, + { + "epoch": 0.46170566062129065, + "grad_norm": 325.9820861816406, + "learning_rate": 2.6371245620484257e-05, + "loss": 105.6366, + "step": 114280 + }, + { + "epoch": 0.4617460618866583, + "grad_norm": 1049.932373046875, + "learning_rate": 2.6368598516834976e-05, + "loss": 146.334, + "step": 114290 + }, + { + "epoch": 0.46178646315202593, + "grad_norm": 507.06060791015625, + "learning_rate": 2.6365951289026592e-05, + "loss": 59.1321, + "step": 114300 + }, + { + "epoch": 0.46182686441739357, + "grad_norm": 1159.0625, + "learning_rate": 2.6363303937110707e-05, + "loss": 102.0715, + "step": 114310 + }, + { + "epoch": 0.4618672656827612, + "grad_norm": 438.0673828125, + "learning_rate": 2.6360656461138936e-05, + "loss": 57.551, + "step": 114320 + }, + { + "epoch": 0.4619076669481288, + "grad_norm": 1596.067626953125, + "learning_rate": 2.6358008861162893e-05, + "loss": 88.764, + "step": 114330 + }, + { + "epoch": 0.46194806821349643, + "grad_norm": 695.786376953125, + "learning_rate": 2.6355361137234193e-05, + "loss": 97.2558, + "step": 114340 + }, + { + "epoch": 0.46198846947886407, + "grad_norm": 687.4926147460938, + "learning_rate": 2.635271328940445e-05, + "loss": 161.9082, + "step": 114350 + }, + { + "epoch": 0.4620288707442317, + "grad_norm": 1291.8145751953125, + "learning_rate": 2.6350065317725306e-05, + "loss": 84.1473, + "step": 114360 + }, + { + "epoch": 0.46206927200959935, + "grad_norm": 747.5413208007812, + "learning_rate": 2.6347417222248358e-05, + "loss": 56.5286, + "step": 114370 + }, + { + "epoch": 0.462109673274967, + "grad_norm": 900.836669921875, + "learning_rate": 2.6344769003025247e-05, + "loss": 92.8681, + "step": 114380 + }, + { + "epoch": 0.4621500745403346, + "grad_norm": 1331.9080810546875, + "learning_rate": 2.6342120660107597e-05, + "loss": 115.7233, + "step": 114390 + }, + { + "epoch": 0.4621904758057022, + "grad_norm": 593.8460083007812, + "learning_rate": 2.633947219354704e-05, + "loss": 54.5704, + "step": 114400 + }, + { + "epoch": 0.46223087707106986, + "grad_norm": 449.32049560546875, + "learning_rate": 2.6336823603395204e-05, + "loss": 93.7284, + "step": 114410 + }, + { + "epoch": 0.4622712783364375, + "grad_norm": 601.999267578125, + "learning_rate": 2.6334174889703735e-05, + "loss": 58.3792, + "step": 114420 + }, + { + "epoch": 0.46231167960180514, + "grad_norm": 692.7201538085938, + "learning_rate": 2.633152605252426e-05, + "loss": 86.9033, + "step": 114430 + }, + { + "epoch": 0.4623520808671728, + "grad_norm": 285.8746032714844, + "learning_rate": 2.6328877091908433e-05, + "loss": 110.9091, + "step": 114440 + }, + { + "epoch": 0.4623924821325404, + "grad_norm": 492.05267333984375, + "learning_rate": 2.632622800790788e-05, + "loss": 74.728, + "step": 114450 + }, + { + "epoch": 0.462432883397908, + "grad_norm": 434.1992492675781, + "learning_rate": 2.632357880057426e-05, + "loss": 80.968, + "step": 114460 + }, + { + "epoch": 0.46247328466327564, + "grad_norm": 737.6741943359375, + "learning_rate": 2.632092946995922e-05, + "loss": 68.8449, + "step": 114470 + }, + { + "epoch": 0.4625136859286433, + "grad_norm": 768.0128784179688, + "learning_rate": 2.6318280016114406e-05, + "loss": 83.459, + "step": 114480 + }, + { + "epoch": 0.4625540871940109, + "grad_norm": 829.5357666015625, + "learning_rate": 2.6315630439091463e-05, + "loss": 108.0736, + "step": 114490 + }, + { + "epoch": 0.46259448845937856, + 
"grad_norm": 443.0619201660156, + "learning_rate": 2.6312980738942052e-05, + "loss": 66.1986, + "step": 114500 + }, + { + "epoch": 0.4626348897247462, + "grad_norm": 923.6192016601562, + "learning_rate": 2.6310330915717832e-05, + "loss": 58.211, + "step": 114510 + }, + { + "epoch": 0.4626752909901138, + "grad_norm": 512.396240234375, + "learning_rate": 2.630768096947047e-05, + "loss": 103.6867, + "step": 114520 + }, + { + "epoch": 0.4627156922554814, + "grad_norm": 701.5645751953125, + "learning_rate": 2.6305030900251612e-05, + "loss": 70.5896, + "step": 114530 + }, + { + "epoch": 0.46275609352084907, + "grad_norm": 1003.6924438476562, + "learning_rate": 2.6302380708112933e-05, + "loss": 95.463, + "step": 114540 + }, + { + "epoch": 0.4627964947862167, + "grad_norm": 439.3457946777344, + "learning_rate": 2.6299730393106097e-05, + "loss": 73.8293, + "step": 114550 + }, + { + "epoch": 0.46283689605158435, + "grad_norm": 378.0598449707031, + "learning_rate": 2.6297079955282777e-05, + "loss": 76.9201, + "step": 114560 + }, + { + "epoch": 0.462877297316952, + "grad_norm": 1021.908447265625, + "learning_rate": 2.6294429394694642e-05, + "loss": 52.3696, + "step": 114570 + }, + { + "epoch": 0.4629176985823196, + "grad_norm": 904.9420166015625, + "learning_rate": 2.6291778711393358e-05, + "loss": 112.2631, + "step": 114580 + }, + { + "epoch": 0.4629580998476872, + "grad_norm": 604.9569091796875, + "learning_rate": 2.628912790543061e-05, + "loss": 63.0775, + "step": 114590 + }, + { + "epoch": 0.46299850111305485, + "grad_norm": 735.7324829101562, + "learning_rate": 2.6286476976858084e-05, + "loss": 75.2424, + "step": 114600 + }, + { + "epoch": 0.4630389023784225, + "grad_norm": 645.4496459960938, + "learning_rate": 2.6283825925727447e-05, + "loss": 62.8955, + "step": 114610 + }, + { + "epoch": 0.46307930364379013, + "grad_norm": 443.483642578125, + "learning_rate": 2.6281174752090387e-05, + "loss": 71.455, + "step": 114620 + }, + { + "epoch": 0.46311970490915777, + "grad_norm": 514.104248046875, + "learning_rate": 2.627852345599859e-05, + "loss": 61.6043, + "step": 114630 + }, + { + "epoch": 0.4631601061745254, + "grad_norm": 736.0419311523438, + "learning_rate": 2.6275872037503753e-05, + "loss": 65.1548, + "step": 114640 + }, + { + "epoch": 0.463200507439893, + "grad_norm": 757.0994873046875, + "learning_rate": 2.6273220496657558e-05, + "loss": 50.0551, + "step": 114650 + }, + { + "epoch": 0.46324090870526063, + "grad_norm": 508.2020568847656, + "learning_rate": 2.6270568833511697e-05, + "loss": 78.6093, + "step": 114660 + }, + { + "epoch": 0.4632813099706283, + "grad_norm": 717.403564453125, + "learning_rate": 2.6267917048117868e-05, + "loss": 87.9904, + "step": 114670 + }, + { + "epoch": 0.4633217112359959, + "grad_norm": 976.2297973632812, + "learning_rate": 2.6265265140527772e-05, + "loss": 96.0001, + "step": 114680 + }, + { + "epoch": 0.46336211250136355, + "grad_norm": 1009.6636962890625, + "learning_rate": 2.6262613110793106e-05, + "loss": 84.9819, + "step": 114690 + }, + { + "epoch": 0.4634025137667312, + "grad_norm": 858.8338623046875, + "learning_rate": 2.6259960958965566e-05, + "loss": 75.355, + "step": 114700 + }, + { + "epoch": 0.4634429150320988, + "grad_norm": 560.441650390625, + "learning_rate": 2.625730868509687e-05, + "loss": 80.7289, + "step": 114710 + }, + { + "epoch": 0.4634833162974664, + "grad_norm": 503.17156982421875, + "learning_rate": 2.625465628923872e-05, + "loss": 72.7366, + "step": 114720 + }, + { + "epoch": 0.46352371756283406, + "grad_norm": 966.0131225585938, + 
"learning_rate": 2.6252003771442826e-05, + "loss": 75.885, + "step": 114730 + }, + { + "epoch": 0.4635641188282017, + "grad_norm": 843.3148803710938, + "learning_rate": 2.6249351131760897e-05, + "loss": 82.6017, + "step": 114740 + }, + { + "epoch": 0.46360452009356934, + "grad_norm": 260.50238037109375, + "learning_rate": 2.6246698370244656e-05, + "loss": 67.5398, + "step": 114750 + }, + { + "epoch": 0.463644921358937, + "grad_norm": 614.8167724609375, + "learning_rate": 2.624404548694581e-05, + "loss": 40.5519, + "step": 114760 + }, + { + "epoch": 0.4636853226243046, + "grad_norm": 722.75244140625, + "learning_rate": 2.6241392481916082e-05, + "loss": 76.3754, + "step": 114770 + }, + { + "epoch": 0.4637257238896722, + "grad_norm": 855.9271240234375, + "learning_rate": 2.6238739355207193e-05, + "loss": 120.8888, + "step": 114780 + }, + { + "epoch": 0.46376612515503984, + "grad_norm": 1688.67431640625, + "learning_rate": 2.6236086106870868e-05, + "loss": 84.8504, + "step": 114790 + }, + { + "epoch": 0.4638065264204075, + "grad_norm": 736.0219116210938, + "learning_rate": 2.6233432736958837e-05, + "loss": 91.6505, + "step": 114800 + }, + { + "epoch": 0.4638469276857751, + "grad_norm": 951.322509765625, + "learning_rate": 2.623077924552283e-05, + "loss": 128.7462, + "step": 114810 + }, + { + "epoch": 0.46388732895114276, + "grad_norm": 367.22271728515625, + "learning_rate": 2.622812563261457e-05, + "loss": 77.9096, + "step": 114820 + }, + { + "epoch": 0.4639277302165104, + "grad_norm": 743.8671875, + "learning_rate": 2.622547189828579e-05, + "loss": 69.1021, + "step": 114830 + }, + { + "epoch": 0.463968131481878, + "grad_norm": 548.1196899414062, + "learning_rate": 2.6222818042588235e-05, + "loss": 61.0841, + "step": 114840 + }, + { + "epoch": 0.46400853274724563, + "grad_norm": 935.0789794921875, + "learning_rate": 2.622016406557364e-05, + "loss": 100.8085, + "step": 114850 + }, + { + "epoch": 0.46404893401261327, + "grad_norm": 633.4144287109375, + "learning_rate": 2.6217509967293735e-05, + "loss": 86.7997, + "step": 114860 + }, + { + "epoch": 0.4640893352779809, + "grad_norm": 1505.43310546875, + "learning_rate": 2.6214855747800282e-05, + "loss": 99.7435, + "step": 114870 + }, + { + "epoch": 0.46412973654334855, + "grad_norm": 334.6433410644531, + "learning_rate": 2.621220140714501e-05, + "loss": 77.6374, + "step": 114880 + }, + { + "epoch": 0.4641701378087162, + "grad_norm": 878.5792236328125, + "learning_rate": 2.620954694537968e-05, + "loss": 68.9163, + "step": 114890 + }, + { + "epoch": 0.46421053907408383, + "grad_norm": 879.5414428710938, + "learning_rate": 2.620689236255603e-05, + "loss": 79.9191, + "step": 114900 + }, + { + "epoch": 0.4642509403394514, + "grad_norm": 617.0711059570312, + "learning_rate": 2.620423765872582e-05, + "loss": 88.1346, + "step": 114910 + }, + { + "epoch": 0.46429134160481905, + "grad_norm": 477.0278625488281, + "learning_rate": 2.62015828339408e-05, + "loss": 75.9172, + "step": 114920 + }, + { + "epoch": 0.4643317428701867, + "grad_norm": 1257.2669677734375, + "learning_rate": 2.6198927888252733e-05, + "loss": 61.6756, + "step": 114930 + }, + { + "epoch": 0.46437214413555433, + "grad_norm": 675.1260986328125, + "learning_rate": 2.6196272821713376e-05, + "loss": 71.925, + "step": 114940 + }, + { + "epoch": 0.46441254540092197, + "grad_norm": 571.593017578125, + "learning_rate": 2.6193617634374486e-05, + "loss": 73.4739, + "step": 114950 + }, + { + "epoch": 0.4644529466662896, + "grad_norm": 721.459228515625, + "learning_rate": 2.6190962326287835e-05, + 
"loss": 94.1048, + "step": 114960 + }, + { + "epoch": 0.4644933479316572, + "grad_norm": 716.8490600585938, + "learning_rate": 2.6188306897505185e-05, + "loss": 55.0164, + "step": 114970 + }, + { + "epoch": 0.46453374919702484, + "grad_norm": 646.6680908203125, + "learning_rate": 2.6185651348078308e-05, + "loss": 88.9204, + "step": 114980 + }, + { + "epoch": 0.4645741504623925, + "grad_norm": 1103.2906494140625, + "learning_rate": 2.618299567805897e-05, + "loss": 86.2375, + "step": 114990 + }, + { + "epoch": 0.4646145517277601, + "grad_norm": 1224.04931640625, + "learning_rate": 2.618033988749895e-05, + "loss": 100.5058, + "step": 115000 + }, + { + "epoch": 0.46465495299312776, + "grad_norm": 555.1777954101562, + "learning_rate": 2.6177683976450022e-05, + "loss": 115.1698, + "step": 115010 + }, + { + "epoch": 0.4646953542584954, + "grad_norm": 1511.6473388671875, + "learning_rate": 2.617502794496397e-05, + "loss": 73.6122, + "step": 115020 + }, + { + "epoch": 0.464735755523863, + "grad_norm": 411.3492736816406, + "learning_rate": 2.617237179309256e-05, + "loss": 77.6193, + "step": 115030 + }, + { + "epoch": 0.4647761567892306, + "grad_norm": 629.3460693359375, + "learning_rate": 2.6169715520887585e-05, + "loss": 91.1864, + "step": 115040 + }, + { + "epoch": 0.46481655805459826, + "grad_norm": 400.366943359375, + "learning_rate": 2.6167059128400833e-05, + "loss": 59.3934, + "step": 115050 + }, + { + "epoch": 0.4648569593199659, + "grad_norm": 770.9197998046875, + "learning_rate": 2.6164402615684087e-05, + "loss": 88.6018, + "step": 115060 + }, + { + "epoch": 0.46489736058533354, + "grad_norm": 1571.6004638671875, + "learning_rate": 2.6161745982789137e-05, + "loss": 137.9897, + "step": 115070 + }, + { + "epoch": 0.4649377618507012, + "grad_norm": 717.1170654296875, + "learning_rate": 2.6159089229767782e-05, + "loss": 88.0344, + "step": 115080 + }, + { + "epoch": 0.4649781631160688, + "grad_norm": 1129.611328125, + "learning_rate": 2.615643235667181e-05, + "loss": 74.9976, + "step": 115090 + }, + { + "epoch": 0.4650185643814364, + "grad_norm": 393.9241943359375, + "learning_rate": 2.6153775363553018e-05, + "loss": 65.7581, + "step": 115100 + }, + { + "epoch": 0.46505896564680405, + "grad_norm": 1086.6634521484375, + "learning_rate": 2.6151118250463204e-05, + "loss": 73.9207, + "step": 115110 + }, + { + "epoch": 0.4650993669121717, + "grad_norm": 481.0355224609375, + "learning_rate": 2.6148461017454176e-05, + "loss": 70.0164, + "step": 115120 + }, + { + "epoch": 0.4651397681775393, + "grad_norm": 862.0154418945312, + "learning_rate": 2.614580366457774e-05, + "loss": 90.931, + "step": 115130 + }, + { + "epoch": 0.46518016944290697, + "grad_norm": 1850.7154541015625, + "learning_rate": 2.6143146191885685e-05, + "loss": 94.8435, + "step": 115140 + }, + { + "epoch": 0.4652205707082746, + "grad_norm": 728.4447631835938, + "learning_rate": 2.614048859942984e-05, + "loss": 72.2169, + "step": 115150 + }, + { + "epoch": 0.4652609719736422, + "grad_norm": 736.9239501953125, + "learning_rate": 2.613783088726201e-05, + "loss": 61.1701, + "step": 115160 + }, + { + "epoch": 0.46530137323900983, + "grad_norm": 987.6494140625, + "learning_rate": 2.6135173055434003e-05, + "loss": 67.0238, + "step": 115170 + }, + { + "epoch": 0.46534177450437747, + "grad_norm": 389.91656494140625, + "learning_rate": 2.6132515103997643e-05, + "loss": 96.1323, + "step": 115180 + }, + { + "epoch": 0.4653821757697451, + "grad_norm": 827.3607788085938, + "learning_rate": 2.612985703300474e-05, + "loss": 60.8911, + "step": 115190 + 
}, + { + "epoch": 0.46542257703511275, + "grad_norm": 879.573486328125, + "learning_rate": 2.612719884250712e-05, + "loss": 56.7561, + "step": 115200 + }, + { + "epoch": 0.4654629783004804, + "grad_norm": 416.34228515625, + "learning_rate": 2.6124540532556607e-05, + "loss": 69.4381, + "step": 115210 + }, + { + "epoch": 0.46550337956584803, + "grad_norm": 866.3668823242188, + "learning_rate": 2.6121882103205017e-05, + "loss": 91.6065, + "step": 115220 + }, + { + "epoch": 0.4655437808312156, + "grad_norm": 517.7424926757812, + "learning_rate": 2.6119223554504185e-05, + "loss": 69.9989, + "step": 115230 + }, + { + "epoch": 0.46558418209658325, + "grad_norm": 727.4532470703125, + "learning_rate": 2.611656488650594e-05, + "loss": 71.9086, + "step": 115240 + }, + { + "epoch": 0.4656245833619509, + "grad_norm": 684.062744140625, + "learning_rate": 2.6113906099262118e-05, + "loss": 69.8937, + "step": 115250 + }, + { + "epoch": 0.46566498462731853, + "grad_norm": 707.6514892578125, + "learning_rate": 2.6111247192824543e-05, + "loss": 54.0938, + "step": 115260 + }, + { + "epoch": 0.4657053858926862, + "grad_norm": 775.35546875, + "learning_rate": 2.6108588167245063e-05, + "loss": 73.9428, + "step": 115270 + }, + { + "epoch": 0.4657457871580538, + "grad_norm": 946.7510375976562, + "learning_rate": 2.6105929022575508e-05, + "loss": 74.9219, + "step": 115280 + }, + { + "epoch": 0.4657861884234214, + "grad_norm": 721.5150146484375, + "learning_rate": 2.6103269758867727e-05, + "loss": 56.3273, + "step": 115290 + }, + { + "epoch": 0.46582658968878904, + "grad_norm": 443.8489074707031, + "learning_rate": 2.6100610376173555e-05, + "loss": 65.4958, + "step": 115300 + }, + { + "epoch": 0.4658669909541567, + "grad_norm": 900.3790893554688, + "learning_rate": 2.6097950874544838e-05, + "loss": 77.0747, + "step": 115310 + }, + { + "epoch": 0.4659073922195243, + "grad_norm": 490.86456298828125, + "learning_rate": 2.6095291254033437e-05, + "loss": 120.3429, + "step": 115320 + }, + { + "epoch": 0.46594779348489196, + "grad_norm": 331.149169921875, + "learning_rate": 2.6092631514691195e-05, + "loss": 67.8142, + "step": 115330 + }, + { + "epoch": 0.4659881947502596, + "grad_norm": 1538.13427734375, + "learning_rate": 2.6089971656569962e-05, + "loss": 86.6018, + "step": 115340 + }, + { + "epoch": 0.4660285960156272, + "grad_norm": 1192.0313720703125, + "learning_rate": 2.6087311679721606e-05, + "loss": 99.6332, + "step": 115350 + }, + { + "epoch": 0.4660689972809948, + "grad_norm": 902.2559204101562, + "learning_rate": 2.6084651584197963e-05, + "loss": 73.3442, + "step": 115360 + }, + { + "epoch": 0.46610939854636246, + "grad_norm": 677.5616455078125, + "learning_rate": 2.608199137005091e-05, + "loss": 49.7257, + "step": 115370 + }, + { + "epoch": 0.4661497998117301, + "grad_norm": 683.1897583007812, + "learning_rate": 2.6079331037332304e-05, + "loss": 104.7382, + "step": 115380 + }, + { + "epoch": 0.46619020107709774, + "grad_norm": 590.16162109375, + "learning_rate": 2.6076670586094004e-05, + "loss": 82.7925, + "step": 115390 + }, + { + "epoch": 0.4662306023424654, + "grad_norm": 571.5917358398438, + "learning_rate": 2.6074010016387887e-05, + "loss": 62.7534, + "step": 115400 + }, + { + "epoch": 0.466271003607833, + "grad_norm": 636.1116943359375, + "learning_rate": 2.607134932826582e-05, + "loss": 87.5836, + "step": 115410 + }, + { + "epoch": 0.4663114048732006, + "grad_norm": 624.4752807617188, + "learning_rate": 2.6068688521779672e-05, + "loss": 68.3954, + "step": 115420 + }, + { + "epoch": 0.46635180613856825, + 
"grad_norm": 689.4891967773438, + "learning_rate": 2.6066027596981314e-05, + "loss": 67.9524, + "step": 115430 + }, + { + "epoch": 0.4663922074039359, + "grad_norm": 680.38525390625, + "learning_rate": 2.6063366553922622e-05, + "loss": 80.0022, + "step": 115440 + }, + { + "epoch": 0.46643260866930353, + "grad_norm": 630.1315307617188, + "learning_rate": 2.6060705392655482e-05, + "loss": 64.5112, + "step": 115450 + }, + { + "epoch": 0.46647300993467117, + "grad_norm": 534.8250732421875, + "learning_rate": 2.605804411323177e-05, + "loss": 81.2812, + "step": 115460 + }, + { + "epoch": 0.4665134112000388, + "grad_norm": 472.06170654296875, + "learning_rate": 2.6055382715703367e-05, + "loss": 76.1223, + "step": 115470 + }, + { + "epoch": 0.4665538124654064, + "grad_norm": 926.7582397460938, + "learning_rate": 2.6052721200122162e-05, + "loss": 141.3341, + "step": 115480 + }, + { + "epoch": 0.46659421373077403, + "grad_norm": 775.48974609375, + "learning_rate": 2.6050059566540034e-05, + "loss": 66.2329, + "step": 115490 + }, + { + "epoch": 0.4666346149961417, + "grad_norm": 901.13916015625, + "learning_rate": 2.6047397815008894e-05, + "loss": 80.7233, + "step": 115500 + }, + { + "epoch": 0.4666750162615093, + "grad_norm": 676.7378540039062, + "learning_rate": 2.6044735945580613e-05, + "loss": 98.4829, + "step": 115510 + }, + { + "epoch": 0.46671541752687695, + "grad_norm": 1384.15576171875, + "learning_rate": 2.6042073958307095e-05, + "loss": 81.2791, + "step": 115520 + }, + { + "epoch": 0.4667558187922446, + "grad_norm": 1012.1597290039062, + "learning_rate": 2.603941185324023e-05, + "loss": 67.5291, + "step": 115530 + }, + { + "epoch": 0.4667962200576122, + "grad_norm": 623.9782104492188, + "learning_rate": 2.6036749630431923e-05, + "loss": 98.6119, + "step": 115540 + }, + { + "epoch": 0.4668366213229798, + "grad_norm": 1340.05126953125, + "learning_rate": 2.6034087289934072e-05, + "loss": 104.4568, + "step": 115550 + }, + { + "epoch": 0.46687702258834746, + "grad_norm": 338.4691162109375, + "learning_rate": 2.6031424831798587e-05, + "loss": 100.5001, + "step": 115560 + }, + { + "epoch": 0.4669174238537151, + "grad_norm": 700.010986328125, + "learning_rate": 2.6028762256077365e-05, + "loss": 50.7099, + "step": 115570 + }, + { + "epoch": 0.46695782511908274, + "grad_norm": 2429.616943359375, + "learning_rate": 2.6026099562822323e-05, + "loss": 98.6113, + "step": 115580 + }, + { + "epoch": 0.4669982263844504, + "grad_norm": 458.4536437988281, + "learning_rate": 2.6023436752085366e-05, + "loss": 80.2103, + "step": 115590 + }, + { + "epoch": 0.467038627649818, + "grad_norm": 784.9609985351562, + "learning_rate": 2.6020773823918414e-05, + "loss": 86.2658, + "step": 115600 + }, + { + "epoch": 0.4670790289151856, + "grad_norm": 594.8240356445312, + "learning_rate": 2.601811077837337e-05, + "loss": 89.2637, + "step": 115610 + }, + { + "epoch": 0.46711943018055324, + "grad_norm": 988.6428833007812, + "learning_rate": 2.601544761550216e-05, + "loss": 46.3688, + "step": 115620 + }, + { + "epoch": 0.4671598314459209, + "grad_norm": 1001.7023315429688, + "learning_rate": 2.60127843353567e-05, + "loss": 88.5945, + "step": 115630 + }, + { + "epoch": 0.4672002327112885, + "grad_norm": 573.270751953125, + "learning_rate": 2.6010120937988915e-05, + "loss": 65.2594, + "step": 115640 + }, + { + "epoch": 0.46724063397665616, + "grad_norm": 1551.462158203125, + "learning_rate": 2.6007457423450727e-05, + "loss": 79.3653, + "step": 115650 + }, + { + "epoch": 0.4672810352420238, + "grad_norm": 720.85498046875, + 
"learning_rate": 2.6004793791794066e-05, + "loss": 120.2793, + "step": 115660 + }, + { + "epoch": 0.4673214365073914, + "grad_norm": 497.3236999511719, + "learning_rate": 2.6002130043070858e-05, + "loss": 77.1124, + "step": 115670 + }, + { + "epoch": 0.467361837772759, + "grad_norm": 1054.2122802734375, + "learning_rate": 2.599946617733303e-05, + "loss": 112.7934, + "step": 115680 + }, + { + "epoch": 0.46740223903812667, + "grad_norm": 1149.6624755859375, + "learning_rate": 2.599680219463253e-05, + "loss": 73.5404, + "step": 115690 + }, + { + "epoch": 0.4674426403034943, + "grad_norm": 610.9464721679688, + "learning_rate": 2.599413809502128e-05, + "loss": 79.8058, + "step": 115700 + }, + { + "epoch": 0.46748304156886195, + "grad_norm": 621.6395874023438, + "learning_rate": 2.599147387855122e-05, + "loss": 95.1546, + "step": 115710 + }, + { + "epoch": 0.4675234428342296, + "grad_norm": 716.53125, + "learning_rate": 2.598880954527429e-05, + "loss": 98.3651, + "step": 115720 + }, + { + "epoch": 0.4675638440995972, + "grad_norm": 600.2053833007812, + "learning_rate": 2.5986145095242436e-05, + "loss": 67.7368, + "step": 115730 + }, + { + "epoch": 0.4676042453649648, + "grad_norm": 794.8717651367188, + "learning_rate": 2.5983480528507605e-05, + "loss": 77.5994, + "step": 115740 + }, + { + "epoch": 0.46764464663033245, + "grad_norm": 431.24102783203125, + "learning_rate": 2.5980815845121737e-05, + "loss": 67.029, + "step": 115750 + }, + { + "epoch": 0.4676850478957001, + "grad_norm": 1030.2110595703125, + "learning_rate": 2.5978151045136787e-05, + "loss": 60.4294, + "step": 115760 + }, + { + "epoch": 0.46772544916106773, + "grad_norm": 425.44854736328125, + "learning_rate": 2.5975486128604705e-05, + "loss": 73.0438, + "step": 115770 + }, + { + "epoch": 0.46776585042643537, + "grad_norm": 829.6524047851562, + "learning_rate": 2.5972821095577443e-05, + "loss": 122.3286, + "step": 115780 + }, + { + "epoch": 0.467806251691803, + "grad_norm": 725.9635009765625, + "learning_rate": 2.597015594610696e-05, + "loss": 83.8229, + "step": 115790 + }, + { + "epoch": 0.4678466529571706, + "grad_norm": 344.1451416015625, + "learning_rate": 2.596749068024521e-05, + "loss": 87.7201, + "step": 115800 + }, + { + "epoch": 0.46788705422253823, + "grad_norm": 643.0404052734375, + "learning_rate": 2.5964825298044156e-05, + "loss": 62.0316, + "step": 115810 + }, + { + "epoch": 0.4679274554879059, + "grad_norm": 630.2131958007812, + "learning_rate": 2.596215979955577e-05, + "loss": 105.3087, + "step": 115820 + }, + { + "epoch": 0.4679678567532735, + "grad_norm": 794.2405395507812, + "learning_rate": 2.5959494184832e-05, + "loss": 71.1687, + "step": 115830 + }, + { + "epoch": 0.46800825801864115, + "grad_norm": 2211.93310546875, + "learning_rate": 2.5956828453924826e-05, + "loss": 97.0355, + "step": 115840 + }, + { + "epoch": 0.4680486592840088, + "grad_norm": 344.4617614746094, + "learning_rate": 2.5954162606886214e-05, + "loss": 68.1076, + "step": 115850 + }, + { + "epoch": 0.4680890605493764, + "grad_norm": 419.6672668457031, + "learning_rate": 2.595149664376814e-05, + "loss": 98.7234, + "step": 115860 + }, + { + "epoch": 0.468129461814744, + "grad_norm": 414.0491638183594, + "learning_rate": 2.5948830564622574e-05, + "loss": 77.9826, + "step": 115870 + }, + { + "epoch": 0.46816986308011166, + "grad_norm": 859.5167846679688, + "learning_rate": 2.5946164369501495e-05, + "loss": 55.8543, + "step": 115880 + }, + { + "epoch": 0.4682102643454793, + "grad_norm": 1677.3466796875, + "learning_rate": 2.5943498058456875e-05, + 
"loss": 96.4282, + "step": 115890 + }, + { + "epoch": 0.46825066561084694, + "grad_norm": 1184.870849609375, + "learning_rate": 2.59408316315407e-05, + "loss": 68.8927, + "step": 115900 + }, + { + "epoch": 0.4682910668762146, + "grad_norm": 1714.949462890625, + "learning_rate": 2.5938165088804956e-05, + "loss": 78.2174, + "step": 115910 + }, + { + "epoch": 0.4683314681415822, + "grad_norm": 799.5128173828125, + "learning_rate": 2.5935498430301628e-05, + "loss": 67.6495, + "step": 115920 + }, + { + "epoch": 0.4683718694069498, + "grad_norm": 963.5465698242188, + "learning_rate": 2.59328316560827e-05, + "loss": 76.0293, + "step": 115930 + }, + { + "epoch": 0.46841227067231744, + "grad_norm": 698.3724975585938, + "learning_rate": 2.593016476620017e-05, + "loss": 101.9522, + "step": 115940 + }, + { + "epoch": 0.4684526719376851, + "grad_norm": 972.49853515625, + "learning_rate": 2.5927497760706023e-05, + "loss": 52.373, + "step": 115950 + }, + { + "epoch": 0.4684930732030527, + "grad_norm": 1098.3272705078125, + "learning_rate": 2.592483063965225e-05, + "loss": 107.8488, + "step": 115960 + }, + { + "epoch": 0.46853347446842036, + "grad_norm": 725.2745361328125, + "learning_rate": 2.5922163403090856e-05, + "loss": 102.3117, + "step": 115970 + }, + { + "epoch": 0.468573875733788, + "grad_norm": 575.4659423828125, + "learning_rate": 2.5919496051073838e-05, + "loss": 63.2006, + "step": 115980 + }, + { + "epoch": 0.4686142769991556, + "grad_norm": 542.0641479492188, + "learning_rate": 2.59168285836532e-05, + "loss": 58.7564, + "step": 115990 + }, + { + "epoch": 0.46865467826452323, + "grad_norm": 320.8777160644531, + "learning_rate": 2.5914161000880938e-05, + "loss": 105.1612, + "step": 116000 + }, + { + "epoch": 0.46869507952989087, + "grad_norm": 630.9784545898438, + "learning_rate": 2.591149330280906e-05, + "loss": 64.1075, + "step": 116010 + }, + { + "epoch": 0.4687354807952585, + "grad_norm": 643.2815551757812, + "learning_rate": 2.5908825489489583e-05, + "loss": 42.9188, + "step": 116020 + }, + { + "epoch": 0.46877588206062615, + "grad_norm": 826.1723022460938, + "learning_rate": 2.5906157560974507e-05, + "loss": 123.3357, + "step": 116030 + }, + { + "epoch": 0.4688162833259938, + "grad_norm": 300.50726318359375, + "learning_rate": 2.5903489517315855e-05, + "loss": 66.1133, + "step": 116040 + }, + { + "epoch": 0.46885668459136143, + "grad_norm": 494.48876953125, + "learning_rate": 2.590082135856563e-05, + "loss": 83.0862, + "step": 116050 + }, + { + "epoch": 0.468897085856729, + "grad_norm": 717.3709716796875, + "learning_rate": 2.5898153084775852e-05, + "loss": 75.1968, + "step": 116060 + }, + { + "epoch": 0.46893748712209665, + "grad_norm": 508.69207763671875, + "learning_rate": 2.589548469599855e-05, + "loss": 70.1793, + "step": 116070 + }, + { + "epoch": 0.4689778883874643, + "grad_norm": 208.58041381835938, + "learning_rate": 2.5892816192285733e-05, + "loss": 79.6503, + "step": 116080 + }, + { + "epoch": 0.46901828965283193, + "grad_norm": 609.8517456054688, + "learning_rate": 2.5890147573689435e-05, + "loss": 85.5485, + "step": 116090 + }, + { + "epoch": 0.4690586909181996, + "grad_norm": 480.93402099609375, + "learning_rate": 2.5887478840261673e-05, + "loss": 69.4897, + "step": 116100 + }, + { + "epoch": 0.4690990921835672, + "grad_norm": 1058.9134521484375, + "learning_rate": 2.5884809992054486e-05, + "loss": 111.8323, + "step": 116110 + }, + { + "epoch": 0.4691394934489348, + "grad_norm": 663.572021484375, + "learning_rate": 2.5882141029119895e-05, + "loss": 83.3952, + "step": 116120 
+ }, + { + "epoch": 0.46917989471430244, + "grad_norm": 910.2940063476562, + "learning_rate": 2.5879471951509937e-05, + "loss": 58.8663, + "step": 116130 + }, + { + "epoch": 0.4692202959796701, + "grad_norm": 478.2908020019531, + "learning_rate": 2.5876802759276642e-05, + "loss": 66.7963, + "step": 116140 + }, + { + "epoch": 0.4692606972450377, + "grad_norm": 396.9051818847656, + "learning_rate": 2.5874133452472058e-05, + "loss": 107.7653, + "step": 116150 + }, + { + "epoch": 0.46930109851040536, + "grad_norm": 287.2270202636719, + "learning_rate": 2.5871464031148213e-05, + "loss": 72.5333, + "step": 116160 + }, + { + "epoch": 0.469341499775773, + "grad_norm": 665.8568725585938, + "learning_rate": 2.5868794495357152e-05, + "loss": 83.0261, + "step": 116170 + }, + { + "epoch": 0.4693819010411406, + "grad_norm": 556.3671264648438, + "learning_rate": 2.586612484515093e-05, + "loss": 84.9525, + "step": 116180 + }, + { + "epoch": 0.4694223023065082, + "grad_norm": 1899.8004150390625, + "learning_rate": 2.5863455080581576e-05, + "loss": 84.9822, + "step": 116190 + }, + { + "epoch": 0.46946270357187586, + "grad_norm": 1182.76953125, + "learning_rate": 2.5860785201701147e-05, + "loss": 78.9611, + "step": 116200 + }, + { + "epoch": 0.4695031048372435, + "grad_norm": 685.7833862304688, + "learning_rate": 2.58581152085617e-05, + "loss": 76.4859, + "step": 116210 + }, + { + "epoch": 0.46954350610261114, + "grad_norm": 375.30413818359375, + "learning_rate": 2.5855445101215277e-05, + "loss": 66.7903, + "step": 116220 + }, + { + "epoch": 0.4695839073679788, + "grad_norm": 855.1605834960938, + "learning_rate": 2.5852774879713938e-05, + "loss": 83.9544, + "step": 116230 + }, + { + "epoch": 0.4696243086333464, + "grad_norm": 813.657470703125, + "learning_rate": 2.585010454410974e-05, + "loss": 45.0946, + "step": 116240 + }, + { + "epoch": 0.469664709898714, + "grad_norm": 655.6005859375, + "learning_rate": 2.5847434094454737e-05, + "loss": 108.0663, + "step": 116250 + }, + { + "epoch": 0.46970511116408165, + "grad_norm": 130.6782684326172, + "learning_rate": 2.5844763530801002e-05, + "loss": 76.9015, + "step": 116260 + }, + { + "epoch": 0.4697455124294493, + "grad_norm": 392.3663330078125, + "learning_rate": 2.584209285320059e-05, + "loss": 60.7554, + "step": 116270 + }, + { + "epoch": 0.4697859136948169, + "grad_norm": 823.48095703125, + "learning_rate": 2.5839422061705566e-05, + "loss": 84.162, + "step": 116280 + }, + { + "epoch": 0.46982631496018457, + "grad_norm": 1146.59765625, + "learning_rate": 2.583675115636801e-05, + "loss": 75.869, + "step": 116290 + }, + { + "epoch": 0.4698667162255522, + "grad_norm": 621.495361328125, + "learning_rate": 2.583408013723998e-05, + "loss": 124.2503, + "step": 116300 + }, + { + "epoch": 0.4699071174909198, + "grad_norm": 678.830078125, + "learning_rate": 2.5831409004373562e-05, + "loss": 82.1241, + "step": 116310 + }, + { + "epoch": 0.46994751875628743, + "grad_norm": 745.513427734375, + "learning_rate": 2.5828737757820815e-05, + "loss": 113.1406, + "step": 116320 + }, + { + "epoch": 0.46998792002165507, + "grad_norm": 1162.2847900390625, + "learning_rate": 2.5826066397633827e-05, + "loss": 65.9462, + "step": 116330 + }, + { + "epoch": 0.4700283212870227, + "grad_norm": 712.9412841796875, + "learning_rate": 2.582339492386467e-05, + "loss": 100.5458, + "step": 116340 + }, + { + "epoch": 0.47006872255239035, + "grad_norm": 769.9151611328125, + "learning_rate": 2.5820723336565436e-05, + "loss": 84.2568, + "step": 116350 + }, + { + "epoch": 0.470109123817758, + 
"grad_norm": 959.0943603515625, + "learning_rate": 2.5818051635788208e-05, + "loss": 67.0183, + "step": 116360 + }, + { + "epoch": 0.47014952508312563, + "grad_norm": 412.6194763183594, + "learning_rate": 2.581537982158506e-05, + "loss": 79.9952, + "step": 116370 + }, + { + "epoch": 0.4701899263484932, + "grad_norm": 826.76123046875, + "learning_rate": 2.5812707894008097e-05, + "loss": 89.7833, + "step": 116380 + }, + { + "epoch": 0.47023032761386085, + "grad_norm": 1795.7869873046875, + "learning_rate": 2.5810035853109393e-05, + "loss": 66.2808, + "step": 116390 + }, + { + "epoch": 0.4702707288792285, + "grad_norm": 403.2627258300781, + "learning_rate": 2.5807363698941056e-05, + "loss": 85.9762, + "step": 116400 + }, + { + "epoch": 0.47031113014459613, + "grad_norm": 451.9342041015625, + "learning_rate": 2.5804691431555168e-05, + "loss": 56.4226, + "step": 116410 + }, + { + "epoch": 0.4703515314099638, + "grad_norm": 404.1800842285156, + "learning_rate": 2.5802019051003835e-05, + "loss": 92.4906, + "step": 116420 + }, + { + "epoch": 0.4703919326753314, + "grad_norm": 752.7086181640625, + "learning_rate": 2.5799346557339153e-05, + "loss": 112.5996, + "step": 116430 + }, + { + "epoch": 0.470432333940699, + "grad_norm": 678.25244140625, + "learning_rate": 2.579667395061322e-05, + "loss": 91.0815, + "step": 116440 + }, + { + "epoch": 0.47047273520606664, + "grad_norm": 805.9823608398438, + "learning_rate": 2.579400123087815e-05, + "loss": 76.0842, + "step": 116450 + }, + { + "epoch": 0.4705131364714343, + "grad_norm": 692.4321899414062, + "learning_rate": 2.5791328398186046e-05, + "loss": 53.6029, + "step": 116460 + }, + { + "epoch": 0.4705535377368019, + "grad_norm": 1117.1575927734375, + "learning_rate": 2.5788655452589005e-05, + "loss": 88.5601, + "step": 116470 + }, + { + "epoch": 0.47059393900216956, + "grad_norm": 365.29449462890625, + "learning_rate": 2.578598239413915e-05, + "loss": 105.0375, + "step": 116480 + }, + { + "epoch": 0.4706343402675372, + "grad_norm": 775.416748046875, + "learning_rate": 2.578330922288859e-05, + "loss": 56.152, + "step": 116490 + }, + { + "epoch": 0.4706747415329048, + "grad_norm": 351.73486328125, + "learning_rate": 2.5780635938889433e-05, + "loss": 89.789, + "step": 116500 + }, + { + "epoch": 0.4707151427982724, + "grad_norm": 788.6669311523438, + "learning_rate": 2.5777962542193805e-05, + "loss": 77.6806, + "step": 116510 + }, + { + "epoch": 0.47075554406364006, + "grad_norm": 1173.169677734375, + "learning_rate": 2.5775289032853827e-05, + "loss": 78.8852, + "step": 116520 + }, + { + "epoch": 0.4707959453290077, + "grad_norm": 767.7442016601562, + "learning_rate": 2.5772615410921614e-05, + "loss": 79.3311, + "step": 116530 + }, + { + "epoch": 0.47083634659437534, + "grad_norm": 552.5950927734375, + "learning_rate": 2.5769941676449288e-05, + "loss": 104.8679, + "step": 116540 + }, + { + "epoch": 0.470876747859743, + "grad_norm": 1114.0340576171875, + "learning_rate": 2.5767267829488987e-05, + "loss": 95.6374, + "step": 116550 + }, + { + "epoch": 0.4709171491251106, + "grad_norm": 845.2505493164062, + "learning_rate": 2.5764593870092824e-05, + "loss": 66.4253, + "step": 116560 + }, + { + "epoch": 0.4709575503904782, + "grad_norm": 1400.5618896484375, + "learning_rate": 2.5761919798312937e-05, + "loss": 131.5966, + "step": 116570 + }, + { + "epoch": 0.47099795165584585, + "grad_norm": 587.4052124023438, + "learning_rate": 2.5759245614201458e-05, + "loss": 53.0818, + "step": 116580 + }, + { + "epoch": 0.4710383529212135, + "grad_norm": 1146.667724609375, + 
"learning_rate": 2.5756571317810517e-05, + "loss": 74.6485, + "step": 116590 + }, + { + "epoch": 0.47107875418658113, + "grad_norm": 304.9200744628906, + "learning_rate": 2.575389690919226e-05, + "loss": 68.6557, + "step": 116600 + }, + { + "epoch": 0.47111915545194877, + "grad_norm": 1113.6209716796875, + "learning_rate": 2.5751222388398815e-05, + "loss": 124.6654, + "step": 116610 + }, + { + "epoch": 0.4711595567173164, + "grad_norm": 577.2706298828125, + "learning_rate": 2.574854775548233e-05, + "loss": 62.2026, + "step": 116620 + }, + { + "epoch": 0.471199957982684, + "grad_norm": 3892.22705078125, + "learning_rate": 2.574587301049495e-05, + "loss": 114.3918, + "step": 116630 + }, + { + "epoch": 0.47124035924805163, + "grad_norm": 806.165283203125, + "learning_rate": 2.574319815348881e-05, + "loss": 83.6133, + "step": 116640 + }, + { + "epoch": 0.4712807605134193, + "grad_norm": 392.5348205566406, + "learning_rate": 2.5740523184516077e-05, + "loss": 42.9556, + "step": 116650 + }, + { + "epoch": 0.4713211617787869, + "grad_norm": 841.2348022460938, + "learning_rate": 2.573784810362888e-05, + "loss": 70.7973, + "step": 116660 + }, + { + "epoch": 0.47136156304415455, + "grad_norm": 680.9302368164062, + "learning_rate": 2.573517291087938e-05, + "loss": 126.0641, + "step": 116670 + }, + { + "epoch": 0.4714019643095222, + "grad_norm": 846.09765625, + "learning_rate": 2.5732497606319743e-05, + "loss": 61.1958, + "step": 116680 + }, + { + "epoch": 0.47144236557488983, + "grad_norm": 707.9385375976562, + "learning_rate": 2.57298221900021e-05, + "loss": 52.9895, + "step": 116690 + }, + { + "epoch": 0.4714827668402574, + "grad_norm": 886.8209228515625, + "learning_rate": 2.572714666197863e-05, + "loss": 95.5937, + "step": 116700 + }, + { + "epoch": 0.47152316810562506, + "grad_norm": 356.46246337890625, + "learning_rate": 2.5724471022301484e-05, + "loss": 67.7847, + "step": 116710 + }, + { + "epoch": 0.4715635693709927, + "grad_norm": 730.9976806640625, + "learning_rate": 2.5721795271022833e-05, + "loss": 71.2723, + "step": 116720 + }, + { + "epoch": 0.47160397063636034, + "grad_norm": 714.7294311523438, + "learning_rate": 2.5719119408194835e-05, + "loss": 56.1697, + "step": 116730 + }, + { + "epoch": 0.471644371901728, + "grad_norm": 495.8500061035156, + "learning_rate": 2.5716443433869657e-05, + "loss": 61.6769, + "step": 116740 + }, + { + "epoch": 0.4716847731670956, + "grad_norm": 1695.54736328125, + "learning_rate": 2.5713767348099473e-05, + "loss": 102.6252, + "step": 116750 + }, + { + "epoch": 0.4717251744324632, + "grad_norm": 619.1504516601562, + "learning_rate": 2.5711091150936457e-05, + "loss": 81.4368, + "step": 116760 + }, + { + "epoch": 0.47176557569783084, + "grad_norm": 521.9263305664062, + "learning_rate": 2.5708414842432773e-05, + "loss": 74.7047, + "step": 116770 + }, + { + "epoch": 0.4718059769631985, + "grad_norm": 471.4647521972656, + "learning_rate": 2.5705738422640605e-05, + "loss": 88.642, + "step": 116780 + }, + { + "epoch": 0.4718463782285661, + "grad_norm": 802.669189453125, + "learning_rate": 2.5703061891612127e-05, + "loss": 73.8984, + "step": 116790 + }, + { + "epoch": 0.47188677949393376, + "grad_norm": 525.2122192382812, + "learning_rate": 2.5700385249399525e-05, + "loss": 65.2468, + "step": 116800 + }, + { + "epoch": 0.4719271807593014, + "grad_norm": 659.9879760742188, + "learning_rate": 2.5697708496054974e-05, + "loss": 101.5622, + "step": 116810 + }, + { + "epoch": 0.471967582024669, + "grad_norm": 561.3828125, + "learning_rate": 2.5695031631630668e-05, + 
"loss": 103.0478, + "step": 116820 + }, + { + "epoch": 0.4720079832900366, + "grad_norm": 1215.7286376953125, + "learning_rate": 2.5692354656178785e-05, + "loss": 79.7174, + "step": 116830 + }, + { + "epoch": 0.47204838455540427, + "grad_norm": 315.1688232421875, + "learning_rate": 2.5689677569751517e-05, + "loss": 83.9211, + "step": 116840 + }, + { + "epoch": 0.4720887858207719, + "grad_norm": 578.9766235351562, + "learning_rate": 2.5687000372401057e-05, + "loss": 58.647, + "step": 116850 + }, + { + "epoch": 0.47212918708613955, + "grad_norm": 786.9614868164062, + "learning_rate": 2.5684323064179592e-05, + "loss": 102.5347, + "step": 116860 + }, + { + "epoch": 0.4721695883515072, + "grad_norm": 409.8266906738281, + "learning_rate": 2.5681645645139325e-05, + "loss": 101.6467, + "step": 116870 + }, + { + "epoch": 0.4722099896168748, + "grad_norm": 514.0018310546875, + "learning_rate": 2.5678968115332455e-05, + "loss": 52.8295, + "step": 116880 + }, + { + "epoch": 0.4722503908822424, + "grad_norm": 856.03564453125, + "learning_rate": 2.5676290474811176e-05, + "loss": 108.7909, + "step": 116890 + }, + { + "epoch": 0.47229079214761005, + "grad_norm": 934.2723388671875, + "learning_rate": 2.567361272362769e-05, + "loss": 110.5154, + "step": 116900 + }, + { + "epoch": 0.4723311934129777, + "grad_norm": 707.3862915039062, + "learning_rate": 2.5670934861834206e-05, + "loss": 83.4589, + "step": 116910 + }, + { + "epoch": 0.47237159467834533, + "grad_norm": 521.76123046875, + "learning_rate": 2.566825688948292e-05, + "loss": 71.344, + "step": 116920 + }, + { + "epoch": 0.47241199594371297, + "grad_norm": 629.689697265625, + "learning_rate": 2.5665578806626057e-05, + "loss": 104.6371, + "step": 116930 + }, + { + "epoch": 0.4724523972090806, + "grad_norm": 753.2691650390625, + "learning_rate": 2.5662900613315813e-05, + "loss": 88.3537, + "step": 116940 + }, + { + "epoch": 0.4724927984744482, + "grad_norm": 673.842041015625, + "learning_rate": 2.5660222309604405e-05, + "loss": 100.0589, + "step": 116950 + }, + { + "epoch": 0.47253319973981583, + "grad_norm": 780.7237548828125, + "learning_rate": 2.565754389554405e-05, + "loss": 108.3527, + "step": 116960 + }, + { + "epoch": 0.4725736010051835, + "grad_norm": 685.215087890625, + "learning_rate": 2.5654865371186966e-05, + "loss": 93.1937, + "step": 116970 + }, + { + "epoch": 0.4726140022705511, + "grad_norm": 528.1571044921875, + "learning_rate": 2.5652186736585373e-05, + "loss": 85.4315, + "step": 116980 + }, + { + "epoch": 0.47265440353591875, + "grad_norm": 525.4091796875, + "learning_rate": 2.5649507991791486e-05, + "loss": 79.5242, + "step": 116990 + }, + { + "epoch": 0.4726948048012864, + "grad_norm": 536.5635375976562, + "learning_rate": 2.5646829136857535e-05, + "loss": 74.998, + "step": 117000 + }, + { + "epoch": 0.47273520606665403, + "grad_norm": 1175.0877685546875, + "learning_rate": 2.564415017183574e-05, + "loss": 123.4196, + "step": 117010 + }, + { + "epoch": 0.4727756073320216, + "grad_norm": 733.11865234375, + "learning_rate": 2.564147109677833e-05, + "loss": 71.3791, + "step": 117020 + }, + { + "epoch": 0.47281600859738926, + "grad_norm": 635.416259765625, + "learning_rate": 2.5638791911737535e-05, + "loss": 84.8683, + "step": 117030 + }, + { + "epoch": 0.4728564098627569, + "grad_norm": 736.8175659179688, + "learning_rate": 2.563611261676559e-05, + "loss": 75.3226, + "step": 117040 + }, + { + "epoch": 0.47289681112812454, + "grad_norm": 575.5137939453125, + "learning_rate": 2.5633433211914732e-05, + "loss": 92.4218, + "step": 117050 + 
}, + { + "epoch": 0.4729372123934922, + "grad_norm": 335.1080627441406, + "learning_rate": 2.563075369723719e-05, + "loss": 82.4347, + "step": 117060 + }, + { + "epoch": 0.4729776136588598, + "grad_norm": 1177.5318603515625, + "learning_rate": 2.562807407278521e-05, + "loss": 95.7299, + "step": 117070 + }, + { + "epoch": 0.4730180149242274, + "grad_norm": 1075.945556640625, + "learning_rate": 2.5625394338611023e-05, + "loss": 87.9253, + "step": 117080 + }, + { + "epoch": 0.47305841618959504, + "grad_norm": 694.8682250976562, + "learning_rate": 2.5622714494766886e-05, + "loss": 71.7689, + "step": 117090 + }, + { + "epoch": 0.4730988174549627, + "grad_norm": 652.8735961914062, + "learning_rate": 2.5620034541305026e-05, + "loss": 50.7132, + "step": 117100 + }, + { + "epoch": 0.4731392187203303, + "grad_norm": 617.4186401367188, + "learning_rate": 2.56173544782777e-05, + "loss": 84.2486, + "step": 117110 + }, + { + "epoch": 0.47317961998569796, + "grad_norm": 633.4347534179688, + "learning_rate": 2.5614674305737155e-05, + "loss": 46.8438, + "step": 117120 + }, + { + "epoch": 0.4732200212510656, + "grad_norm": 856.6358642578125, + "learning_rate": 2.561199402373565e-05, + "loss": 57.3978, + "step": 117130 + }, + { + "epoch": 0.4732604225164332, + "grad_norm": 499.88909912109375, + "learning_rate": 2.560931363232543e-05, + "loss": 65.1392, + "step": 117140 + }, + { + "epoch": 0.47330082378180083, + "grad_norm": 419.126708984375, + "learning_rate": 2.5606633131558755e-05, + "loss": 115.3283, + "step": 117150 + }, + { + "epoch": 0.47334122504716847, + "grad_norm": 411.9915771484375, + "learning_rate": 2.5603952521487875e-05, + "loss": 68.7074, + "step": 117160 + }, + { + "epoch": 0.4733816263125361, + "grad_norm": 860.0364990234375, + "learning_rate": 2.560127180216506e-05, + "loss": 93.5762, + "step": 117170 + }, + { + "epoch": 0.47342202757790375, + "grad_norm": 1416.3692626953125, + "learning_rate": 2.5598590973642562e-05, + "loss": 89.2747, + "step": 117180 + }, + { + "epoch": 0.4734624288432714, + "grad_norm": 1367.1455078125, + "learning_rate": 2.5595910035972657e-05, + "loss": 87.385, + "step": 117190 + }, + { + "epoch": 0.47350283010863903, + "grad_norm": 669.2705688476562, + "learning_rate": 2.55932289892076e-05, + "loss": 74.6653, + "step": 117200 + }, + { + "epoch": 0.4735432313740066, + "grad_norm": 795.1613159179688, + "learning_rate": 2.559054783339967e-05, + "loss": 70.9672, + "step": 117210 + }, + { + "epoch": 0.47358363263937425, + "grad_norm": 665.045166015625, + "learning_rate": 2.558786656860113e-05, + "loss": 87.004, + "step": 117220 + }, + { + "epoch": 0.4736240339047419, + "grad_norm": 365.8616943359375, + "learning_rate": 2.5585185194864245e-05, + "loss": 72.34, + "step": 117230 + }, + { + "epoch": 0.47366443517010953, + "grad_norm": 731.5771484375, + "learning_rate": 2.558250371224131e-05, + "loss": 69.099, + "step": 117240 + }, + { + "epoch": 0.4737048364354772, + "grad_norm": 974.5120849609375, + "learning_rate": 2.557982212078459e-05, + "loss": 77.4439, + "step": 117250 + }, + { + "epoch": 0.4737452377008448, + "grad_norm": 1134.7784423828125, + "learning_rate": 2.557714042054636e-05, + "loss": 73.9144, + "step": 117260 + }, + { + "epoch": 0.4737856389662124, + "grad_norm": 1916.0299072265625, + "learning_rate": 2.557445861157891e-05, + "loss": 103.482, + "step": 117270 + }, + { + "epoch": 0.47382604023158004, + "grad_norm": 582.3016357421875, + "learning_rate": 2.557177669393452e-05, + "loss": 79.7559, + "step": 117280 + }, + { + "epoch": 0.4738664414969477, + 
"grad_norm": 348.1301574707031, + "learning_rate": 2.5569094667665476e-05, + "loss": 60.2577, + "step": 117290 + }, + { + "epoch": 0.4739068427623153, + "grad_norm": 815.274658203125, + "learning_rate": 2.556641253282406e-05, + "loss": 87.4501, + "step": 117300 + }, + { + "epoch": 0.47394724402768296, + "grad_norm": 857.7070922851562, + "learning_rate": 2.5563730289462565e-05, + "loss": 111.0988, + "step": 117310 + }, + { + "epoch": 0.4739876452930506, + "grad_norm": 1069.94873046875, + "learning_rate": 2.556104793763329e-05, + "loss": 75.2825, + "step": 117320 + }, + { + "epoch": 0.47402804655841824, + "grad_norm": 395.0133972167969, + "learning_rate": 2.5558365477388515e-05, + "loss": 101.3367, + "step": 117330 + }, + { + "epoch": 0.4740684478237858, + "grad_norm": 405.2221984863281, + "learning_rate": 2.5555682908780552e-05, + "loss": 57.7718, + "step": 117340 + }, + { + "epoch": 0.47410884908915346, + "grad_norm": 387.3051452636719, + "learning_rate": 2.5553000231861684e-05, + "loss": 69.4565, + "step": 117350 + }, + { + "epoch": 0.4741492503545211, + "grad_norm": 1937.12890625, + "learning_rate": 2.5550317446684215e-05, + "loss": 80.4645, + "step": 117360 + }, + { + "epoch": 0.47418965161988874, + "grad_norm": 622.9381103515625, + "learning_rate": 2.5547634553300456e-05, + "loss": 98.5785, + "step": 117370 + }, + { + "epoch": 0.4742300528852564, + "grad_norm": 901.4403686523438, + "learning_rate": 2.5544951551762706e-05, + "loss": 116.8878, + "step": 117380 + }, + { + "epoch": 0.474270454150624, + "grad_norm": 767.3594970703125, + "learning_rate": 2.5542268442123264e-05, + "loss": 104.5708, + "step": 117390 + }, + { + "epoch": 0.4743108554159916, + "grad_norm": 571.1878051757812, + "learning_rate": 2.5539585224434448e-05, + "loss": 88.7495, + "step": 117400 + }, + { + "epoch": 0.47435125668135925, + "grad_norm": 812.3685302734375, + "learning_rate": 2.553690189874857e-05, + "loss": 91.1674, + "step": 117410 + }, + { + "epoch": 0.4743916579467269, + "grad_norm": 541.8758544921875, + "learning_rate": 2.553421846511794e-05, + "loss": 89.5999, + "step": 117420 + }, + { + "epoch": 0.4744320592120945, + "grad_norm": 457.3678894042969, + "learning_rate": 2.5531534923594867e-05, + "loss": 93.9794, + "step": 117430 + }, + { + "epoch": 0.47447246047746217, + "grad_norm": 339.7641906738281, + "learning_rate": 2.552885127423167e-05, + "loss": 83.4518, + "step": 117440 + }, + { + "epoch": 0.4745128617428298, + "grad_norm": 930.1015625, + "learning_rate": 2.5526167517080674e-05, + "loss": 69.7134, + "step": 117450 + }, + { + "epoch": 0.4745532630081974, + "grad_norm": 383.1784362792969, + "learning_rate": 2.55234836521942e-05, + "loss": 58.1959, + "step": 117460 + }, + { + "epoch": 0.47459366427356503, + "grad_norm": 604.8629760742188, + "learning_rate": 2.5520799679624565e-05, + "loss": 48.816, + "step": 117470 + }, + { + "epoch": 0.47463406553893267, + "grad_norm": 652.7484130859375, + "learning_rate": 2.55181155994241e-05, + "loss": 117.2281, + "step": 117480 + }, + { + "epoch": 0.4746744668043003, + "grad_norm": 479.4799499511719, + "learning_rate": 2.5515431411645132e-05, + "loss": 81.687, + "step": 117490 + }, + { + "epoch": 0.47471486806966795, + "grad_norm": 564.9569702148438, + "learning_rate": 2.5512747116339985e-05, + "loss": 61.0154, + "step": 117500 + }, + { + "epoch": 0.4747552693350356, + "grad_norm": 470.33380126953125, + "learning_rate": 2.5510062713561e-05, + "loss": 64.4732, + "step": 117510 + }, + { + "epoch": 0.47479567060040323, + "grad_norm": 569.4948120117188, + 
"learning_rate": 2.5507378203360504e-05, + "loss": 74.3291, + "step": 117520 + }, + { + "epoch": 0.4748360718657708, + "grad_norm": 723.9646606445312, + "learning_rate": 2.5504693585790826e-05, + "loss": 82.9236, + "step": 117530 + }, + { + "epoch": 0.47487647313113845, + "grad_norm": 968.2244262695312, + "learning_rate": 2.5502008860904327e-05, + "loss": 68.1677, + "step": 117540 + }, + { + "epoch": 0.4749168743965061, + "grad_norm": 963.5968627929688, + "learning_rate": 2.5499324028753323e-05, + "loss": 64.0365, + "step": 117550 + }, + { + "epoch": 0.47495727566187373, + "grad_norm": 794.4799194335938, + "learning_rate": 2.5496639089390164e-05, + "loss": 102.2764, + "step": 117560 + }, + { + "epoch": 0.4749976769272414, + "grad_norm": 594.8271484375, + "learning_rate": 2.54939540428672e-05, + "loss": 53.014, + "step": 117570 + }, + { + "epoch": 0.475038078192609, + "grad_norm": 2216.67041015625, + "learning_rate": 2.5491268889236777e-05, + "loss": 78.3273, + "step": 117580 + }, + { + "epoch": 0.4750784794579766, + "grad_norm": 1707.472900390625, + "learning_rate": 2.548858362855124e-05, + "loss": 108.4267, + "step": 117590 + }, + { + "epoch": 0.47511888072334424, + "grad_norm": 776.3013305664062, + "learning_rate": 2.5485898260862936e-05, + "loss": 66.7128, + "step": 117600 + }, + { + "epoch": 0.4751592819887119, + "grad_norm": 775.5328979492188, + "learning_rate": 2.5483212786224218e-05, + "loss": 99.3402, + "step": 117610 + }, + { + "epoch": 0.4751996832540795, + "grad_norm": 920.9109497070312, + "learning_rate": 2.548052720468745e-05, + "loss": 94.357, + "step": 117620 + }, + { + "epoch": 0.47524008451944716, + "grad_norm": 1287.133056640625, + "learning_rate": 2.5477841516304976e-05, + "loss": 91.3817, + "step": 117630 + }, + { + "epoch": 0.4752804857848148, + "grad_norm": 1081.5802001953125, + "learning_rate": 2.5475155721129167e-05, + "loss": 65.0733, + "step": 117640 + }, + { + "epoch": 0.47532088705018244, + "grad_norm": 489.70330810546875, + "learning_rate": 2.5472469819212373e-05, + "loss": 60.5342, + "step": 117650 + }, + { + "epoch": 0.47536128831555, + "grad_norm": 792.1094360351562, + "learning_rate": 2.5469783810606968e-05, + "loss": 120.6981, + "step": 117660 + }, + { + "epoch": 0.47540168958091766, + "grad_norm": 767.6053466796875, + "learning_rate": 2.5467097695365305e-05, + "loss": 105.6953, + "step": 117670 + }, + { + "epoch": 0.4754420908462853, + "grad_norm": 552.6087036132812, + "learning_rate": 2.546441147353977e-05, + "loss": 58.453, + "step": 117680 + }, + { + "epoch": 0.47548249211165294, + "grad_norm": 453.8253479003906, + "learning_rate": 2.5461725145182706e-05, + "loss": 96.7252, + "step": 117690 + }, + { + "epoch": 0.4755228933770206, + "grad_norm": 472.0673522949219, + "learning_rate": 2.5459038710346507e-05, + "loss": 53.3366, + "step": 117700 + }, + { + "epoch": 0.4755632946423882, + "grad_norm": 788.909912109375, + "learning_rate": 2.5456352169083536e-05, + "loss": 88.4889, + "step": 117710 + }, + { + "epoch": 0.4756036959077558, + "grad_norm": 599.0029296875, + "learning_rate": 2.545366552144617e-05, + "loss": 58.8574, + "step": 117720 + }, + { + "epoch": 0.47564409717312345, + "grad_norm": 599.04443359375, + "learning_rate": 2.5450978767486786e-05, + "loss": 69.1473, + "step": 117730 + }, + { + "epoch": 0.4756844984384911, + "grad_norm": 659.229736328125, + "learning_rate": 2.5448291907257766e-05, + "loss": 90.9101, + "step": 117740 + }, + { + "epoch": 0.47572489970385873, + "grad_norm": 495.751953125, + "learning_rate": 2.544560494081149e-05, + 
"loss": 76.8799, + "step": 117750 + }, + { + "epoch": 0.47576530096922637, + "grad_norm": 654.9795532226562, + "learning_rate": 2.5442917868200344e-05, + "loss": 57.4717, + "step": 117760 + }, + { + "epoch": 0.475805702234594, + "grad_norm": 545.3356323242188, + "learning_rate": 2.5440230689476707e-05, + "loss": 77.6958, + "step": 117770 + }, + { + "epoch": 0.4758461034999616, + "grad_norm": 734.40234375, + "learning_rate": 2.5437543404692978e-05, + "loss": 76.2459, + "step": 117780 + }, + { + "epoch": 0.47588650476532923, + "grad_norm": 1414.4375, + "learning_rate": 2.5434856013901534e-05, + "loss": 80.7983, + "step": 117790 + }, + { + "epoch": 0.4759269060306969, + "grad_norm": 705.0997924804688, + "learning_rate": 2.5432168517154776e-05, + "loss": 97.3929, + "step": 117800 + }, + { + "epoch": 0.4759673072960645, + "grad_norm": 711.5299682617188, + "learning_rate": 2.5429480914505094e-05, + "loss": 46.4436, + "step": 117810 + }, + { + "epoch": 0.47600770856143215, + "grad_norm": 418.557861328125, + "learning_rate": 2.5426793206004897e-05, + "loss": 99.0725, + "step": 117820 + }, + { + "epoch": 0.4760481098267998, + "grad_norm": 839.0230102539062, + "learning_rate": 2.542410539170656e-05, + "loss": 81.9687, + "step": 117830 + }, + { + "epoch": 0.47608851109216743, + "grad_norm": 727.9471435546875, + "learning_rate": 2.5421417471662504e-05, + "loss": 69.0391, + "step": 117840 + }, + { + "epoch": 0.476128912357535, + "grad_norm": 537.8472900390625, + "learning_rate": 2.541872944592512e-05, + "loss": 63.1178, + "step": 117850 + }, + { + "epoch": 0.47616931362290266, + "grad_norm": 884.6707763671875, + "learning_rate": 2.541604131454682e-05, + "loss": 68.4741, + "step": 117860 + }, + { + "epoch": 0.4762097148882703, + "grad_norm": 1429.069580078125, + "learning_rate": 2.541335307758e-05, + "loss": 69.1582, + "step": 117870 + }, + { + "epoch": 0.47625011615363794, + "grad_norm": 1217.08447265625, + "learning_rate": 2.5410664735077078e-05, + "loss": 105.3684, + "step": 117880 + }, + { + "epoch": 0.4762905174190056, + "grad_norm": 1392.9642333984375, + "learning_rate": 2.540797628709046e-05, + "loss": 75.684, + "step": 117890 + }, + { + "epoch": 0.4763309186843732, + "grad_norm": 1130.8841552734375, + "learning_rate": 2.540528773367256e-05, + "loss": 96.0674, + "step": 117900 + }, + { + "epoch": 0.4763713199497408, + "grad_norm": 708.3011474609375, + "learning_rate": 2.5402599074875792e-05, + "loss": 157.7551, + "step": 117910 + }, + { + "epoch": 0.47641172121510844, + "grad_norm": 305.86932373046875, + "learning_rate": 2.5399910310752574e-05, + "loss": 61.8702, + "step": 117920 + }, + { + "epoch": 0.4764521224804761, + "grad_norm": 842.8548583984375, + "learning_rate": 2.539722144135533e-05, + "loss": 63.1055, + "step": 117930 + }, + { + "epoch": 0.4764925237458437, + "grad_norm": 651.6754760742188, + "learning_rate": 2.539453246673647e-05, + "loss": 96.2545, + "step": 117940 + }, + { + "epoch": 0.47653292501121136, + "grad_norm": 598.6243896484375, + "learning_rate": 2.5391843386948422e-05, + "loss": 77.6958, + "step": 117950 + }, + { + "epoch": 0.476573326276579, + "grad_norm": 606.384033203125, + "learning_rate": 2.538915420204361e-05, + "loss": 117.4647, + "step": 117960 + }, + { + "epoch": 0.47661372754194664, + "grad_norm": 446.481689453125, + "learning_rate": 2.5386464912074466e-05, + "loss": 47.1882, + "step": 117970 + }, + { + "epoch": 0.4766541288073142, + "grad_norm": 702.9208374023438, + "learning_rate": 2.538377551709341e-05, + "loss": 60.7986, + "step": 117980 + }, + { + "epoch": 
0.47669453007268187, + "grad_norm": 1335.060302734375, + "learning_rate": 2.5381086017152886e-05, + "loss": 67.9722, + "step": 117990 + }, + { + "epoch": 0.4767349313380495, + "grad_norm": 1076.9583740234375, + "learning_rate": 2.5378396412305315e-05, + "loss": 96.1867, + "step": 118000 + }, + { + "epoch": 0.47677533260341715, + "grad_norm": 436.61956787109375, + "learning_rate": 2.5375706702603138e-05, + "loss": 65.5477, + "step": 118010 + }, + { + "epoch": 0.4768157338687848, + "grad_norm": 785.1160278320312, + "learning_rate": 2.5373016888098795e-05, + "loss": 68.6622, + "step": 118020 + }, + { + "epoch": 0.4768561351341524, + "grad_norm": 1991.3980712890625, + "learning_rate": 2.537032696884472e-05, + "loss": 93.8869, + "step": 118030 + }, + { + "epoch": 0.47689653639952, + "grad_norm": 684.842529296875, + "learning_rate": 2.5367636944893353e-05, + "loss": 88.3587, + "step": 118040 + }, + { + "epoch": 0.47693693766488765, + "grad_norm": 695.7582397460938, + "learning_rate": 2.536494681629714e-05, + "loss": 73.4592, + "step": 118050 + }, + { + "epoch": 0.4769773389302553, + "grad_norm": 597.2736206054688, + "learning_rate": 2.5362256583108525e-05, + "loss": 61.3818, + "step": 118060 + }, + { + "epoch": 0.47701774019562293, + "grad_norm": 925.0534057617188, + "learning_rate": 2.5359566245379963e-05, + "loss": 63.2063, + "step": 118070 + }, + { + "epoch": 0.47705814146099057, + "grad_norm": 1593.7103271484375, + "learning_rate": 2.535687580316389e-05, + "loss": 116.301, + "step": 118080 + }, + { + "epoch": 0.4770985427263582, + "grad_norm": 605.5830078125, + "learning_rate": 2.5354185256512772e-05, + "loss": 76.6277, + "step": 118090 + }, + { + "epoch": 0.4771389439917258, + "grad_norm": 1106.090576171875, + "learning_rate": 2.5351494605479056e-05, + "loss": 58.4826, + "step": 118100 + }, + { + "epoch": 0.47717934525709343, + "grad_norm": 3364.98291015625, + "learning_rate": 2.53488038501152e-05, + "loss": 102.7509, + "step": 118110 + }, + { + "epoch": 0.4772197465224611, + "grad_norm": 657.0576171875, + "learning_rate": 2.534611299047365e-05, + "loss": 58.8241, + "step": 118120 + }, + { + "epoch": 0.4772601477878287, + "grad_norm": 673.058349609375, + "learning_rate": 2.5343422026606883e-05, + "loss": 124.7456, + "step": 118130 + }, + { + "epoch": 0.47730054905319635, + "grad_norm": 709.9848022460938, + "learning_rate": 2.5340730958567347e-05, + "loss": 84.4995, + "step": 118140 + }, + { + "epoch": 0.477340950318564, + "grad_norm": 778.3622436523438, + "learning_rate": 2.5338039786407516e-05, + "loss": 71.1394, + "step": 118150 + }, + { + "epoch": 0.47738135158393163, + "grad_norm": 400.885498046875, + "learning_rate": 2.5335348510179846e-05, + "loss": 71.296, + "step": 118160 + }, + { + "epoch": 0.4774217528492992, + "grad_norm": 640.0421752929688, + "learning_rate": 2.5332657129936814e-05, + "loss": 57.9215, + "step": 118170 + }, + { + "epoch": 0.47746215411466686, + "grad_norm": 812.71044921875, + "learning_rate": 2.5329965645730882e-05, + "loss": 61.1703, + "step": 118180 + }, + { + "epoch": 0.4775025553800345, + "grad_norm": 856.3941040039062, + "learning_rate": 2.532727405761453e-05, + "loss": 95.7183, + "step": 118190 + }, + { + "epoch": 0.47754295664540214, + "grad_norm": 417.7923278808594, + "learning_rate": 2.532458236564023e-05, + "loss": 60.7846, + "step": 118200 + }, + { + "epoch": 0.4775833579107698, + "grad_norm": 644.7319946289062, + "learning_rate": 2.532189056986045e-05, + "loss": 71.0913, + "step": 118210 + }, + { + "epoch": 0.4776237591761374, + "grad_norm": 
747.6492919921875, + "learning_rate": 2.5319198670327672e-05, + "loss": 83.9172, + "step": 118220 + }, + { + "epoch": 0.477664160441505, + "grad_norm": 368.999267578125, + "learning_rate": 2.5316506667094384e-05, + "loss": 84.6007, + "step": 118230 + }, + { + "epoch": 0.47770456170687264, + "grad_norm": 224.06178283691406, + "learning_rate": 2.5313814560213056e-05, + "loss": 54.4159, + "step": 118240 + }, + { + "epoch": 0.4777449629722403, + "grad_norm": 1690.8433837890625, + "learning_rate": 2.531112234973618e-05, + "loss": 73.9191, + "step": 118250 + }, + { + "epoch": 0.4777853642376079, + "grad_norm": 646.7085571289062, + "learning_rate": 2.5308430035716237e-05, + "loss": 90.5652, + "step": 118260 + }, + { + "epoch": 0.47782576550297556, + "grad_norm": 729.527099609375, + "learning_rate": 2.530573761820572e-05, + "loss": 63.2017, + "step": 118270 + }, + { + "epoch": 0.4778661667683432, + "grad_norm": 3115.35888671875, + "learning_rate": 2.5303045097257117e-05, + "loss": 117.1973, + "step": 118280 + }, + { + "epoch": 0.47790656803371084, + "grad_norm": 561.7537841796875, + "learning_rate": 2.5300352472922914e-05, + "loss": 95.7702, + "step": 118290 + }, + { + "epoch": 0.47794696929907843, + "grad_norm": 434.7034912109375, + "learning_rate": 2.5297659745255618e-05, + "loss": 89.1468, + "step": 118300 + }, + { + "epoch": 0.47798737056444607, + "grad_norm": 487.3309020996094, + "learning_rate": 2.5294966914307714e-05, + "loss": 78.0473, + "step": 118310 + }, + { + "epoch": 0.4780277718298137, + "grad_norm": 1108.7586669921875, + "learning_rate": 2.5292273980131708e-05, + "loss": 77.1403, + "step": 118320 + }, + { + "epoch": 0.47806817309518135, + "grad_norm": 578.4681396484375, + "learning_rate": 2.528958094278009e-05, + "loss": 54.8617, + "step": 118330 + }, + { + "epoch": 0.478108574360549, + "grad_norm": 739.4935913085938, + "learning_rate": 2.528688780230537e-05, + "loss": 76.6457, + "step": 118340 + }, + { + "epoch": 0.47814897562591663, + "grad_norm": 372.785888671875, + "learning_rate": 2.528419455876005e-05, + "loss": 73.0862, + "step": 118350 + }, + { + "epoch": 0.4781893768912842, + "grad_norm": 344.1497497558594, + "learning_rate": 2.528150121219664e-05, + "loss": 59.6301, + "step": 118360 + }, + { + "epoch": 0.47822977815665185, + "grad_norm": 606.3152465820312, + "learning_rate": 2.5278807762667644e-05, + "loss": 99.5825, + "step": 118370 + }, + { + "epoch": 0.4782701794220195, + "grad_norm": 463.89312744140625, + "learning_rate": 2.5276114210225573e-05, + "loss": 72.7147, + "step": 118380 + }, + { + "epoch": 0.47831058068738713, + "grad_norm": 550.8854370117188, + "learning_rate": 2.527342055492294e-05, + "loss": 65.7367, + "step": 118390 + }, + { + "epoch": 0.4783509819527548, + "grad_norm": 1684.239013671875, + "learning_rate": 2.527072679681226e-05, + "loss": 89.8193, + "step": 118400 + }, + { + "epoch": 0.4783913832181224, + "grad_norm": 638.3787841796875, + "learning_rate": 2.5268032935946037e-05, + "loss": 55.5805, + "step": 118410 + }, + { + "epoch": 0.47843178448349, + "grad_norm": 1483.0516357421875, + "learning_rate": 2.5265338972376806e-05, + "loss": 87.4753, + "step": 118420 + }, + { + "epoch": 0.47847218574885764, + "grad_norm": 609.91943359375, + "learning_rate": 2.526264490615708e-05, + "loss": 67.3791, + "step": 118430 + }, + { + "epoch": 0.4785125870142253, + "grad_norm": 420.71697998046875, + "learning_rate": 2.5259950737339387e-05, + "loss": 80.3119, + "step": 118440 + }, + { + "epoch": 0.4785529882795929, + "grad_norm": 479.2693176269531, + "learning_rate": 
2.5257256465976246e-05, + "loss": 67.5532, + "step": 118450 + }, + { + "epoch": 0.47859338954496056, + "grad_norm": 712.501708984375, + "learning_rate": 2.525456209212018e-05, + "loss": 115.2102, + "step": 118460 + }, + { + "epoch": 0.4786337908103282, + "grad_norm": 780.1712036132812, + "learning_rate": 2.5251867615823718e-05, + "loss": 59.1891, + "step": 118470 + }, + { + "epoch": 0.47867419207569584, + "grad_norm": 807.6984252929688, + "learning_rate": 2.5249173037139398e-05, + "loss": 72.4968, + "step": 118480 + }, + { + "epoch": 0.4787145933410634, + "grad_norm": 890.257080078125, + "learning_rate": 2.524647835611975e-05, + "loss": 118.1015, + "step": 118490 + }, + { + "epoch": 0.47875499460643106, + "grad_norm": 973.5971069335938, + "learning_rate": 2.5243783572817297e-05, + "loss": 82.972, + "step": 118500 + }, + { + "epoch": 0.4787953958717987, + "grad_norm": 867.973876953125, + "learning_rate": 2.524108868728458e-05, + "loss": 79.6101, + "step": 118510 + }, + { + "epoch": 0.47883579713716634, + "grad_norm": 827.6520385742188, + "learning_rate": 2.5238393699574157e-05, + "loss": 52.767, + "step": 118520 + }, + { + "epoch": 0.478876198402534, + "grad_norm": 1005.7306518554688, + "learning_rate": 2.5235698609738545e-05, + "loss": 78.2982, + "step": 118530 + }, + { + "epoch": 0.4789165996679016, + "grad_norm": 740.44384765625, + "learning_rate": 2.523300341783029e-05, + "loss": 77.7332, + "step": 118540 + }, + { + "epoch": 0.4789570009332692, + "grad_norm": 1179.2183837890625, + "learning_rate": 2.523030812390194e-05, + "loss": 92.0178, + "step": 118550 + }, + { + "epoch": 0.47899740219863685, + "grad_norm": 812.2997436523438, + "learning_rate": 2.5227612728006046e-05, + "loss": 143.6835, + "step": 118560 + }, + { + "epoch": 0.4790378034640045, + "grad_norm": 802.2848510742188, + "learning_rate": 2.5224917230195142e-05, + "loss": 106.4961, + "step": 118570 + }, + { + "epoch": 0.4790782047293721, + "grad_norm": 873.8909912109375, + "learning_rate": 2.522222163052179e-05, + "loss": 87.422, + "step": 118580 + }, + { + "epoch": 0.47911860599473977, + "grad_norm": 803.8841552734375, + "learning_rate": 2.5219525929038537e-05, + "loss": 87.9163, + "step": 118590 + }, + { + "epoch": 0.4791590072601074, + "grad_norm": 1002.1643676757812, + "learning_rate": 2.5216830125797943e-05, + "loss": 68.2344, + "step": 118600 + }, + { + "epoch": 0.479199408525475, + "grad_norm": 1456.388916015625, + "learning_rate": 2.5214134220852558e-05, + "loss": 93.9058, + "step": 118610 + }, + { + "epoch": 0.47923980979084263, + "grad_norm": 1007.4156494140625, + "learning_rate": 2.5211438214254946e-05, + "loss": 71.326, + "step": 118620 + }, + { + "epoch": 0.47928021105621027, + "grad_norm": 510.22357177734375, + "learning_rate": 2.5208742106057652e-05, + "loss": 99.4237, + "step": 118630 + }, + { + "epoch": 0.4793206123215779, + "grad_norm": 7654.884765625, + "learning_rate": 2.520604589631326e-05, + "loss": 104.545, + "step": 118640 + }, + { + "epoch": 0.47936101358694555, + "grad_norm": 856.18310546875, + "learning_rate": 2.5203349585074314e-05, + "loss": 63.2249, + "step": 118650 + }, + { + "epoch": 0.4794014148523132, + "grad_norm": 664.7891845703125, + "learning_rate": 2.5200653172393393e-05, + "loss": 118.2844, + "step": 118660 + }, + { + "epoch": 0.47944181611768083, + "grad_norm": 561.0073852539062, + "learning_rate": 2.519795665832306e-05, + "loss": 69.5773, + "step": 118670 + }, + { + "epoch": 0.4794822173830484, + "grad_norm": 608.8665161132812, + "learning_rate": 2.519526004291589e-05, + "loss": 
82.0766, + "step": 118680 + }, + { + "epoch": 0.47952261864841605, + "grad_norm": 642.3348999023438, + "learning_rate": 2.519256332622445e-05, + "loss": 52.3765, + "step": 118690 + }, + { + "epoch": 0.4795630199137837, + "grad_norm": 555.3021240234375, + "learning_rate": 2.518986650830131e-05, + "loss": 79.0671, + "step": 118700 + }, + { + "epoch": 0.47960342117915133, + "grad_norm": 1580.2501220703125, + "learning_rate": 2.5187169589199056e-05, + "loss": 97.6462, + "step": 118710 + }, + { + "epoch": 0.479643822444519, + "grad_norm": 401.60028076171875, + "learning_rate": 2.5184472568970258e-05, + "loss": 93.0489, + "step": 118720 + }, + { + "epoch": 0.4796842237098866, + "grad_norm": 703.6646728515625, + "learning_rate": 2.51817754476675e-05, + "loss": 71.659, + "step": 118730 + }, + { + "epoch": 0.4797246249752542, + "grad_norm": 715.7129516601562, + "learning_rate": 2.5179078225343355e-05, + "loss": 74.127, + "step": 118740 + }, + { + "epoch": 0.47976502624062184, + "grad_norm": 1289.9637451171875, + "learning_rate": 2.5176380902050418e-05, + "loss": 57.5694, + "step": 118750 + }, + { + "epoch": 0.4798054275059895, + "grad_norm": 1281.754638671875, + "learning_rate": 2.5173683477841273e-05, + "loss": 68.8426, + "step": 118760 + }, + { + "epoch": 0.4798458287713571, + "grad_norm": 430.5948181152344, + "learning_rate": 2.51709859527685e-05, + "loss": 73.8023, + "step": 118770 + }, + { + "epoch": 0.47988623003672476, + "grad_norm": 1081.1673583984375, + "learning_rate": 2.5168288326884693e-05, + "loss": 63.1889, + "step": 118780 + }, + { + "epoch": 0.4799266313020924, + "grad_norm": 844.4727172851562, + "learning_rate": 2.5165590600242452e-05, + "loss": 65.6037, + "step": 118790 + }, + { + "epoch": 0.47996703256746004, + "grad_norm": 786.8031005859375, + "learning_rate": 2.5162892772894356e-05, + "loss": 102.9999, + "step": 118800 + }, + { + "epoch": 0.4800074338328276, + "grad_norm": 499.80126953125, + "learning_rate": 2.5160194844893014e-05, + "loss": 78.1657, + "step": 118810 + }, + { + "epoch": 0.48004783509819526, + "grad_norm": 1116.5250244140625, + "learning_rate": 2.5157496816291008e-05, + "loss": 59.6975, + "step": 118820 + }, + { + "epoch": 0.4800882363635629, + "grad_norm": 543.9334716796875, + "learning_rate": 2.515479868714095e-05, + "loss": 82.5185, + "step": 118830 + }, + { + "epoch": 0.48012863762893054, + "grad_norm": 471.33038330078125, + "learning_rate": 2.5152100457495447e-05, + "loss": 56.8384, + "step": 118840 + }, + { + "epoch": 0.4801690388942982, + "grad_norm": 591.5828247070312, + "learning_rate": 2.514940212740708e-05, + "loss": 65.4588, + "step": 118850 + }, + { + "epoch": 0.4802094401596658, + "grad_norm": 332.972900390625, + "learning_rate": 2.514670369692847e-05, + "loss": 58.6132, + "step": 118860 + }, + { + "epoch": 0.4802498414250334, + "grad_norm": 415.56585693359375, + "learning_rate": 2.5144005166112223e-05, + "loss": 60.3211, + "step": 118870 + }, + { + "epoch": 0.48029024269040105, + "grad_norm": 799.0918579101562, + "learning_rate": 2.514130653501095e-05, + "loss": 90.5955, + "step": 118880 + }, + { + "epoch": 0.4803306439557687, + "grad_norm": 343.889892578125, + "learning_rate": 2.513860780367726e-05, + "loss": 67.083, + "step": 118890 + }, + { + "epoch": 0.48037104522113633, + "grad_norm": 972.5665893554688, + "learning_rate": 2.513590897216376e-05, + "loss": 71.077, + "step": 118900 + }, + { + "epoch": 0.48041144648650397, + "grad_norm": 625.3969116210938, + "learning_rate": 2.5133210040523072e-05, + "loss": 88.4457, + "step": 118910 + }, + { + 
"epoch": 0.4804518477518716, + "grad_norm": 468.15545654296875, + "learning_rate": 2.513051100880781e-05, + "loss": 82.5713, + "step": 118920 + }, + { + "epoch": 0.4804922490172392, + "grad_norm": 2592.690673828125, + "learning_rate": 2.51278118770706e-05, + "loss": 88.2726, + "step": 118930 + }, + { + "epoch": 0.48053265028260683, + "grad_norm": 658.2777709960938, + "learning_rate": 2.5125112645364052e-05, + "loss": 99.5129, + "step": 118940 + }, + { + "epoch": 0.4805730515479745, + "grad_norm": 672.3861083984375, + "learning_rate": 2.5122413313740794e-05, + "loss": 72.4782, + "step": 118950 + }, + { + "epoch": 0.4806134528133421, + "grad_norm": 618.010009765625, + "learning_rate": 2.511971388225346e-05, + "loss": 60.8556, + "step": 118960 + }, + { + "epoch": 0.48065385407870975, + "grad_norm": 1482.3304443359375, + "learning_rate": 2.5117014350954656e-05, + "loss": 83.5104, + "step": 118970 + }, + { + "epoch": 0.4806942553440774, + "grad_norm": 898.024658203125, + "learning_rate": 2.5114314719897035e-05, + "loss": 87.3638, + "step": 118980 + }, + { + "epoch": 0.48073465660944503, + "grad_norm": 424.9355773925781, + "learning_rate": 2.5111614989133207e-05, + "loss": 64.6769, + "step": 118990 + }, + { + "epoch": 0.4807750578748126, + "grad_norm": 1095.3011474609375, + "learning_rate": 2.510891515871581e-05, + "loss": 71.5072, + "step": 119000 + }, + { + "epoch": 0.48081545914018026, + "grad_norm": 509.4305725097656, + "learning_rate": 2.510621522869749e-05, + "loss": 210.233, + "step": 119010 + }, + { + "epoch": 0.4808558604055479, + "grad_norm": 601.487548828125, + "learning_rate": 2.5103515199130875e-05, + "loss": 62.5554, + "step": 119020 + }, + { + "epoch": 0.48089626167091554, + "grad_norm": 877.5287475585938, + "learning_rate": 2.5100815070068598e-05, + "loss": 90.6623, + "step": 119030 + }, + { + "epoch": 0.4809366629362832, + "grad_norm": 587.19580078125, + "learning_rate": 2.5098114841563304e-05, + "loss": 81.6227, + "step": 119040 + }, + { + "epoch": 0.4809770642016508, + "grad_norm": 366.15631103515625, + "learning_rate": 2.5095414513667646e-05, + "loss": 63.2692, + "step": 119050 + }, + { + "epoch": 0.4810174654670184, + "grad_norm": 1942.1395263671875, + "learning_rate": 2.5092714086434257e-05, + "loss": 81.657, + "step": 119060 + }, + { + "epoch": 0.48105786673238604, + "grad_norm": 485.7444763183594, + "learning_rate": 2.509001355991578e-05, + "loss": 74.3113, + "step": 119070 + }, + { + "epoch": 0.4810982679977537, + "grad_norm": 317.6514892578125, + "learning_rate": 2.508731293416487e-05, + "loss": 84.5405, + "step": 119080 + }, + { + "epoch": 0.4811386692631213, + "grad_norm": 621.9979858398438, + "learning_rate": 2.5084612209234177e-05, + "loss": 53.6745, + "step": 119090 + }, + { + "epoch": 0.48117907052848896, + "grad_norm": 946.2128295898438, + "learning_rate": 2.5081911385176345e-05, + "loss": 130.1265, + "step": 119100 + }, + { + "epoch": 0.4812194717938566, + "grad_norm": 902.2601928710938, + "learning_rate": 2.5079210462044038e-05, + "loss": 60.8367, + "step": 119110 + }, + { + "epoch": 0.48125987305922424, + "grad_norm": 734.294677734375, + "learning_rate": 2.5076509439889906e-05, + "loss": 65.5126, + "step": 119120 + }, + { + "epoch": 0.4813002743245918, + "grad_norm": 347.2582702636719, + "learning_rate": 2.5073808318766615e-05, + "loss": 66.9212, + "step": 119130 + }, + { + "epoch": 0.48134067558995947, + "grad_norm": 831.12939453125, + "learning_rate": 2.5071107098726813e-05, + "loss": 82.6807, + "step": 119140 + }, + { + "epoch": 0.4813810768553271, + 
"grad_norm": 493.5893859863281, + "learning_rate": 2.506840577982317e-05, + "loss": 84.1436, + "step": 119150 + }, + { + "epoch": 0.48142147812069475, + "grad_norm": 421.4710388183594, + "learning_rate": 2.5065704362108347e-05, + "loss": 78.1458, + "step": 119160 + }, + { + "epoch": 0.4814618793860624, + "grad_norm": 2773.160888671875, + "learning_rate": 2.506300284563501e-05, + "loss": 104.7359, + "step": 119170 + }, + { + "epoch": 0.48150228065143, + "grad_norm": 527.6319580078125, + "learning_rate": 2.5060301230455822e-05, + "loss": 81.1434, + "step": 119180 + }, + { + "epoch": 0.4815426819167976, + "grad_norm": 435.5665283203125, + "learning_rate": 2.505759951662346e-05, + "loss": 105.8747, + "step": 119190 + }, + { + "epoch": 0.48158308318216525, + "grad_norm": 1869.313232421875, + "learning_rate": 2.505489770419059e-05, + "loss": 76.5178, + "step": 119200 + }, + { + "epoch": 0.4816234844475329, + "grad_norm": 838.8035278320312, + "learning_rate": 2.5052195793209885e-05, + "loss": 74.0699, + "step": 119210 + }, + { + "epoch": 0.48166388571290053, + "grad_norm": 752.8107299804688, + "learning_rate": 2.5049493783734025e-05, + "loss": 66.3141, + "step": 119220 + }, + { + "epoch": 0.48170428697826817, + "grad_norm": 683.0404052734375, + "learning_rate": 2.5046791675815687e-05, + "loss": 79.4078, + "step": 119230 + }, + { + "epoch": 0.4817446882436358, + "grad_norm": 731.1390380859375, + "learning_rate": 2.5044089469507543e-05, + "loss": 66.5221, + "step": 119240 + }, + { + "epoch": 0.4817850895090034, + "grad_norm": 962.4032592773438, + "learning_rate": 2.504138716486228e-05, + "loss": 132.4291, + "step": 119250 + }, + { + "epoch": 0.48182549077437103, + "grad_norm": 787.0385131835938, + "learning_rate": 2.503868476193257e-05, + "loss": 57.4014, + "step": 119260 + }, + { + "epoch": 0.4818658920397387, + "grad_norm": 627.843505859375, + "learning_rate": 2.5035982260771117e-05, + "loss": 62.0198, + "step": 119270 + }, + { + "epoch": 0.4819062933051063, + "grad_norm": 454.7787170410156, + "learning_rate": 2.5033279661430592e-05, + "loss": 90.8561, + "step": 119280 + }, + { + "epoch": 0.48194669457047395, + "grad_norm": 889.0216064453125, + "learning_rate": 2.5030576963963692e-05, + "loss": 62.8162, + "step": 119290 + }, + { + "epoch": 0.4819870958358416, + "grad_norm": 520.8234252929688, + "learning_rate": 2.50278741684231e-05, + "loss": 98.6169, + "step": 119300 + }, + { + "epoch": 0.48202749710120923, + "grad_norm": 192.20962524414062, + "learning_rate": 2.5025171274861513e-05, + "loss": 98.3019, + "step": 119310 + }, + { + "epoch": 0.4820678983665768, + "grad_norm": 1356.5445556640625, + "learning_rate": 2.5022468283331632e-05, + "loss": 70.7788, + "step": 119320 + }, + { + "epoch": 0.48210829963194446, + "grad_norm": 782.0664672851562, + "learning_rate": 2.501976519388614e-05, + "loss": 123.0526, + "step": 119330 + }, + { + "epoch": 0.4821487008973121, + "grad_norm": 370.16680908203125, + "learning_rate": 2.501706200657774e-05, + "loss": 98.6105, + "step": 119340 + }, + { + "epoch": 0.48218910216267974, + "grad_norm": 1562.607421875, + "learning_rate": 2.5014358721459138e-05, + "loss": 93.2694, + "step": 119350 + }, + { + "epoch": 0.4822295034280474, + "grad_norm": 672.490478515625, + "learning_rate": 2.5011655338583026e-05, + "loss": 94.7193, + "step": 119360 + }, + { + "epoch": 0.482269904693415, + "grad_norm": 770.9610595703125, + "learning_rate": 2.500895185800212e-05, + "loss": 65.9559, + "step": 119370 + }, + { + "epoch": 0.4823103059587826, + "grad_norm": 1041.4803466796875, + 
"learning_rate": 2.500624827976911e-05, + "loss": 103.6159, + "step": 119380 + }, + { + "epoch": 0.48235070722415024, + "grad_norm": 621.3819580078125, + "learning_rate": 2.5003544603936716e-05, + "loss": 74.4687, + "step": 119390 + }, + { + "epoch": 0.4823911084895179, + "grad_norm": 384.0485534667969, + "learning_rate": 2.5000840830557644e-05, + "loss": 69.3453, + "step": 119400 + }, + { + "epoch": 0.4824315097548855, + "grad_norm": 562.4686279296875, + "learning_rate": 2.4998136959684615e-05, + "loss": 65.5321, + "step": 119410 + }, + { + "epoch": 0.48247191102025316, + "grad_norm": 1170.8536376953125, + "learning_rate": 2.4995432991370318e-05, + "loss": 81.7007, + "step": 119420 + }, + { + "epoch": 0.4825123122856208, + "grad_norm": 323.64208984375, + "learning_rate": 2.4992728925667487e-05, + "loss": 96.1427, + "step": 119430 + }, + { + "epoch": 0.48255271355098844, + "grad_norm": 825.5402221679688, + "learning_rate": 2.4990024762628837e-05, + "loss": 101.34, + "step": 119440 + }, + { + "epoch": 0.48259311481635603, + "grad_norm": 724.6677856445312, + "learning_rate": 2.498732050230708e-05, + "loss": 55.3948, + "step": 119450 + }, + { + "epoch": 0.48263351608172367, + "grad_norm": 540.9104614257812, + "learning_rate": 2.4984616144754954e-05, + "loss": 55.8137, + "step": 119460 + }, + { + "epoch": 0.4826739173470913, + "grad_norm": 482.5035705566406, + "learning_rate": 2.498191169002516e-05, + "loss": 98.8825, + "step": 119470 + }, + { + "epoch": 0.48271431861245895, + "grad_norm": 466.008056640625, + "learning_rate": 2.4979207138170436e-05, + "loss": 97.1662, + "step": 119480 + }, + { + "epoch": 0.4827547198778266, + "grad_norm": 841.44873046875, + "learning_rate": 2.497650248924351e-05, + "loss": 72.8064, + "step": 119490 + }, + { + "epoch": 0.48279512114319423, + "grad_norm": 531.578857421875, + "learning_rate": 2.4973797743297103e-05, + "loss": 70.5512, + "step": 119500 + }, + { + "epoch": 0.4828355224085618, + "grad_norm": 788.2471923828125, + "learning_rate": 2.4971092900383945e-05, + "loss": 45.3882, + "step": 119510 + }, + { + "epoch": 0.48287592367392945, + "grad_norm": 1095.9281005859375, + "learning_rate": 2.496838796055677e-05, + "loss": 116.2725, + "step": 119520 + }, + { + "epoch": 0.4829163249392971, + "grad_norm": 890.851318359375, + "learning_rate": 2.4965682923868317e-05, + "loss": 116.0288, + "step": 119530 + }, + { + "epoch": 0.48295672620466473, + "grad_norm": 292.2893981933594, + "learning_rate": 2.496297779037132e-05, + "loss": 66.161, + "step": 119540 + }, + { + "epoch": 0.4829971274700324, + "grad_norm": 474.9371643066406, + "learning_rate": 2.4960272560118513e-05, + "loss": 83.4205, + "step": 119550 + }, + { + "epoch": 0.4830375287354, + "grad_norm": 920.8817749023438, + "learning_rate": 2.495756723316264e-05, + "loss": 96.1001, + "step": 119560 + }, + { + "epoch": 0.4830779300007676, + "grad_norm": 824.1600952148438, + "learning_rate": 2.4954861809556446e-05, + "loss": 62.5382, + "step": 119570 + }, + { + "epoch": 0.48311833126613524, + "grad_norm": 289.04541015625, + "learning_rate": 2.4952156289352666e-05, + "loss": 76.438, + "step": 119580 + }, + { + "epoch": 0.4831587325315029, + "grad_norm": 674.1458129882812, + "learning_rate": 2.494945067260405e-05, + "loss": 58.4398, + "step": 119590 + }, + { + "epoch": 0.4831991337968705, + "grad_norm": 983.1885986328125, + "learning_rate": 2.4946744959363343e-05, + "loss": 74.4203, + "step": 119600 + }, + { + "epoch": 0.48323953506223816, + "grad_norm": 419.6803894042969, + "learning_rate": 2.4944039149683296e-05, + 
"loss": 68.0925, + "step": 119610 + }, + { + "epoch": 0.4832799363276058, + "grad_norm": 819.8699951171875, + "learning_rate": 2.4941333243616662e-05, + "loss": 65.1013, + "step": 119620 + }, + { + "epoch": 0.48332033759297344, + "grad_norm": 1057.6195068359375, + "learning_rate": 2.4938627241216192e-05, + "loss": 88.7305, + "step": 119630 + }, + { + "epoch": 0.483360738858341, + "grad_norm": 562.9495239257812, + "learning_rate": 2.4935921142534642e-05, + "loss": 76.6933, + "step": 119640 + }, + { + "epoch": 0.48340114012370866, + "grad_norm": 547.6692504882812, + "learning_rate": 2.4933214947624773e-05, + "loss": 69.52, + "step": 119650 + }, + { + "epoch": 0.4834415413890763, + "grad_norm": 700.7909545898438, + "learning_rate": 2.493050865653933e-05, + "loss": 69.1788, + "step": 119660 + }, + { + "epoch": 0.48348194265444394, + "grad_norm": 880.1427001953125, + "learning_rate": 2.492780226933109e-05, + "loss": 68.2944, + "step": 119670 + }, + { + "epoch": 0.4835223439198116, + "grad_norm": 679.2254028320312, + "learning_rate": 2.4925095786052805e-05, + "loss": 42.7495, + "step": 119680 + }, + { + "epoch": 0.4835627451851792, + "grad_norm": 634.0556030273438, + "learning_rate": 2.492238920675724e-05, + "loss": 115.5892, + "step": 119690 + }, + { + "epoch": 0.4836031464505468, + "grad_norm": 362.93231201171875, + "learning_rate": 2.4919682531497164e-05, + "loss": 73.4358, + "step": 119700 + }, + { + "epoch": 0.48364354771591445, + "grad_norm": 684.0317993164062, + "learning_rate": 2.4916975760325343e-05, + "loss": 93.6979, + "step": 119710 + }, + { + "epoch": 0.4836839489812821, + "grad_norm": 984.5344848632812, + "learning_rate": 2.4914268893294548e-05, + "loss": 80.8495, + "step": 119720 + }, + { + "epoch": 0.4837243502466497, + "grad_norm": 868.182373046875, + "learning_rate": 2.491156193045755e-05, + "loss": 83.8891, + "step": 119730 + }, + { + "epoch": 0.48376475151201737, + "grad_norm": 543.0606689453125, + "learning_rate": 2.490885487186713e-05, + "loss": 90.3241, + "step": 119740 + }, + { + "epoch": 0.483805152777385, + "grad_norm": 667.4617919921875, + "learning_rate": 2.4906147717576054e-05, + "loss": 56.8515, + "step": 119750 + }, + { + "epoch": 0.48384555404275265, + "grad_norm": 1212.9775390625, + "learning_rate": 2.4903440467637104e-05, + "loss": 80.0081, + "step": 119760 + }, + { + "epoch": 0.48388595530812023, + "grad_norm": 854.0977172851562, + "learning_rate": 2.4900733122103053e-05, + "loss": 79.3506, + "step": 119770 + }, + { + "epoch": 0.48392635657348787, + "grad_norm": 932.5455932617188, + "learning_rate": 2.4898025681026693e-05, + "loss": 73.3493, + "step": 119780 + }, + { + "epoch": 0.4839667578388555, + "grad_norm": 1096.927001953125, + "learning_rate": 2.4895318144460793e-05, + "loss": 63.3743, + "step": 119790 + }, + { + "epoch": 0.48400715910422315, + "grad_norm": 751.5577392578125, + "learning_rate": 2.489261051245815e-05, + "loss": 64.1132, + "step": 119800 + }, + { + "epoch": 0.4840475603695908, + "grad_norm": 611.0308837890625, + "learning_rate": 2.488990278507154e-05, + "loss": 82.1647, + "step": 119810 + }, + { + "epoch": 0.48408796163495843, + "grad_norm": 930.701904296875, + "learning_rate": 2.4887194962353767e-05, + "loss": 78.3403, + "step": 119820 + }, + { + "epoch": 0.484128362900326, + "grad_norm": 467.8174743652344, + "learning_rate": 2.4884487044357608e-05, + "loss": 77.0122, + "step": 119830 + }, + { + "epoch": 0.48416876416569365, + "grad_norm": 962.4681396484375, + "learning_rate": 2.4881779031135863e-05, + "loss": 64.838, + "step": 119840 + }, 
+ { + "epoch": 0.4842091654310613, + "grad_norm": 907.8468627929688, + "learning_rate": 2.4879070922741316e-05, + "loss": 90.7583, + "step": 119850 + }, + { + "epoch": 0.48424956669642893, + "grad_norm": 992.3377685546875, + "learning_rate": 2.4876362719226776e-05, + "loss": 78.8603, + "step": 119860 + }, + { + "epoch": 0.4842899679617966, + "grad_norm": 898.1716918945312, + "learning_rate": 2.487365442064503e-05, + "loss": 82.3782, + "step": 119870 + }, + { + "epoch": 0.4843303692271642, + "grad_norm": 849.2183837890625, + "learning_rate": 2.4870946027048878e-05, + "loss": 84.2089, + "step": 119880 + }, + { + "epoch": 0.4843707704925318, + "grad_norm": 584.0562744140625, + "learning_rate": 2.4868237538491128e-05, + "loss": 80.4826, + "step": 119890 + }, + { + "epoch": 0.48441117175789944, + "grad_norm": 978.0103759765625, + "learning_rate": 2.4865528955024587e-05, + "loss": 73.077, + "step": 119900 + }, + { + "epoch": 0.4844515730232671, + "grad_norm": 534.3222045898438, + "learning_rate": 2.4862820276702052e-05, + "loss": 89.2528, + "step": 119910 + }, + { + "epoch": 0.4844919742886347, + "grad_norm": 1046.9949951171875, + "learning_rate": 2.4860111503576326e-05, + "loss": 99.6296, + "step": 119920 + }, + { + "epoch": 0.48453237555400236, + "grad_norm": 599.7755126953125, + "learning_rate": 2.4857402635700232e-05, + "loss": 49.2895, + "step": 119930 + }, + { + "epoch": 0.48457277681937, + "grad_norm": 559.5242309570312, + "learning_rate": 2.4854693673126572e-05, + "loss": 80.8889, + "step": 119940 + }, + { + "epoch": 0.48461317808473764, + "grad_norm": 342.4100341796875, + "learning_rate": 2.485198461590815e-05, + "loss": 75.0802, + "step": 119950 + }, + { + "epoch": 0.4846535793501052, + "grad_norm": 725.5858154296875, + "learning_rate": 2.4849275464097796e-05, + "loss": 69.5807, + "step": 119960 + }, + { + "epoch": 0.48469398061547286, + "grad_norm": 600.6963500976562, + "learning_rate": 2.484656621774832e-05, + "loss": 53.3738, + "step": 119970 + }, + { + "epoch": 0.4847343818808405, + "grad_norm": 1060.8973388671875, + "learning_rate": 2.484385687691254e-05, + "loss": 90.2675, + "step": 119980 + }, + { + "epoch": 0.48477478314620814, + "grad_norm": 1650.609375, + "learning_rate": 2.484114744164328e-05, + "loss": 110.1788, + "step": 119990 + }, + { + "epoch": 0.4848151844115758, + "grad_norm": 394.85247802734375, + "learning_rate": 2.4838437911993355e-05, + "loss": 42.4243, + "step": 120000 + }, + { + "epoch": 0.4848555856769434, + "grad_norm": 1380.049560546875, + "learning_rate": 2.4835728288015597e-05, + "loss": 66.3422, + "step": 120010 + }, + { + "epoch": 0.484895986942311, + "grad_norm": 1672.320068359375, + "learning_rate": 2.483301856976282e-05, + "loss": 76.0911, + "step": 120020 + }, + { + "epoch": 0.48493638820767865, + "grad_norm": 1183.8551025390625, + "learning_rate": 2.483030875728786e-05, + "loss": 86.8273, + "step": 120030 + }, + { + "epoch": 0.4849767894730463, + "grad_norm": 1455.19677734375, + "learning_rate": 2.4827598850643548e-05, + "loss": 89.5707, + "step": 120040 + }, + { + "epoch": 0.48501719073841393, + "grad_norm": 536.6494750976562, + "learning_rate": 2.48248888498827e-05, + "loss": 85.2533, + "step": 120050 + }, + { + "epoch": 0.48505759200378157, + "grad_norm": 1177.7908935546875, + "learning_rate": 2.4822178755058167e-05, + "loss": 103.6653, + "step": 120060 + }, + { + "epoch": 0.4850979932691492, + "grad_norm": 849.2089233398438, + "learning_rate": 2.481946856622278e-05, + "loss": 64.7258, + "step": 120070 + }, + { + "epoch": 0.48513839453451685, + 
"grad_norm": 440.4406433105469, + "learning_rate": 2.481675828342937e-05, + "loss": 99.9779, + "step": 120080 + }, + { + "epoch": 0.48517879579988443, + "grad_norm": 982.1458129882812, + "learning_rate": 2.4814047906730774e-05, + "loss": 63.9195, + "step": 120090 + }, + { + "epoch": 0.4852191970652521, + "grad_norm": 906.9108276367188, + "learning_rate": 2.4811337436179835e-05, + "loss": 73.9092, + "step": 120100 + }, + { + "epoch": 0.4852595983306197, + "grad_norm": 2648.330322265625, + "learning_rate": 2.4808626871829402e-05, + "loss": 73.7301, + "step": 120110 + }, + { + "epoch": 0.48529999959598735, + "grad_norm": 465.84326171875, + "learning_rate": 2.4805916213732308e-05, + "loss": 73.3927, + "step": 120120 + }, + { + "epoch": 0.485340400861355, + "grad_norm": 1703.0106201171875, + "learning_rate": 2.4803205461941402e-05, + "loss": 88.9226, + "step": 120130 + }, + { + "epoch": 0.48538080212672263, + "grad_norm": 565.7413330078125, + "learning_rate": 2.4800494616509533e-05, + "loss": 73.7021, + "step": 120140 + }, + { + "epoch": 0.4854212033920902, + "grad_norm": 1347.8641357421875, + "learning_rate": 2.4797783677489554e-05, + "loss": 107.6501, + "step": 120150 + }, + { + "epoch": 0.48546160465745786, + "grad_norm": 1171.96923828125, + "learning_rate": 2.4795072644934307e-05, + "loss": 71.3713, + "step": 120160 + }, + { + "epoch": 0.4855020059228255, + "grad_norm": 504.4379577636719, + "learning_rate": 2.4792361518896653e-05, + "loss": 62.2898, + "step": 120170 + }, + { + "epoch": 0.48554240718819314, + "grad_norm": 811.8753051757812, + "learning_rate": 2.4789650299429444e-05, + "loss": 67.0998, + "step": 120180 + }, + { + "epoch": 0.4855828084535608, + "grad_norm": 561.9525756835938, + "learning_rate": 2.4786938986585537e-05, + "loss": 81.9662, + "step": 120190 + }, + { + "epoch": 0.4856232097189284, + "grad_norm": 503.3394775390625, + "learning_rate": 2.478422758041779e-05, + "loss": 67.5309, + "step": 120200 + }, + { + "epoch": 0.485663610984296, + "grad_norm": 686.9485473632812, + "learning_rate": 2.478151608097906e-05, + "loss": 63.1134, + "step": 120210 + }, + { + "epoch": 0.48570401224966364, + "grad_norm": 667.7425537109375, + "learning_rate": 2.4778804488322214e-05, + "loss": 77.1578, + "step": 120220 + }, + { + "epoch": 0.4857444135150313, + "grad_norm": 610.60693359375, + "learning_rate": 2.4776092802500115e-05, + "loss": 52.4865, + "step": 120230 + }, + { + "epoch": 0.4857848147803989, + "grad_norm": 889.573486328125, + "learning_rate": 2.477338102356563e-05, + "loss": 84.5018, + "step": 120240 + }, + { + "epoch": 0.48582521604576656, + "grad_norm": 1107.960205078125, + "learning_rate": 2.477066915157162e-05, + "loss": 66.816, + "step": 120250 + }, + { + "epoch": 0.4858656173111342, + "grad_norm": 2395.661376953125, + "learning_rate": 2.4767957186570963e-05, + "loss": 81.6822, + "step": 120260 + }, + { + "epoch": 0.48590601857650184, + "grad_norm": 774.957763671875, + "learning_rate": 2.4765245128616523e-05, + "loss": 93.5102, + "step": 120270 + }, + { + "epoch": 0.4859464198418694, + "grad_norm": 388.7480163574219, + "learning_rate": 2.476253297776118e-05, + "loss": 50.8794, + "step": 120280 + }, + { + "epoch": 0.48598682110723707, + "grad_norm": 597.4124145507812, + "learning_rate": 2.4759820734057794e-05, + "loss": 49.231, + "step": 120290 + }, + { + "epoch": 0.4860272223726047, + "grad_norm": 708.588134765625, + "learning_rate": 2.4757108397559263e-05, + "loss": 101.0384, + "step": 120300 + }, + { + "epoch": 0.48606762363797235, + "grad_norm": 671.2006225585938, + 
"learning_rate": 2.475439596831845e-05, + "loss": 82.0845, + "step": 120310 + }, + { + "epoch": 0.48610802490334, + "grad_norm": 1304.120849609375, + "learning_rate": 2.4751683446388243e-05, + "loss": 110.8464, + "step": 120320 + }, + { + "epoch": 0.4861484261687076, + "grad_norm": 815.2864379882812, + "learning_rate": 2.474897083182152e-05, + "loss": 100.6553, + "step": 120330 + }, + { + "epoch": 0.4861888274340752, + "grad_norm": 573.0238037109375, + "learning_rate": 2.474625812467116e-05, + "loss": 79.3459, + "step": 120340 + }, + { + "epoch": 0.48622922869944285, + "grad_norm": 584.3265991210938, + "learning_rate": 2.4743545324990063e-05, + "loss": 74.8363, + "step": 120350 + }, + { + "epoch": 0.4862696299648105, + "grad_norm": 466.3558654785156, + "learning_rate": 2.4740832432831105e-05, + "loss": 58.1531, + "step": 120360 + }, + { + "epoch": 0.48631003123017813, + "grad_norm": 1099.5382080078125, + "learning_rate": 2.4738119448247178e-05, + "loss": 119.2516, + "step": 120370 + }, + { + "epoch": 0.48635043249554577, + "grad_norm": 627.3672485351562, + "learning_rate": 2.473540637129117e-05, + "loss": 111.5759, + "step": 120380 + }, + { + "epoch": 0.4863908337609134, + "grad_norm": 655.34765625, + "learning_rate": 2.473269320201598e-05, + "loss": 67.3804, + "step": 120390 + }, + { + "epoch": 0.48643123502628105, + "grad_norm": 625.1474609375, + "learning_rate": 2.47299799404745e-05, + "loss": 91.1888, + "step": 120400 + }, + { + "epoch": 0.48647163629164863, + "grad_norm": 1379.4129638671875, + "learning_rate": 2.4727266586719617e-05, + "loss": 99.2026, + "step": 120410 + }, + { + "epoch": 0.4865120375570163, + "grad_norm": 733.4869384765625, + "learning_rate": 2.4724553140804247e-05, + "loss": 79.2914, + "step": 120420 + }, + { + "epoch": 0.4865524388223839, + "grad_norm": 1026.6614990234375, + "learning_rate": 2.472183960278128e-05, + "loss": 65.6672, + "step": 120430 + }, + { + "epoch": 0.48659284008775155, + "grad_norm": 889.0552978515625, + "learning_rate": 2.4719125972703624e-05, + "loss": 70.4762, + "step": 120440 + }, + { + "epoch": 0.4866332413531192, + "grad_norm": 699.1395263671875, + "learning_rate": 2.471641225062417e-05, + "loss": 109.5955, + "step": 120450 + }, + { + "epoch": 0.48667364261848683, + "grad_norm": 567.0905151367188, + "learning_rate": 2.471369843659583e-05, + "loss": 45.5922, + "step": 120460 + }, + { + "epoch": 0.4867140438838544, + "grad_norm": 716.1432495117188, + "learning_rate": 2.471098453067152e-05, + "loss": 63.8209, + "step": 120470 + }, + { + "epoch": 0.48675444514922206, + "grad_norm": 886.13427734375, + "learning_rate": 2.4708270532904134e-05, + "loss": 102.914, + "step": 120480 + }, + { + "epoch": 0.4867948464145897, + "grad_norm": 608.5283203125, + "learning_rate": 2.4705556443346588e-05, + "loss": 93.3066, + "step": 120490 + }, + { + "epoch": 0.48683524767995734, + "grad_norm": 651.0779418945312, + "learning_rate": 2.47028422620518e-05, + "loss": 67.4759, + "step": 120500 + }, + { + "epoch": 0.486875648945325, + "grad_norm": 1103.9395751953125, + "learning_rate": 2.4700127989072683e-05, + "loss": 80.4023, + "step": 120510 + }, + { + "epoch": 0.4869160502106926, + "grad_norm": 1109.5986328125, + "learning_rate": 2.469741362446215e-05, + "loss": 80.4784, + "step": 120520 + }, + { + "epoch": 0.4869564514760602, + "grad_norm": 480.8632507324219, + "learning_rate": 2.469469916827312e-05, + "loss": 92.899, + "step": 120530 + }, + { + "epoch": 0.48699685274142784, + "grad_norm": 432.48199462890625, + "learning_rate": 2.4691984620558516e-05, + 
"loss": 123.7068, + "step": 120540 + }, + { + "epoch": 0.4870372540067955, + "grad_norm": 574.6146240234375, + "learning_rate": 2.4689269981371247e-05, + "loss": 113.2858, + "step": 120550 + }, + { + "epoch": 0.4870776552721631, + "grad_norm": 793.2593994140625, + "learning_rate": 2.468655525076426e-05, + "loss": 93.6491, + "step": 120560 + }, + { + "epoch": 0.48711805653753076, + "grad_norm": 951.4713745117188, + "learning_rate": 2.468384042879045e-05, + "loss": 72.8038, + "step": 120570 + }, + { + "epoch": 0.4871584578028984, + "grad_norm": 906.2860717773438, + "learning_rate": 2.468112551550277e-05, + "loss": 81.6621, + "step": 120580 + }, + { + "epoch": 0.48719885906826604, + "grad_norm": 643.4618530273438, + "learning_rate": 2.467841051095413e-05, + "loss": 85.291, + "step": 120590 + }, + { + "epoch": 0.48723926033363363, + "grad_norm": 578.2364501953125, + "learning_rate": 2.4675695415197476e-05, + "loss": 78.8369, + "step": 120600 + }, + { + "epoch": 0.48727966159900127, + "grad_norm": 652.1512451171875, + "learning_rate": 2.4672980228285734e-05, + "loss": 96.5023, + "step": 120610 + }, + { + "epoch": 0.4873200628643689, + "grad_norm": 1984.2548828125, + "learning_rate": 2.4670264950271828e-05, + "loss": 87.3977, + "step": 120620 + }, + { + "epoch": 0.48736046412973655, + "grad_norm": 991.6414184570312, + "learning_rate": 2.4667549581208707e-05, + "loss": 116.5671, + "step": 120630 + }, + { + "epoch": 0.4874008653951042, + "grad_norm": 1232.9769287109375, + "learning_rate": 2.4664834121149308e-05, + "loss": 92.4171, + "step": 120640 + }, + { + "epoch": 0.48744126666047183, + "grad_norm": 684.319580078125, + "learning_rate": 2.466211857014656e-05, + "loss": 65.8787, + "step": 120650 + }, + { + "epoch": 0.4874816679258394, + "grad_norm": 513.7102661132812, + "learning_rate": 2.4659402928253407e-05, + "loss": 57.9733, + "step": 120660 + }, + { + "epoch": 0.48752206919120705, + "grad_norm": 801.7551879882812, + "learning_rate": 2.4656687195522802e-05, + "loss": 64.851, + "step": 120670 + }, + { + "epoch": 0.4875624704565747, + "grad_norm": 949.32470703125, + "learning_rate": 2.465397137200768e-05, + "loss": 68.4854, + "step": 120680 + }, + { + "epoch": 0.48760287172194233, + "grad_norm": 678.7447509765625, + "learning_rate": 2.4651255457760988e-05, + "loss": 56.274, + "step": 120690 + }, + { + "epoch": 0.48764327298731, + "grad_norm": 1837.3316650390625, + "learning_rate": 2.4648539452835682e-05, + "loss": 88.5154, + "step": 120700 + }, + { + "epoch": 0.4876836742526776, + "grad_norm": 496.2486877441406, + "learning_rate": 2.46458233572847e-05, + "loss": 87.2834, + "step": 120710 + }, + { + "epoch": 0.48772407551804525, + "grad_norm": 580.468505859375, + "learning_rate": 2.464310717116101e-05, + "loss": 70.6984, + "step": 120720 + }, + { + "epoch": 0.48776447678341284, + "grad_norm": 1168.1060791015625, + "learning_rate": 2.4640390894517548e-05, + "loss": 46.5905, + "step": 120730 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 410.1440734863281, + "learning_rate": 2.4637674527407272e-05, + "loss": 71.0717, + "step": 120740 + }, + { + "epoch": 0.4878452793141481, + "grad_norm": 146.05154418945312, + "learning_rate": 2.4634958069883144e-05, + "loss": 60.9932, + "step": 120750 + }, + { + "epoch": 0.48788568057951576, + "grad_norm": 560.4864501953125, + "learning_rate": 2.463224152199813e-05, + "loss": 63.1173, + "step": 120760 + }, + { + "epoch": 0.4879260818448834, + "grad_norm": 964.2786865234375, + "learning_rate": 2.462952488380518e-05, + "loss": 81.9313, + "step": 120770 + }, 
+ { + "epoch": 0.48796648311025104, + "grad_norm": 1190.0516357421875, + "learning_rate": 2.4626808155357262e-05, + "loss": 127.6782, + "step": 120780 + }, + { + "epoch": 0.4880068843756186, + "grad_norm": 1194.260498046875, + "learning_rate": 2.462409133670733e-05, + "loss": 77.8219, + "step": 120790 + }, + { + "epoch": 0.48804728564098626, + "grad_norm": 681.9465942382812, + "learning_rate": 2.4621374427908363e-05, + "loss": 58.4191, + "step": 120800 + }, + { + "epoch": 0.4880876869063539, + "grad_norm": 392.9099426269531, + "learning_rate": 2.461865742901332e-05, + "loss": 45.7737, + "step": 120810 + }, + { + "epoch": 0.48812808817172154, + "grad_norm": 794.9099731445312, + "learning_rate": 2.461594034007517e-05, + "loss": 67.6726, + "step": 120820 + }, + { + "epoch": 0.4881684894370892, + "grad_norm": 868.7100830078125, + "learning_rate": 2.4613223161146883e-05, + "loss": 109.3214, + "step": 120830 + }, + { + "epoch": 0.4882088907024568, + "grad_norm": 484.81866455078125, + "learning_rate": 2.4610505892281445e-05, + "loss": 72.5817, + "step": 120840 + }, + { + "epoch": 0.4882492919678244, + "grad_norm": 511.8673400878906, + "learning_rate": 2.4607788533531815e-05, + "loss": 85.1669, + "step": 120850 + }, + { + "epoch": 0.48828969323319205, + "grad_norm": 652.2692260742188, + "learning_rate": 2.4605071084950974e-05, + "loss": 84.5282, + "step": 120860 + }, + { + "epoch": 0.4883300944985597, + "grad_norm": 798.8328247070312, + "learning_rate": 2.4602353546591907e-05, + "loss": 107.3094, + "step": 120870 + }, + { + "epoch": 0.4883704957639273, + "grad_norm": 647.73583984375, + "learning_rate": 2.459963591850759e-05, + "loss": 82.0566, + "step": 120880 + }, + { + "epoch": 0.48841089702929497, + "grad_norm": 566.8096923828125, + "learning_rate": 2.4596918200750994e-05, + "loss": 104.3471, + "step": 120890 + }, + { + "epoch": 0.4884512982946626, + "grad_norm": 328.6711730957031, + "learning_rate": 2.4594200393375114e-05, + "loss": 64.374, + "step": 120900 + }, + { + "epoch": 0.48849169956003025, + "grad_norm": 647.3331909179688, + "learning_rate": 2.4591482496432933e-05, + "loss": 53.9235, + "step": 120910 + }, + { + "epoch": 0.48853210082539783, + "grad_norm": 1029.5423583984375, + "learning_rate": 2.458876450997744e-05, + "loss": 92.8734, + "step": 120920 + }, + { + "epoch": 0.48857250209076547, + "grad_norm": 430.05731201171875, + "learning_rate": 2.4586046434061618e-05, + "loss": 77.7034, + "step": 120930 + }, + { + "epoch": 0.4886129033561331, + "grad_norm": 676.9320678710938, + "learning_rate": 2.4583328268738457e-05, + "loss": 76.6953, + "step": 120940 + }, + { + "epoch": 0.48865330462150075, + "grad_norm": 560.4332885742188, + "learning_rate": 2.4580610014060957e-05, + "loss": 57.8157, + "step": 120950 + }, + { + "epoch": 0.4886937058868684, + "grad_norm": 344.0078430175781, + "learning_rate": 2.4577891670082102e-05, + "loss": 45.098, + "step": 120960 + }, + { + "epoch": 0.48873410715223603, + "grad_norm": 597.7210083007812, + "learning_rate": 2.4575173236854896e-05, + "loss": 58.3158, + "step": 120970 + }, + { + "epoch": 0.4887745084176036, + "grad_norm": 625.5091552734375, + "learning_rate": 2.4572454714432324e-05, + "loss": 103.9133, + "step": 120980 + }, + { + "epoch": 0.48881490968297125, + "grad_norm": 1625.877197265625, + "learning_rate": 2.45697361028674e-05, + "loss": 132.6655, + "step": 120990 + }, + { + "epoch": 0.4888553109483389, + "grad_norm": 577.2813720703125, + "learning_rate": 2.4567017402213118e-05, + "loss": 73.4132, + "step": 121000 + }, + { + "epoch": 
0.48889571221370653, + "grad_norm": 881.597412109375, + "learning_rate": 2.456429861252248e-05, + "loss": 98.0588, + "step": 121010 + }, + { + "epoch": 0.4889361134790742, + "grad_norm": 548.3545532226562, + "learning_rate": 2.4561579733848486e-05, + "loss": 57.5114, + "step": 121020 + }, + { + "epoch": 0.4889765147444418, + "grad_norm": 991.5888671875, + "learning_rate": 2.455886076624415e-05, + "loss": 89.6205, + "step": 121030 + }, + { + "epoch": 0.48901691600980945, + "grad_norm": 747.1013793945312, + "learning_rate": 2.4556141709762482e-05, + "loss": 63.0639, + "step": 121040 + }, + { + "epoch": 0.48905731727517704, + "grad_norm": 627.502685546875, + "learning_rate": 2.4553422564456483e-05, + "loss": 119.9385, + "step": 121050 + }, + { + "epoch": 0.4890977185405447, + "grad_norm": 877.8662109375, + "learning_rate": 2.4550703330379167e-05, + "loss": 99.0592, + "step": 121060 + }, + { + "epoch": 0.4891381198059123, + "grad_norm": 730.1051635742188, + "learning_rate": 2.454798400758354e-05, + "loss": 83.3637, + "step": 121070 + }, + { + "epoch": 0.48917852107127996, + "grad_norm": 765.2272338867188, + "learning_rate": 2.4545264596122627e-05, + "loss": 84.6825, + "step": 121080 + }, + { + "epoch": 0.4892189223366476, + "grad_norm": 431.79608154296875, + "learning_rate": 2.4542545096049444e-05, + "loss": 95.7066, + "step": 121090 + }, + { + "epoch": 0.48925932360201524, + "grad_norm": 1458.674072265625, + "learning_rate": 2.4539825507417005e-05, + "loss": 119.3779, + "step": 121100 + }, + { + "epoch": 0.4892997248673828, + "grad_norm": 1049.220947265625, + "learning_rate": 2.4537105830278323e-05, + "loss": 112.5317, + "step": 121110 + }, + { + "epoch": 0.48934012613275046, + "grad_norm": 802.5245971679688, + "learning_rate": 2.4534386064686438e-05, + "loss": 97.5475, + "step": 121120 + }, + { + "epoch": 0.4893805273981181, + "grad_norm": 516.5579833984375, + "learning_rate": 2.453166621069435e-05, + "loss": 77.0891, + "step": 121130 + }, + { + "epoch": 0.48942092866348574, + "grad_norm": 583.3533325195312, + "learning_rate": 2.452894626835511e-05, + "loss": 71.0375, + "step": 121140 + }, + { + "epoch": 0.4894613299288534, + "grad_norm": 823.4742431640625, + "learning_rate": 2.452622623772172e-05, + "loss": 75.3994, + "step": 121150 + }, + { + "epoch": 0.489501731194221, + "grad_norm": 581.3960571289062, + "learning_rate": 2.4523506118847223e-05, + "loss": 93.999, + "step": 121160 + }, + { + "epoch": 0.4895421324595886, + "grad_norm": 370.5791320800781, + "learning_rate": 2.4520785911784648e-05, + "loss": 96.9688, + "step": 121170 + }, + { + "epoch": 0.48958253372495625, + "grad_norm": 273.726318359375, + "learning_rate": 2.4518065616587018e-05, + "loss": 72.6316, + "step": 121180 + }, + { + "epoch": 0.4896229349903239, + "grad_norm": 608.4071655273438, + "learning_rate": 2.4515345233307377e-05, + "loss": 65.3768, + "step": 121190 + }, + { + "epoch": 0.48966333625569153, + "grad_norm": 667.296875, + "learning_rate": 2.4512624761998753e-05, + "loss": 68.5628, + "step": 121200 + }, + { + "epoch": 0.48970373752105917, + "grad_norm": 543.4629516601562, + "learning_rate": 2.4509904202714193e-05, + "loss": 88.8893, + "step": 121210 + }, + { + "epoch": 0.4897441387864268, + "grad_norm": 855.2021484375, + "learning_rate": 2.4507183555506723e-05, + "loss": 81.2798, + "step": 121220 + }, + { + "epoch": 0.48978454005179445, + "grad_norm": 317.741455078125, + "learning_rate": 2.450446282042939e-05, + "loss": 47.5482, + "step": 121230 + }, + { + "epoch": 0.48982494131716203, + "grad_norm": 
840.0529174804688, + "learning_rate": 2.4501741997535234e-05, + "loss": 74.3702, + "step": 121240 + }, + { + "epoch": 0.4898653425825297, + "grad_norm": 393.52764892578125, + "learning_rate": 2.4499021086877304e-05, + "loss": 49.4519, + "step": 121250 + }, + { + "epoch": 0.4899057438478973, + "grad_norm": 816.437255859375, + "learning_rate": 2.449630008850864e-05, + "loss": 80.8295, + "step": 121260 + }, + { + "epoch": 0.48994614511326495, + "grad_norm": 2022.5732421875, + "learning_rate": 2.4493579002482286e-05, + "loss": 77.5058, + "step": 121270 + }, + { + "epoch": 0.4899865463786326, + "grad_norm": 1041.41748046875, + "learning_rate": 2.44908578288513e-05, + "loss": 94.6453, + "step": 121280 + }, + { + "epoch": 0.49002694764400023, + "grad_norm": 645.1863403320312, + "learning_rate": 2.448813656766873e-05, + "loss": 116.0168, + "step": 121290 + }, + { + "epoch": 0.4900673489093678, + "grad_norm": 701.5032958984375, + "learning_rate": 2.4485415218987628e-05, + "loss": 73.178, + "step": 121300 + }, + { + "epoch": 0.49010775017473546, + "grad_norm": 1272.55859375, + "learning_rate": 2.4482693782861046e-05, + "loss": 112.2181, + "step": 121310 + }, + { + "epoch": 0.4901481514401031, + "grad_norm": 705.1486206054688, + "learning_rate": 2.4479972259342037e-05, + "loss": 53.1469, + "step": 121320 + }, + { + "epoch": 0.49018855270547074, + "grad_norm": 358.90643310546875, + "learning_rate": 2.4477250648483664e-05, + "loss": 72.6494, + "step": 121330 + }, + { + "epoch": 0.4902289539708384, + "grad_norm": 708.568115234375, + "learning_rate": 2.447452895033899e-05, + "loss": 85.7438, + "step": 121340 + }, + { + "epoch": 0.490269355236206, + "grad_norm": 805.8172607421875, + "learning_rate": 2.447180716496106e-05, + "loss": 78.9247, + "step": 121350 + }, + { + "epoch": 0.49030975650157366, + "grad_norm": 621.662841796875, + "learning_rate": 2.4469085292402954e-05, + "loss": 79.9817, + "step": 121360 + }, + { + "epoch": 0.49035015776694124, + "grad_norm": 382.5184326171875, + "learning_rate": 2.446636333271773e-05, + "loss": 84.828, + "step": 121370 + }, + { + "epoch": 0.4903905590323089, + "grad_norm": 724.5690307617188, + "learning_rate": 2.446364128595845e-05, + "loss": 77.5322, + "step": 121380 + }, + { + "epoch": 0.4904309602976765, + "grad_norm": 678.5905151367188, + "learning_rate": 2.446091915217819e-05, + "loss": 60.7944, + "step": 121390 + }, + { + "epoch": 0.49047136156304416, + "grad_norm": 3606.58935546875, + "learning_rate": 2.445819693143001e-05, + "loss": 73.9069, + "step": 121400 + }, + { + "epoch": 0.4905117628284118, + "grad_norm": 1105.2510986328125, + "learning_rate": 2.4455474623766987e-05, + "loss": 86.7892, + "step": 121410 + }, + { + "epoch": 0.49055216409377944, + "grad_norm": 578.3053588867188, + "learning_rate": 2.4452752229242192e-05, + "loss": 68.6698, + "step": 121420 + }, + { + "epoch": 0.490592565359147, + "grad_norm": 1125.4365234375, + "learning_rate": 2.44500297479087e-05, + "loss": 90.041, + "step": 121430 + }, + { + "epoch": 0.49063296662451467, + "grad_norm": 773.282958984375, + "learning_rate": 2.4447307179819584e-05, + "loss": 86.3008, + "step": 121440 + }, + { + "epoch": 0.4906733678898823, + "grad_norm": 703.2347412109375, + "learning_rate": 2.4444584525027934e-05, + "loss": 74.4815, + "step": 121450 + }, + { + "epoch": 0.49071376915524995, + "grad_norm": 500.7328796386719, + "learning_rate": 2.4441861783586812e-05, + "loss": 77.327, + "step": 121460 + }, + { + "epoch": 0.4907541704206176, + "grad_norm": 728.6350708007812, + "learning_rate": 
2.4439138955549308e-05, + "loss": 66.0806, + "step": 121470 + }, + { + "epoch": 0.4907945716859852, + "grad_norm": 935.1435546875, + "learning_rate": 2.443641604096851e-05, + "loss": 105.1448, + "step": 121480 + }, + { + "epoch": 0.4908349729513528, + "grad_norm": 371.174072265625, + "learning_rate": 2.44336930398975e-05, + "loss": 67.5273, + "step": 121490 + }, + { + "epoch": 0.49087537421672045, + "grad_norm": 598.9290161132812, + "learning_rate": 2.443096995238935e-05, + "loss": 66.7699, + "step": 121500 + }, + { + "epoch": 0.4909157754820881, + "grad_norm": 331.3316650390625, + "learning_rate": 2.4428246778497164e-05, + "loss": 51.9296, + "step": 121510 + }, + { + "epoch": 0.49095617674745573, + "grad_norm": 2541.25830078125, + "learning_rate": 2.4425523518274027e-05, + "loss": 110.2823, + "step": 121520 + }, + { + "epoch": 0.49099657801282337, + "grad_norm": 615.3324584960938, + "learning_rate": 2.4422800171773033e-05, + "loss": 104.0641, + "step": 121530 + }, + { + "epoch": 0.491036979278191, + "grad_norm": 1419.2388916015625, + "learning_rate": 2.4420076739047273e-05, + "loss": 83.7779, + "step": 121540 + }, + { + "epoch": 0.49107738054355865, + "grad_norm": 617.3104248046875, + "learning_rate": 2.441735322014984e-05, + "loss": 44.8537, + "step": 121550 + }, + { + "epoch": 0.49111778180892623, + "grad_norm": 616.0048828125, + "learning_rate": 2.4414629615133833e-05, + "loss": 72.6817, + "step": 121560 + }, + { + "epoch": 0.4911581830742939, + "grad_norm": 673.1790771484375, + "learning_rate": 2.4411905924052345e-05, + "loss": 92.7128, + "step": 121570 + }, + { + "epoch": 0.4911985843396615, + "grad_norm": 570.7882690429688, + "learning_rate": 2.440918214695849e-05, + "loss": 79.0064, + "step": 121580 + }, + { + "epoch": 0.49123898560502915, + "grad_norm": 966.1315307617188, + "learning_rate": 2.4406458283905345e-05, + "loss": 133.7895, + "step": 121590 + }, + { + "epoch": 0.4912793868703968, + "grad_norm": 777.6692504882812, + "learning_rate": 2.440373433494603e-05, + "loss": 92.0611, + "step": 121600 + }, + { + "epoch": 0.49131978813576443, + "grad_norm": 869.7113037109375, + "learning_rate": 2.4401010300133653e-05, + "loss": 106.9064, + "step": 121610 + }, + { + "epoch": 0.491360189401132, + "grad_norm": 569.958740234375, + "learning_rate": 2.4398286179521313e-05, + "loss": 101.441, + "step": 121620 + }, + { + "epoch": 0.49140059066649966, + "grad_norm": 546.1995239257812, + "learning_rate": 2.4395561973162117e-05, + "loss": 93.8264, + "step": 121630 + }, + { + "epoch": 0.4914409919318673, + "grad_norm": 231.9970245361328, + "learning_rate": 2.4392837681109177e-05, + "loss": 56.4772, + "step": 121640 + }, + { + "epoch": 0.49148139319723494, + "grad_norm": 579.3296508789062, + "learning_rate": 2.4390113303415612e-05, + "loss": 75.0745, + "step": 121650 + }, + { + "epoch": 0.4915217944626026, + "grad_norm": 1544.556396484375, + "learning_rate": 2.438738884013452e-05, + "loss": 71.4724, + "step": 121660 + }, + { + "epoch": 0.4915621957279702, + "grad_norm": 457.5162048339844, + "learning_rate": 2.4384664291319027e-05, + "loss": 53.8712, + "step": 121670 + }, + { + "epoch": 0.4916025969933378, + "grad_norm": 4491.1357421875, + "learning_rate": 2.4381939657022245e-05, + "loss": 90.5687, + "step": 121680 + }, + { + "epoch": 0.49164299825870544, + "grad_norm": 251.72425842285156, + "learning_rate": 2.4379214937297295e-05, + "loss": 60.9057, + "step": 121690 + }, + { + "epoch": 0.4916833995240731, + "grad_norm": 527.4282836914062, + "learning_rate": 2.4376490132197295e-05, + "loss": 
105.9585, + "step": 121700 + }, + { + "epoch": 0.4917238007894407, + "grad_norm": 651.80419921875, + "learning_rate": 2.4373765241775368e-05, + "loss": 50.6985, + "step": 121710 + }, + { + "epoch": 0.49176420205480836, + "grad_norm": 1315.571533203125, + "learning_rate": 2.437104026608463e-05, + "loss": 86.1721, + "step": 121720 + }, + { + "epoch": 0.491804603320176, + "grad_norm": 408.9228515625, + "learning_rate": 2.4368315205178217e-05, + "loss": 44.6121, + "step": 121730 + }, + { + "epoch": 0.49184500458554364, + "grad_norm": 1105.741455078125, + "learning_rate": 2.436559005910925e-05, + "loss": 98.0749, + "step": 121740 + }, + { + "epoch": 0.49188540585091123, + "grad_norm": 1028.651123046875, + "learning_rate": 2.4362864827930855e-05, + "loss": 85.9224, + "step": 121750 + }, + { + "epoch": 0.49192580711627887, + "grad_norm": 446.1821594238281, + "learning_rate": 2.4360139511696165e-05, + "loss": 104.5727, + "step": 121760 + }, + { + "epoch": 0.4919662083816465, + "grad_norm": 560.9019775390625, + "learning_rate": 2.4357414110458307e-05, + "loss": 59.7036, + "step": 121770 + }, + { + "epoch": 0.49200660964701415, + "grad_norm": 604.6292114257812, + "learning_rate": 2.4354688624270427e-05, + "loss": 59.3963, + "step": 121780 + }, + { + "epoch": 0.4920470109123818, + "grad_norm": 836.2964477539062, + "learning_rate": 2.4351963053185638e-05, + "loss": 87.6908, + "step": 121790 + }, + { + "epoch": 0.49208741217774943, + "grad_norm": 746.4942016601562, + "learning_rate": 2.43492373972571e-05, + "loss": 61.8183, + "step": 121800 + }, + { + "epoch": 0.492127813443117, + "grad_norm": 1046.4361572265625, + "learning_rate": 2.434651165653793e-05, + "loss": 142.85, + "step": 121810 + }, + { + "epoch": 0.49216821470848465, + "grad_norm": 862.5107421875, + "learning_rate": 2.4343785831081285e-05, + "loss": 57.9469, + "step": 121820 + }, + { + "epoch": 0.4922086159738523, + "grad_norm": 542.1762084960938, + "learning_rate": 2.43410599209403e-05, + "loss": 60.6474, + "step": 121830 + }, + { + "epoch": 0.49224901723921993, + "grad_norm": 624.1466064453125, + "learning_rate": 2.4338333926168112e-05, + "loss": 49.1437, + "step": 121840 + }, + { + "epoch": 0.4922894185045876, + "grad_norm": 812.0460205078125, + "learning_rate": 2.433560784681787e-05, + "loss": 89.3206, + "step": 121850 + }, + { + "epoch": 0.4923298197699552, + "grad_norm": 800.0659790039062, + "learning_rate": 2.4332881682942723e-05, + "loss": 81.1719, + "step": 121860 + }, + { + "epoch": 0.49237022103532285, + "grad_norm": 363.2164306640625, + "learning_rate": 2.4330155434595817e-05, + "loss": 71.2778, + "step": 121870 + }, + { + "epoch": 0.49241062230069044, + "grad_norm": 739.5545654296875, + "learning_rate": 2.4327429101830297e-05, + "loss": 95.4023, + "step": 121880 + }, + { + "epoch": 0.4924510235660581, + "grad_norm": 1234.7591552734375, + "learning_rate": 2.4324702684699323e-05, + "loss": 95.8032, + "step": 121890 + }, + { + "epoch": 0.4924914248314257, + "grad_norm": 732.7054443359375, + "learning_rate": 2.4321976183256045e-05, + "loss": 66.7003, + "step": 121900 + }, + { + "epoch": 0.49253182609679336, + "grad_norm": 288.765380859375, + "learning_rate": 2.4319249597553616e-05, + "loss": 53.6706, + "step": 121910 + }, + { + "epoch": 0.492572227362161, + "grad_norm": 748.4280395507812, + "learning_rate": 2.431652292764519e-05, + "loss": 63.3793, + "step": 121920 + }, + { + "epoch": 0.49261262862752864, + "grad_norm": 236.4117431640625, + "learning_rate": 2.431379617358393e-05, + "loss": 89.5247, + "step": 121930 + }, + { + 
"epoch": 0.4926530298928962, + "grad_norm": 1097.3651123046875, + "learning_rate": 2.431106933542299e-05, + "loss": 73.7105, + "step": 121940 + }, + { + "epoch": 0.49269343115826386, + "grad_norm": 856.8369140625, + "learning_rate": 2.430834241321553e-05, + "loss": 103.2319, + "step": 121950 + }, + { + "epoch": 0.4927338324236315, + "grad_norm": 1090.7271728515625, + "learning_rate": 2.430561540701472e-05, + "loss": 86.0953, + "step": 121960 + }, + { + "epoch": 0.49277423368899914, + "grad_norm": 410.91314697265625, + "learning_rate": 2.4302888316873726e-05, + "loss": 64.5899, + "step": 121970 + }, + { + "epoch": 0.4928146349543668, + "grad_norm": 821.9375, + "learning_rate": 2.4300161142845708e-05, + "loss": 91.6263, + "step": 121980 + }, + { + "epoch": 0.4928550362197344, + "grad_norm": 468.89556884765625, + "learning_rate": 2.4297433884983827e-05, + "loss": 50.7364, + "step": 121990 + }, + { + "epoch": 0.492895437485102, + "grad_norm": 568.6695556640625, + "learning_rate": 2.4294706543341267e-05, + "loss": 95.9335, + "step": 122000 + }, + { + "epoch": 0.49293583875046965, + "grad_norm": 1387.9644775390625, + "learning_rate": 2.429197911797119e-05, + "loss": 94.8243, + "step": 122010 + }, + { + "epoch": 0.4929762400158373, + "grad_norm": 786.1040649414062, + "learning_rate": 2.428925160892677e-05, + "loss": 79.6641, + "step": 122020 + }, + { + "epoch": 0.4930166412812049, + "grad_norm": 3119.762939453125, + "learning_rate": 2.4286524016261184e-05, + "loss": 117.7543, + "step": 122030 + }, + { + "epoch": 0.49305704254657257, + "grad_norm": 505.270751953125, + "learning_rate": 2.42837963400276e-05, + "loss": 92.1748, + "step": 122040 + }, + { + "epoch": 0.4930974438119402, + "grad_norm": 674.3359985351562, + "learning_rate": 2.4281068580279202e-05, + "loss": 65.2873, + "step": 122050 + }, + { + "epoch": 0.49313784507730785, + "grad_norm": 951.321044921875, + "learning_rate": 2.427834073706917e-05, + "loss": 119.5942, + "step": 122060 + }, + { + "epoch": 0.49317824634267543, + "grad_norm": 905.1078491210938, + "learning_rate": 2.4275612810450687e-05, + "loss": 60.2367, + "step": 122070 + }, + { + "epoch": 0.49321864760804307, + "grad_norm": 1418.0528564453125, + "learning_rate": 2.427288480047693e-05, + "loss": 55.6961, + "step": 122080 + }, + { + "epoch": 0.4932590488734107, + "grad_norm": 1029.9874267578125, + "learning_rate": 2.4270156707201075e-05, + "loss": 62.0447, + "step": 122090 + }, + { + "epoch": 0.49329945013877835, + "grad_norm": 1602.9757080078125, + "learning_rate": 2.4267428530676324e-05, + "loss": 69.7372, + "step": 122100 + }, + { + "epoch": 0.493339851404146, + "grad_norm": 957.5477294921875, + "learning_rate": 2.4264700270955857e-05, + "loss": 69.1488, + "step": 122110 + }, + { + "epoch": 0.49338025266951363, + "grad_norm": 995.9920043945312, + "learning_rate": 2.4261971928092858e-05, + "loss": 71.9818, + "step": 122120 + }, + { + "epoch": 0.4934206539348812, + "grad_norm": 975.5771484375, + "learning_rate": 2.4259243502140522e-05, + "loss": 63.553, + "step": 122130 + }, + { + "epoch": 0.49346105520024885, + "grad_norm": 896.5932006835938, + "learning_rate": 2.4256514993152042e-05, + "loss": 75.0237, + "step": 122140 + }, + { + "epoch": 0.4935014564656165, + "grad_norm": 2196.12255859375, + "learning_rate": 2.4253786401180618e-05, + "loss": 133.4314, + "step": 122150 + }, + { + "epoch": 0.49354185773098413, + "grad_norm": 886.4937133789062, + "learning_rate": 2.425105772627943e-05, + "loss": 70.2482, + "step": 122160 + }, + { + "epoch": 0.4935822589963518, + "grad_norm": 
1086.2032470703125, + "learning_rate": 2.424832896850169e-05, + "loss": 100.5019, + "step": 122170 + }, + { + "epoch": 0.4936226602617194, + "grad_norm": 1307.1348876953125, + "learning_rate": 2.4245600127900586e-05, + "loss": 86.6157, + "step": 122180 + }, + { + "epoch": 0.49366306152708705, + "grad_norm": 743.4760131835938, + "learning_rate": 2.4242871204529317e-05, + "loss": 103.1339, + "step": 122190 + }, + { + "epoch": 0.49370346279245464, + "grad_norm": 1142.2166748046875, + "learning_rate": 2.42401421984411e-05, + "loss": 77.4651, + "step": 122200 + }, + { + "epoch": 0.4937438640578223, + "grad_norm": 683.6112060546875, + "learning_rate": 2.4237413109689118e-05, + "loss": 106.5851, + "step": 122210 + }, + { + "epoch": 0.4937842653231899, + "grad_norm": 873.92578125, + "learning_rate": 2.423468393832659e-05, + "loss": 89.3272, + "step": 122220 + }, + { + "epoch": 0.49382466658855756, + "grad_norm": 1023.6698608398438, + "learning_rate": 2.4231954684406716e-05, + "loss": 83.803, + "step": 122230 + }, + { + "epoch": 0.4938650678539252, + "grad_norm": 412.075927734375, + "learning_rate": 2.4229225347982707e-05, + "loss": 65.0841, + "step": 122240 + }, + { + "epoch": 0.49390546911929284, + "grad_norm": 217.85411071777344, + "learning_rate": 2.422649592910778e-05, + "loss": 85.1963, + "step": 122250 + }, + { + "epoch": 0.4939458703846604, + "grad_norm": 394.8777160644531, + "learning_rate": 2.4223766427835126e-05, + "loss": 64.6769, + "step": 122260 + }, + { + "epoch": 0.49398627165002806, + "grad_norm": 945.1768798828125, + "learning_rate": 2.4221036844217985e-05, + "loss": 82.6173, + "step": 122270 + }, + { + "epoch": 0.4940266729153957, + "grad_norm": 242.72601318359375, + "learning_rate": 2.4218307178309547e-05, + "loss": 44.661, + "step": 122280 + }, + { + "epoch": 0.49406707418076334, + "grad_norm": 825.0850830078125, + "learning_rate": 2.4215577430163042e-05, + "loss": 80.7476, + "step": 122290 + }, + { + "epoch": 0.494107475446131, + "grad_norm": 862.6529541015625, + "learning_rate": 2.4212847599831682e-05, + "loss": 75.8824, + "step": 122300 + }, + { + "epoch": 0.4941478767114986, + "grad_norm": 380.354248046875, + "learning_rate": 2.4210117687368694e-05, + "loss": 72.6598, + "step": 122310 + }, + { + "epoch": 0.4941882779768662, + "grad_norm": 495.6409912109375, + "learning_rate": 2.4207387692827295e-05, + "loss": 77.4705, + "step": 122320 + }, + { + "epoch": 0.49422867924223385, + "grad_norm": 420.5531005859375, + "learning_rate": 2.42046576162607e-05, + "loss": 56.8308, + "step": 122330 + }, + { + "epoch": 0.4942690805076015, + "grad_norm": 451.6242370605469, + "learning_rate": 2.4201927457722144e-05, + "loss": 88.4241, + "step": 122340 + }, + { + "epoch": 0.49430948177296913, + "grad_norm": 980.1364135742188, + "learning_rate": 2.4199197217264854e-05, + "loss": 72.4843, + "step": 122350 + }, + { + "epoch": 0.49434988303833677, + "grad_norm": 164.95497131347656, + "learning_rate": 2.4196466894942047e-05, + "loss": 77.8456, + "step": 122360 + }, + { + "epoch": 0.4943902843037044, + "grad_norm": 403.9043273925781, + "learning_rate": 2.4193736490806953e-05, + "loss": 73.7133, + "step": 122370 + }, + { + "epoch": 0.49443068556907205, + "grad_norm": 338.3937072753906, + "learning_rate": 2.419100600491281e-05, + "loss": 61.7073, + "step": 122380 + }, + { + "epoch": 0.49447108683443963, + "grad_norm": 1202.9063720703125, + "learning_rate": 2.4188275437312853e-05, + "loss": 69.7645, + "step": 122390 + }, + { + "epoch": 0.4945114880998073, + "grad_norm": 1517.267578125, + 
"learning_rate": 2.4185544788060306e-05, + "loss": 106.6861, + "step": 122400 + }, + { + "epoch": 0.4945518893651749, + "grad_norm": 867.900634765625, + "learning_rate": 2.4182814057208405e-05, + "loss": 96.3869, + "step": 122410 + }, + { + "epoch": 0.49459229063054255, + "grad_norm": 818.8439331054688, + "learning_rate": 2.41800832448104e-05, + "loss": 74.8494, + "step": 122420 + }, + { + "epoch": 0.4946326918959102, + "grad_norm": 1312.114013671875, + "learning_rate": 2.417735235091951e-05, + "loss": 118.8958, + "step": 122430 + }, + { + "epoch": 0.49467309316127783, + "grad_norm": 528.3131713867188, + "learning_rate": 2.4174621375588996e-05, + "loss": 81.4593, + "step": 122440 + }, + { + "epoch": 0.4947134944266454, + "grad_norm": 535.3844604492188, + "learning_rate": 2.417189031887208e-05, + "loss": 63.0803, + "step": 122450 + }, + { + "epoch": 0.49475389569201306, + "grad_norm": 736.334228515625, + "learning_rate": 2.4169159180822015e-05, + "loss": 84.1367, + "step": 122460 + }, + { + "epoch": 0.4947942969573807, + "grad_norm": 823.5589599609375, + "learning_rate": 2.416642796149205e-05, + "loss": 61.3327, + "step": 122470 + }, + { + "epoch": 0.49483469822274834, + "grad_norm": 1273.875, + "learning_rate": 2.4163696660935424e-05, + "loss": 58.1204, + "step": 122480 + }, + { + "epoch": 0.494875099488116, + "grad_norm": 732.9791259765625, + "learning_rate": 2.4160965279205386e-05, + "loss": 61.2488, + "step": 122490 + }, + { + "epoch": 0.4949155007534836, + "grad_norm": 306.0708923339844, + "learning_rate": 2.4158233816355185e-05, + "loss": 59.5019, + "step": 122500 + }, + { + "epoch": 0.49495590201885126, + "grad_norm": 778.806884765625, + "learning_rate": 2.4155502272438086e-05, + "loss": 73.1197, + "step": 122510 + }, + { + "epoch": 0.49499630328421884, + "grad_norm": 1005.1541137695312, + "learning_rate": 2.4152770647507328e-05, + "loss": 83.5518, + "step": 122520 + }, + { + "epoch": 0.4950367045495865, + "grad_norm": 825.9061889648438, + "learning_rate": 2.415003894161616e-05, + "loss": 58.2219, + "step": 122530 + }, + { + "epoch": 0.4950771058149541, + "grad_norm": 490.45758056640625, + "learning_rate": 2.414730715481785e-05, + "loss": 65.4786, + "step": 122540 + }, + { + "epoch": 0.49511750708032176, + "grad_norm": 638.4840698242188, + "learning_rate": 2.414457528716565e-05, + "loss": 70.3047, + "step": 122550 + }, + { + "epoch": 0.4951579083456894, + "grad_norm": 410.12982177734375, + "learning_rate": 2.414184333871283e-05, + "loss": 87.4493, + "step": 122560 + }, + { + "epoch": 0.49519830961105704, + "grad_norm": 1082.327880859375, + "learning_rate": 2.413911130951263e-05, + "loss": 68.6874, + "step": 122570 + }, + { + "epoch": 0.4952387108764246, + "grad_norm": 1699.6444091796875, + "learning_rate": 2.413637919961833e-05, + "loss": 73.2525, + "step": 122580 + }, + { + "epoch": 0.49527911214179227, + "grad_norm": 1086.631103515625, + "learning_rate": 2.4133647009083192e-05, + "loss": 100.5783, + "step": 122590 + }, + { + "epoch": 0.4953195134071599, + "grad_norm": 983.3731079101562, + "learning_rate": 2.4130914737960472e-05, + "loss": 113.5976, + "step": 122600 + }, + { + "epoch": 0.49535991467252755, + "grad_norm": 1163.1903076171875, + "learning_rate": 2.4128182386303448e-05, + "loss": 84.4952, + "step": 122610 + }, + { + "epoch": 0.4954003159378952, + "grad_norm": 903.212158203125, + "learning_rate": 2.4125449954165375e-05, + "loss": 78.3812, + "step": 122620 + }, + { + "epoch": 0.4954407172032628, + "grad_norm": 657.7420654296875, + "learning_rate": 2.412271744159953e-05, + 
"loss": 75.7644, + "step": 122630 + }, + { + "epoch": 0.4954811184686304, + "grad_norm": 480.19921875, + "learning_rate": 2.4119984848659195e-05, + "loss": 73.2301, + "step": 122640 + }, + { + "epoch": 0.49552151973399805, + "grad_norm": 885.2695922851562, + "learning_rate": 2.411725217539763e-05, + "loss": 91.401, + "step": 122650 + }, + { + "epoch": 0.4955619209993657, + "grad_norm": 441.935791015625, + "learning_rate": 2.411451942186811e-05, + "loss": 80.5663, + "step": 122660 + }, + { + "epoch": 0.49560232226473333, + "grad_norm": 1403.0130615234375, + "learning_rate": 2.411178658812392e-05, + "loss": 105.7808, + "step": 122670 + }, + { + "epoch": 0.49564272353010097, + "grad_norm": 928.5856323242188, + "learning_rate": 2.410905367421834e-05, + "loss": 127.3779, + "step": 122680 + }, + { + "epoch": 0.4956831247954686, + "grad_norm": 564.8436279296875, + "learning_rate": 2.4106320680204633e-05, + "loss": 81.4556, + "step": 122690 + }, + { + "epoch": 0.49572352606083625, + "grad_norm": 662.7551879882812, + "learning_rate": 2.410358760613609e-05, + "loss": 77.7972, + "step": 122700 + }, + { + "epoch": 0.49576392732620383, + "grad_norm": 959.5945434570312, + "learning_rate": 2.4100854452066003e-05, + "loss": 104.4031, + "step": 122710 + }, + { + "epoch": 0.4958043285915715, + "grad_norm": 969.4058227539062, + "learning_rate": 2.4098121218047642e-05, + "loss": 73.2513, + "step": 122720 + }, + { + "epoch": 0.4958447298569391, + "grad_norm": 873.369140625, + "learning_rate": 2.4095387904134295e-05, + "loss": 61.2855, + "step": 122730 + }, + { + "epoch": 0.49588513112230675, + "grad_norm": 459.99334716796875, + "learning_rate": 2.4092654510379253e-05, + "loss": 150.8828, + "step": 122740 + }, + { + "epoch": 0.4959255323876744, + "grad_norm": 582.45849609375, + "learning_rate": 2.4089921036835805e-05, + "loss": 86.609, + "step": 122750 + }, + { + "epoch": 0.49596593365304203, + "grad_norm": 616.7574462890625, + "learning_rate": 2.4087187483557245e-05, + "loss": 57.5154, + "step": 122760 + }, + { + "epoch": 0.4960063349184096, + "grad_norm": 487.3528747558594, + "learning_rate": 2.408445385059686e-05, + "loss": 92.4665, + "step": 122770 + }, + { + "epoch": 0.49604673618377726, + "grad_norm": 1262.3206787109375, + "learning_rate": 2.4081720138007944e-05, + "loss": 106.4543, + "step": 122780 + }, + { + "epoch": 0.4960871374491449, + "grad_norm": 804.2769165039062, + "learning_rate": 2.407898634584379e-05, + "loss": 69.9957, + "step": 122790 + }, + { + "epoch": 0.49612753871451254, + "grad_norm": 479.3500671386719, + "learning_rate": 2.40762524741577e-05, + "loss": 57.8802, + "step": 122800 + }, + { + "epoch": 0.4961679399798802, + "grad_norm": 368.7585754394531, + "learning_rate": 2.4073518523002965e-05, + "loss": 69.71, + "step": 122810 + }, + { + "epoch": 0.4962083412452478, + "grad_norm": 1164.1260986328125, + "learning_rate": 2.4070784492432893e-05, + "loss": 88.4587, + "step": 122820 + }, + { + "epoch": 0.49624874251061546, + "grad_norm": 536.5999755859375, + "learning_rate": 2.4068050382500774e-05, + "loss": 81.9398, + "step": 122830 + }, + { + "epoch": 0.49628914377598304, + "grad_norm": 652.4202270507812, + "learning_rate": 2.406531619325993e-05, + "loss": 63.6244, + "step": 122840 + }, + { + "epoch": 0.4963295450413507, + "grad_norm": 446.93408203125, + "learning_rate": 2.4062581924763647e-05, + "loss": 71.1186, + "step": 122850 + }, + { + "epoch": 0.4963699463067183, + "grad_norm": 456.352783203125, + "learning_rate": 2.405984757706524e-05, + "loss": 117.2445, + "step": 122860 + }, + { + 
"epoch": 0.49641034757208596, + "grad_norm": 532.8500366210938, + "learning_rate": 2.4057113150218013e-05, + "loss": 87.9835, + "step": 122870 + }, + { + "epoch": 0.4964507488374536, + "grad_norm": 461.66912841796875, + "learning_rate": 2.405437864427528e-05, + "loss": 74.7121, + "step": 122880 + }, + { + "epoch": 0.49649115010282124, + "grad_norm": 788.5820922851562, + "learning_rate": 2.4051644059290344e-05, + "loss": 56.9272, + "step": 122890 + }, + { + "epoch": 0.49653155136818883, + "grad_norm": 339.0431823730469, + "learning_rate": 2.404890939531652e-05, + "loss": 58.7755, + "step": 122900 + }, + { + "epoch": 0.49657195263355647, + "grad_norm": 1377.38525390625, + "learning_rate": 2.4046174652407123e-05, + "loss": 97.1942, + "step": 122910 + }, + { + "epoch": 0.4966123538989241, + "grad_norm": 860.4175415039062, + "learning_rate": 2.4043439830615474e-05, + "loss": 69.3301, + "step": 122920 + }, + { + "epoch": 0.49665275516429175, + "grad_norm": 1011.8389282226562, + "learning_rate": 2.4040704929994877e-05, + "loss": 65.0918, + "step": 122930 + }, + { + "epoch": 0.4966931564296594, + "grad_norm": 661.6041870117188, + "learning_rate": 2.4037969950598664e-05, + "loss": 79.5169, + "step": 122940 + }, + { + "epoch": 0.49673355769502703, + "grad_norm": 784.3323364257812, + "learning_rate": 2.403523489248014e-05, + "loss": 94.7502, + "step": 122950 + }, + { + "epoch": 0.4967739589603946, + "grad_norm": 1595.494384765625, + "learning_rate": 2.4032499755692646e-05, + "loss": 80.4973, + "step": 122960 + }, + { + "epoch": 0.49681436022576225, + "grad_norm": 824.2272338867188, + "learning_rate": 2.4029764540289482e-05, + "loss": 89.9829, + "step": 122970 + }, + { + "epoch": 0.4968547614911299, + "grad_norm": 779.408447265625, + "learning_rate": 2.4027029246323986e-05, + "loss": 83.5915, + "step": 122980 + }, + { + "epoch": 0.49689516275649753, + "grad_norm": 1075.6331787109375, + "learning_rate": 2.402429387384948e-05, + "loss": 131.9565, + "step": 122990 + }, + { + "epoch": 0.4969355640218652, + "grad_norm": 414.9312744140625, + "learning_rate": 2.4021558422919297e-05, + "loss": 71.4759, + "step": 123000 + }, + { + "epoch": 0.4969759652872328, + "grad_norm": 980.7962646484375, + "learning_rate": 2.401882289358676e-05, + "loss": 112.9359, + "step": 123010 + }, + { + "epoch": 0.49701636655260045, + "grad_norm": 658.5823974609375, + "learning_rate": 2.40160872859052e-05, + "loss": 88.3067, + "step": 123020 + }, + { + "epoch": 0.49705676781796804, + "grad_norm": 797.7344360351562, + "learning_rate": 2.4013351599927957e-05, + "loss": 49.8922, + "step": 123030 + }, + { + "epoch": 0.4970971690833357, + "grad_norm": 398.2716064453125, + "learning_rate": 2.401061583570836e-05, + "loss": 87.9575, + "step": 123040 + }, + { + "epoch": 0.4971375703487033, + "grad_norm": 738.423828125, + "learning_rate": 2.400787999329973e-05, + "loss": 113.8274, + "step": 123050 + }, + { + "epoch": 0.49717797161407096, + "grad_norm": 2430.86279296875, + "learning_rate": 2.4005144072755423e-05, + "loss": 82.5272, + "step": 123060 + }, + { + "epoch": 0.4972183728794386, + "grad_norm": 503.8680114746094, + "learning_rate": 2.400240807412877e-05, + "loss": 86.7019, + "step": 123070 + }, + { + "epoch": 0.49725877414480624, + "grad_norm": 340.639892578125, + "learning_rate": 2.399967199747311e-05, + "loss": 72.7362, + "step": 123080 + }, + { + "epoch": 0.4972991754101738, + "grad_norm": 536.601806640625, + "learning_rate": 2.3996935842841787e-05, + "loss": 65.2371, + "step": 123090 + }, + { + "epoch": 0.49733957667554146, + 
"grad_norm": 865.8690795898438, + "learning_rate": 2.3994199610288142e-05, + "loss": 95.1852, + "step": 123100 + }, + { + "epoch": 0.4973799779409091, + "grad_norm": 1121.8709716796875, + "learning_rate": 2.399146329986552e-05, + "loss": 142.6217, + "step": 123110 + }, + { + "epoch": 0.49742037920627674, + "grad_norm": 1081.2674560546875, + "learning_rate": 2.3988726911627264e-05, + "loss": 90.4602, + "step": 123120 + }, + { + "epoch": 0.4974607804716444, + "grad_norm": 1107.0218505859375, + "learning_rate": 2.3985990445626727e-05, + "loss": 62.9885, + "step": 123130 + }, + { + "epoch": 0.497501181737012, + "grad_norm": 828.7254638671875, + "learning_rate": 2.3983253901917253e-05, + "loss": 37.4225, + "step": 123140 + }, + { + "epoch": 0.49754158300237966, + "grad_norm": 842.7921142578125, + "learning_rate": 2.3980517280552188e-05, + "loss": 49.9022, + "step": 123150 + }, + { + "epoch": 0.49758198426774725, + "grad_norm": 776.9707641601562, + "learning_rate": 2.397778058158489e-05, + "loss": 65.8708, + "step": 123160 + }, + { + "epoch": 0.4976223855331149, + "grad_norm": 498.4964599609375, + "learning_rate": 2.397504380506872e-05, + "loss": 81.194, + "step": 123170 + }, + { + "epoch": 0.4976627867984825, + "grad_norm": 735.9248657226562, + "learning_rate": 2.3972306951057017e-05, + "loss": 120.9918, + "step": 123180 + }, + { + "epoch": 0.49770318806385017, + "grad_norm": 537.8585205078125, + "learning_rate": 2.3969570019603144e-05, + "loss": 67.5244, + "step": 123190 + }, + { + "epoch": 0.4977435893292178, + "grad_norm": 658.7194213867188, + "learning_rate": 2.396683301076047e-05, + "loss": 80.3408, + "step": 123200 + }, + { + "epoch": 0.49778399059458545, + "grad_norm": 784.102294921875, + "learning_rate": 2.396409592458234e-05, + "loss": 73.3765, + "step": 123210 + }, + { + "epoch": 0.49782439185995303, + "grad_norm": 416.82427978515625, + "learning_rate": 2.396135876112211e-05, + "loss": 46.1481, + "step": 123220 + }, + { + "epoch": 0.49786479312532067, + "grad_norm": 969.4666137695312, + "learning_rate": 2.3958621520433162e-05, + "loss": 72.6744, + "step": 123230 + }, + { + "epoch": 0.4979051943906883, + "grad_norm": 1028.3092041015625, + "learning_rate": 2.3955884202568842e-05, + "loss": 67.4646, + "step": 123240 + }, + { + "epoch": 0.49794559565605595, + "grad_norm": 582.2713012695312, + "learning_rate": 2.3953146807582525e-05, + "loss": 79.7935, + "step": 123250 + }, + { + "epoch": 0.4979859969214236, + "grad_norm": 453.0330810546875, + "learning_rate": 2.3950409335527577e-05, + "loss": 71.0388, + "step": 123260 + }, + { + "epoch": 0.49802639818679123, + "grad_norm": 742.7041625976562, + "learning_rate": 2.3947671786457364e-05, + "loss": 59.4854, + "step": 123270 + }, + { + "epoch": 0.4980667994521588, + "grad_norm": 708.7224731445312, + "learning_rate": 2.394493416042526e-05, + "loss": 82.0612, + "step": 123280 + }, + { + "epoch": 0.49810720071752645, + "grad_norm": 653.9208984375, + "learning_rate": 2.394219645748463e-05, + "loss": 104.3022, + "step": 123290 + }, + { + "epoch": 0.4981476019828941, + "grad_norm": 802.8060913085938, + "learning_rate": 2.3939458677688857e-05, + "loss": 96.3571, + "step": 123300 + }, + { + "epoch": 0.49818800324826173, + "grad_norm": 1540.8363037109375, + "learning_rate": 2.3936720821091305e-05, + "loss": 106.8561, + "step": 123310 + }, + { + "epoch": 0.4982284045136294, + "grad_norm": 504.794189453125, + "learning_rate": 2.3933982887745348e-05, + "loss": 89.3424, + "step": 123320 + }, + { + "epoch": 0.498268805778997, + "grad_norm": 
607.6868896484375, + "learning_rate": 2.393124487770438e-05, + "loss": 59.8547, + "step": 123330 + }, + { + "epoch": 0.49830920704436465, + "grad_norm": 1383.67529296875, + "learning_rate": 2.3928506791021758e-05, + "loss": 82.4474, + "step": 123340 + }, + { + "epoch": 0.49834960830973224, + "grad_norm": 1111.6287841796875, + "learning_rate": 2.392576862775088e-05, + "loss": 69.2164, + "step": 123350 + }, + { + "epoch": 0.4983900095750999, + "grad_norm": 213.31710815429688, + "learning_rate": 2.3923030387945118e-05, + "loss": 43.7994, + "step": 123360 + }, + { + "epoch": 0.4984304108404675, + "grad_norm": 330.9508361816406, + "learning_rate": 2.3920292071657866e-05, + "loss": 72.9961, + "step": 123370 + }, + { + "epoch": 0.49847081210583516, + "grad_norm": 579.3356323242188, + "learning_rate": 2.39175536789425e-05, + "loss": 64.9604, + "step": 123380 + }, + { + "epoch": 0.4985112133712028, + "grad_norm": 581.3795776367188, + "learning_rate": 2.3914815209852404e-05, + "loss": 80.4839, + "step": 123390 + }, + { + "epoch": 0.49855161463657044, + "grad_norm": 658.8446044921875, + "learning_rate": 2.3912076664440967e-05, + "loss": 71.6973, + "step": 123400 + }, + { + "epoch": 0.498592015901938, + "grad_norm": 641.3970336914062, + "learning_rate": 2.3909338042761586e-05, + "loss": 82.8966, + "step": 123410 + }, + { + "epoch": 0.49863241716730566, + "grad_norm": 1364.7108154296875, + "learning_rate": 2.3906599344867648e-05, + "loss": 92.3921, + "step": 123420 + }, + { + "epoch": 0.4986728184326733, + "grad_norm": 1050.7869873046875, + "learning_rate": 2.390386057081254e-05, + "loss": 142.3496, + "step": 123430 + }, + { + "epoch": 0.49871321969804094, + "grad_norm": 886.931884765625, + "learning_rate": 2.3901121720649663e-05, + "loss": 66.4458, + "step": 123440 + }, + { + "epoch": 0.4987536209634086, + "grad_norm": 763.9518432617188, + "learning_rate": 2.3898382794432414e-05, + "loss": 71.265, + "step": 123450 + }, + { + "epoch": 0.4987940222287762, + "grad_norm": 989.2879638671875, + "learning_rate": 2.389564379221418e-05, + "loss": 71.8626, + "step": 123460 + }, + { + "epoch": 0.49883442349414386, + "grad_norm": 453.5799865722656, + "learning_rate": 2.3892904714048372e-05, + "loss": 71.5098, + "step": 123470 + }, + { + "epoch": 0.49887482475951145, + "grad_norm": 959.1405639648438, + "learning_rate": 2.3890165559988374e-05, + "loss": 81.1376, + "step": 123480 + }, + { + "epoch": 0.4989152260248791, + "grad_norm": 514.6488647460938, + "learning_rate": 2.38874263300876e-05, + "loss": 50.8478, + "step": 123490 + }, + { + "epoch": 0.49895562729024673, + "grad_norm": 884.4595336914062, + "learning_rate": 2.388468702439944e-05, + "loss": 68.1034, + "step": 123500 + }, + { + "epoch": 0.49899602855561437, + "grad_norm": 940.49755859375, + "learning_rate": 2.3881947642977314e-05, + "loss": 77.8266, + "step": 123510 + }, + { + "epoch": 0.499036429820982, + "grad_norm": 921.4026489257812, + "learning_rate": 2.387920818587462e-05, + "loss": 88.2991, + "step": 123520 + }, + { + "epoch": 0.49907683108634965, + "grad_norm": 2191.58642578125, + "learning_rate": 2.3876468653144764e-05, + "loss": 119.0783, + "step": 123530 + }, + { + "epoch": 0.49911723235171723, + "grad_norm": 366.9653625488281, + "learning_rate": 2.387372904484116e-05, + "loss": 69.9156, + "step": 123540 + }, + { + "epoch": 0.4991576336170849, + "grad_norm": 1001.5599365234375, + "learning_rate": 2.387098936101721e-05, + "loss": 105.1615, + "step": 123550 + }, + { + "epoch": 0.4991980348824525, + "grad_norm": 777.2265014648438, + 
"learning_rate": 2.386824960172633e-05, + "loss": 72.0871, + "step": 123560 + }, + { + "epoch": 0.49923843614782015, + "grad_norm": 1419.2415771484375, + "learning_rate": 2.386550976702193e-05, + "loss": 86.1587, + "step": 123570 + }, + { + "epoch": 0.4992788374131878, + "grad_norm": 638.14501953125, + "learning_rate": 2.3862769856957432e-05, + "loss": 79.1252, + "step": 123580 + }, + { + "epoch": 0.49931923867855543, + "grad_norm": 991.7211303710938, + "learning_rate": 2.3860029871586245e-05, + "loss": 81.0573, + "step": 123590 + }, + { + "epoch": 0.499359639943923, + "grad_norm": 807.3308715820312, + "learning_rate": 2.385728981096178e-05, + "loss": 70.8255, + "step": 123600 + }, + { + "epoch": 0.49940004120929066, + "grad_norm": 984.2042846679688, + "learning_rate": 2.3854549675137474e-05, + "loss": 94.8227, + "step": 123610 + }, + { + "epoch": 0.4994404424746583, + "grad_norm": 775.43408203125, + "learning_rate": 2.385180946416674e-05, + "loss": 68.8124, + "step": 123620 + }, + { + "epoch": 0.49948084374002594, + "grad_norm": 230.18186950683594, + "learning_rate": 2.384906917810299e-05, + "loss": 71.3703, + "step": 123630 + }, + { + "epoch": 0.4995212450053936, + "grad_norm": 1232.7469482421875, + "learning_rate": 2.384632881699966e-05, + "loss": 129.2027, + "step": 123640 + }, + { + "epoch": 0.4995616462707612, + "grad_norm": 572.2311401367188, + "learning_rate": 2.3843588380910164e-05, + "loss": 52.7834, + "step": 123650 + }, + { + "epoch": 0.49960204753612886, + "grad_norm": 764.8812255859375, + "learning_rate": 2.3840847869887938e-05, + "loss": 94.839, + "step": 123660 + }, + { + "epoch": 0.49964244880149644, + "grad_norm": 445.4139709472656, + "learning_rate": 2.3838107283986407e-05, + "loss": 53.4394, + "step": 123670 + }, + { + "epoch": 0.4996828500668641, + "grad_norm": 779.7825927734375, + "learning_rate": 2.3835366623258995e-05, + "loss": 60.854, + "step": 123680 + }, + { + "epoch": 0.4997232513322317, + "grad_norm": 304.24554443359375, + "learning_rate": 2.3832625887759133e-05, + "loss": 71.9532, + "step": 123690 + }, + { + "epoch": 0.49976365259759936, + "grad_norm": 549.197021484375, + "learning_rate": 2.382988507754026e-05, + "loss": 72.2328, + "step": 123700 + }, + { + "epoch": 0.499804053862967, + "grad_norm": 834.8193969726562, + "learning_rate": 2.3827144192655805e-05, + "loss": 112.5881, + "step": 123710 + }, + { + "epoch": 0.49984445512833464, + "grad_norm": 732.8499145507812, + "learning_rate": 2.382440323315921e-05, + "loss": 54.2736, + "step": 123720 + }, + { + "epoch": 0.4998848563937022, + "grad_norm": 1058.6573486328125, + "learning_rate": 2.38216621991039e-05, + "loss": 108.3559, + "step": 123730 + }, + { + "epoch": 0.49992525765906987, + "grad_norm": 587.6115112304688, + "learning_rate": 2.3818921090543317e-05, + "loss": 91.7427, + "step": 123740 + }, + { + "epoch": 0.4999656589244375, + "grad_norm": 604.4542236328125, + "learning_rate": 2.38161799075309e-05, + "loss": 44.3078, + "step": 123750 + }, + { + "epoch": 0.5000060601898051, + "grad_norm": 864.2062377929688, + "learning_rate": 2.381343865012009e-05, + "loss": 73.4043, + "step": 123760 + }, + { + "epoch": 0.5000464614551727, + "grad_norm": 947.9273681640625, + "learning_rate": 2.381069731836433e-05, + "loss": 56.9451, + "step": 123770 + }, + { + "epoch": 0.5000868627205404, + "grad_norm": 826.2166748046875, + "learning_rate": 2.3807955912317073e-05, + "loss": 65.5985, + "step": 123780 + }, + { + "epoch": 0.500127263985908, + "grad_norm": 958.3613891601562, + "learning_rate": 2.3805214432031745e-05, + 
"loss": 83.4149, + "step": 123790 + }, + { + "epoch": 0.5001676652512757, + "grad_norm": 841.5140991210938, + "learning_rate": 2.3802472877561805e-05, + "loss": 93.1645, + "step": 123800 + }, + { + "epoch": 0.5002080665166433, + "grad_norm": 752.6101684570312, + "learning_rate": 2.3799731248960703e-05, + "loss": 48.0118, + "step": 123810 + }, + { + "epoch": 0.5002484677820109, + "grad_norm": 362.3717956542969, + "learning_rate": 2.3796989546281884e-05, + "loss": 82.6599, + "step": 123820 + }, + { + "epoch": 0.5002888690473786, + "grad_norm": 375.84075927734375, + "learning_rate": 2.3794247769578793e-05, + "loss": 59.7294, + "step": 123830 + }, + { + "epoch": 0.5003292703127462, + "grad_norm": 632.019287109375, + "learning_rate": 2.379150591890489e-05, + "loss": 68.0384, + "step": 123840 + }, + { + "epoch": 0.5003696715781139, + "grad_norm": 773.5118408203125, + "learning_rate": 2.3788763994313627e-05, + "loss": 97.0633, + "step": 123850 + }, + { + "epoch": 0.5004100728434815, + "grad_norm": 883.8350219726562, + "learning_rate": 2.3786021995858465e-05, + "loss": 84.5112, + "step": 123860 + }, + { + "epoch": 0.5004504741088491, + "grad_norm": 524.0778198242188, + "learning_rate": 2.378327992359285e-05, + "loss": 97.8017, + "step": 123870 + }, + { + "epoch": 0.5004908753742168, + "grad_norm": 430.3297119140625, + "learning_rate": 2.3780537777570243e-05, + "loss": 56.6561, + "step": 123880 + }, + { + "epoch": 0.5005312766395843, + "grad_norm": 951.6124877929688, + "learning_rate": 2.377779555784411e-05, + "loss": 94.5018, + "step": 123890 + }, + { + "epoch": 0.5005716779049519, + "grad_norm": 373.7945251464844, + "learning_rate": 2.3775053264467908e-05, + "loss": 56.891, + "step": 123900 + }, + { + "epoch": 0.5006120791703196, + "grad_norm": 1462.4580078125, + "learning_rate": 2.3772310897495098e-05, + "loss": 63.5012, + "step": 123910 + }, + { + "epoch": 0.5006524804356872, + "grad_norm": 952.3980102539062, + "learning_rate": 2.3769568456979143e-05, + "loss": 93.0, + "step": 123920 + }, + { + "epoch": 0.5006928817010549, + "grad_norm": 645.07080078125, + "learning_rate": 2.376682594297351e-05, + "loss": 74.4258, + "step": 123930 + }, + { + "epoch": 0.5007332829664225, + "grad_norm": 718.09814453125, + "learning_rate": 2.376408335553167e-05, + "loss": 80.6176, + "step": 123940 + }, + { + "epoch": 0.5007736842317901, + "grad_norm": 925.30908203125, + "learning_rate": 2.376134069470708e-05, + "loss": 84.3014, + "step": 123950 + }, + { + "epoch": 0.5008140854971578, + "grad_norm": 475.9263610839844, + "learning_rate": 2.375859796055322e-05, + "loss": 65.8074, + "step": 123960 + }, + { + "epoch": 0.5008544867625254, + "grad_norm": 936.2493896484375, + "learning_rate": 2.3755855153123558e-05, + "loss": 64.4054, + "step": 123970 + }, + { + "epoch": 0.5008948880278931, + "grad_norm": 1089.3785400390625, + "learning_rate": 2.3753112272471574e-05, + "loss": 74.6321, + "step": 123980 + }, + { + "epoch": 0.5009352892932607, + "grad_norm": 453.5094299316406, + "learning_rate": 2.3750369318650726e-05, + "loss": 60.8828, + "step": 123990 + }, + { + "epoch": 0.5009756905586283, + "grad_norm": 746.8863525390625, + "learning_rate": 2.37476262917145e-05, + "loss": 72.9129, + "step": 124000 + }, + { + "epoch": 0.501016091823996, + "grad_norm": 656.0217895507812, + "learning_rate": 2.3744883191716364e-05, + "loss": 78.4561, + "step": 124010 + }, + { + "epoch": 0.5010564930893635, + "grad_norm": 262.51251220703125, + "learning_rate": 2.3742140018709806e-05, + "loss": 51.9013, + "step": 124020 + }, + { + "epoch": 
0.5010968943547311, + "grad_norm": 1111.6531982421875, + "learning_rate": 2.37393967727483e-05, + "loss": 109.8793, + "step": 124030 + }, + { + "epoch": 0.5011372956200988, + "grad_norm": 654.8194580078125, + "learning_rate": 2.3736653453885333e-05, + "loss": 55.7431, + "step": 124040 + }, + { + "epoch": 0.5011776968854664, + "grad_norm": 491.09613037109375, + "learning_rate": 2.373391006217438e-05, + "loss": 80.0188, + "step": 124050 + }, + { + "epoch": 0.5012180981508341, + "grad_norm": 1357.8626708984375, + "learning_rate": 2.3731166597668932e-05, + "loss": 100.7068, + "step": 124060 + }, + { + "epoch": 0.5012584994162017, + "grad_norm": 1067.775634765625, + "learning_rate": 2.3728423060422473e-05, + "loss": 55.3369, + "step": 124070 + }, + { + "epoch": 0.5012989006815693, + "grad_norm": 870.95654296875, + "learning_rate": 2.3725679450488474e-05, + "loss": 73.6384, + "step": 124080 + }, + { + "epoch": 0.501339301946937, + "grad_norm": 2816.150634765625, + "learning_rate": 2.3722935767920445e-05, + "loss": 83.0783, + "step": 124090 + }, + { + "epoch": 0.5013797032123046, + "grad_norm": 639.4296264648438, + "learning_rate": 2.372019201277186e-05, + "loss": 85.2372, + "step": 124100 + }, + { + "epoch": 0.5014201044776723, + "grad_norm": 752.8707885742188, + "learning_rate": 2.3717448185096227e-05, + "loss": 88.352, + "step": 124110 + }, + { + "epoch": 0.5014605057430399, + "grad_norm": 749.814697265625, + "learning_rate": 2.3714704284947023e-05, + "loss": 73.1754, + "step": 124120 + }, + { + "epoch": 0.5015009070084075, + "grad_norm": 1246.45361328125, + "learning_rate": 2.371196031237774e-05, + "loss": 78.3225, + "step": 124130 + }, + { + "epoch": 0.5015413082737752, + "grad_norm": 875.1344604492188, + "learning_rate": 2.3709216267441885e-05, + "loss": 98.8453, + "step": 124140 + }, + { + "epoch": 0.5015817095391427, + "grad_norm": 673.1982421875, + "learning_rate": 2.370647215019295e-05, + "loss": 62.7953, + "step": 124150 + }, + { + "epoch": 0.5016221108045104, + "grad_norm": 755.080322265625, + "learning_rate": 2.3703727960684435e-05, + "loss": 71.6966, + "step": 124160 + }, + { + "epoch": 0.501662512069878, + "grad_norm": 1243.663330078125, + "learning_rate": 2.3700983698969832e-05, + "loss": 75.5264, + "step": 124170 + }, + { + "epoch": 0.5017029133352456, + "grad_norm": 398.9146423339844, + "learning_rate": 2.3698239365102645e-05, + "loss": 101.0659, + "step": 124180 + }, + { + "epoch": 0.5017433146006133, + "grad_norm": 762.2984008789062, + "learning_rate": 2.3695494959136386e-05, + "loss": 72.0908, + "step": 124190 + }, + { + "epoch": 0.5017837158659809, + "grad_norm": 1238.1876220703125, + "learning_rate": 2.369275048112454e-05, + "loss": 103.8889, + "step": 124200 + }, + { + "epoch": 0.5018241171313486, + "grad_norm": 1743.1190185546875, + "learning_rate": 2.3690005931120624e-05, + "loss": 81.0214, + "step": 124210 + }, + { + "epoch": 0.5018645183967162, + "grad_norm": 775.9205932617188, + "learning_rate": 2.368726130917815e-05, + "loss": 54.7399, + "step": 124220 + }, + { + "epoch": 0.5019049196620838, + "grad_norm": 978.1658935546875, + "learning_rate": 2.3684516615350616e-05, + "loss": 80.7256, + "step": 124230 + }, + { + "epoch": 0.5019453209274515, + "grad_norm": 724.0523071289062, + "learning_rate": 2.368177184969153e-05, + "loss": 53.0191, + "step": 124240 + }, + { + "epoch": 0.5019857221928191, + "grad_norm": 486.6738586425781, + "learning_rate": 2.3679027012254404e-05, + "loss": 73.5531, + "step": 124250 + }, + { + "epoch": 0.5020261234581868, + "grad_norm": 
1064.50341796875, + "learning_rate": 2.367628210309276e-05, + "loss": 106.2727, + "step": 124260 + }, + { + "epoch": 0.5020665247235544, + "grad_norm": 376.0253601074219, + "learning_rate": 2.36735371222601e-05, + "loss": 79.7956, + "step": 124270 + }, + { + "epoch": 0.5021069259889219, + "grad_norm": 639.0972290039062, + "learning_rate": 2.3670792069809937e-05, + "loss": 76.5073, + "step": 124280 + }, + { + "epoch": 0.5021473272542896, + "grad_norm": 621.5938720703125, + "learning_rate": 2.36680469457958e-05, + "loss": 72.3291, + "step": 124290 + }, + { + "epoch": 0.5021877285196572, + "grad_norm": 1103.8951416015625, + "learning_rate": 2.3665301750271198e-05, + "loss": 118.3838, + "step": 124300 + }, + { + "epoch": 0.5022281297850248, + "grad_norm": 359.5256042480469, + "learning_rate": 2.3662556483289653e-05, + "loss": 85.6486, + "step": 124310 + }, + { + "epoch": 0.5022685310503925, + "grad_norm": 1902.8026123046875, + "learning_rate": 2.3659811144904678e-05, + "loss": 85.389, + "step": 124320 + }, + { + "epoch": 0.5023089323157601, + "grad_norm": 510.9039001464844, + "learning_rate": 2.3657065735169807e-05, + "loss": 81.0482, + "step": 124330 + }, + { + "epoch": 0.5023493335811278, + "grad_norm": 444.10345458984375, + "learning_rate": 2.3654320254138554e-05, + "loss": 55.1412, + "step": 124340 + }, + { + "epoch": 0.5023897348464954, + "grad_norm": 1051.240966796875, + "learning_rate": 2.3651574701864447e-05, + "loss": 88.3971, + "step": 124350 + }, + { + "epoch": 0.502430136111863, + "grad_norm": 1382.6998291015625, + "learning_rate": 2.364882907840101e-05, + "loss": 101.3224, + "step": 124360 + }, + { + "epoch": 0.5024705373772307, + "grad_norm": 1916.6981201171875, + "learning_rate": 2.364608338380177e-05, + "loss": 76.8723, + "step": 124370 + }, + { + "epoch": 0.5025109386425983, + "grad_norm": 580.8969116210938, + "learning_rate": 2.3643337618120256e-05, + "loss": 76.0087, + "step": 124380 + }, + { + "epoch": 0.502551339907966, + "grad_norm": 967.6674194335938, + "learning_rate": 2.3640591781410005e-05, + "loss": 73.6898, + "step": 124390 + }, + { + "epoch": 0.5025917411733335, + "grad_norm": 893.1395874023438, + "learning_rate": 2.363784587372454e-05, + "loss": 127.3695, + "step": 124400 + }, + { + "epoch": 0.5026321424387011, + "grad_norm": 636.568115234375, + "learning_rate": 2.36350998951174e-05, + "loss": 90.4836, + "step": 124410 + }, + { + "epoch": 0.5026725437040688, + "grad_norm": 307.4099426269531, + "learning_rate": 2.3632353845642115e-05, + "loss": 119.0505, + "step": 124420 + }, + { + "epoch": 0.5027129449694364, + "grad_norm": 484.4166259765625, + "learning_rate": 2.362960772535222e-05, + "loss": 71.8839, + "step": 124430 + }, + { + "epoch": 0.502753346234804, + "grad_norm": 874.7826538085938, + "learning_rate": 2.362686153430125e-05, + "loss": 86.882, + "step": 124440 + }, + { + "epoch": 0.5027937475001717, + "grad_norm": 697.2208862304688, + "learning_rate": 2.3624115272542748e-05, + "loss": 59.1912, + "step": 124450 + }, + { + "epoch": 0.5028341487655393, + "grad_norm": 483.3216552734375, + "learning_rate": 2.3621368940130257e-05, + "loss": 74.7138, + "step": 124460 + }, + { + "epoch": 0.502874550030907, + "grad_norm": 644.0543212890625, + "learning_rate": 2.3618622537117312e-05, + "loss": 86.0892, + "step": 124470 + }, + { + "epoch": 0.5029149512962746, + "grad_norm": 809.4756469726562, + "learning_rate": 2.361587606355746e-05, + "loss": 61.0269, + "step": 124480 + }, + { + "epoch": 0.5029553525616423, + "grad_norm": 605.9149169921875, + "learning_rate": 
2.3613129519504238e-05, + "loss": 77.3277, + "step": 124490 + }, + { + "epoch": 0.5029957538270099, + "grad_norm": 509.6092224121094, + "learning_rate": 2.3610382905011205e-05, + "loss": 55.3119, + "step": 124500 + }, + { + "epoch": 0.5030361550923775, + "grad_norm": 632.0637817382812, + "learning_rate": 2.3607636220131895e-05, + "loss": 98.8756, + "step": 124510 + }, + { + "epoch": 0.5030765563577452, + "grad_norm": 398.4982604980469, + "learning_rate": 2.3604889464919856e-05, + "loss": 79.2648, + "step": 124520 + }, + { + "epoch": 0.5031169576231127, + "grad_norm": 808.4458618164062, + "learning_rate": 2.360214263942864e-05, + "loss": 89.2172, + "step": 124530 + }, + { + "epoch": 0.5031573588884803, + "grad_norm": 573.582763671875, + "learning_rate": 2.35993957437118e-05, + "loss": 62.9262, + "step": 124540 + }, + { + "epoch": 0.503197760153848, + "grad_norm": 568.2861938476562, + "learning_rate": 2.359664877782289e-05, + "loss": 98.2817, + "step": 124550 + }, + { + "epoch": 0.5032381614192156, + "grad_norm": 694.2310791015625, + "learning_rate": 2.3593901741815454e-05, + "loss": 81.7846, + "step": 124560 + }, + { + "epoch": 0.5032785626845833, + "grad_norm": 569.1074829101562, + "learning_rate": 2.3591154635743055e-05, + "loss": 70.7449, + "step": 124570 + }, + { + "epoch": 0.5033189639499509, + "grad_norm": 1384.7392578125, + "learning_rate": 2.3588407459659256e-05, + "loss": 72.3317, + "step": 124580 + }, + { + "epoch": 0.5033593652153185, + "grad_norm": 513.1998291015625, + "learning_rate": 2.3585660213617597e-05, + "loss": 65.3888, + "step": 124590 + }, + { + "epoch": 0.5033997664806862, + "grad_norm": 586.5802612304688, + "learning_rate": 2.358291289767165e-05, + "loss": 75.0984, + "step": 124600 + }, + { + "epoch": 0.5034401677460538, + "grad_norm": 883.067138671875, + "learning_rate": 2.358016551187497e-05, + "loss": 83.4674, + "step": 124610 + }, + { + "epoch": 0.5034805690114215, + "grad_norm": 497.7769470214844, + "learning_rate": 2.357741805628112e-05, + "loss": 42.893, + "step": 124620 + }, + { + "epoch": 0.5035209702767891, + "grad_norm": 806.4500732421875, + "learning_rate": 2.357467053094366e-05, + "loss": 78.1472, + "step": 124630 + }, + { + "epoch": 0.5035613715421567, + "grad_norm": 679.083984375, + "learning_rate": 2.3571922935916164e-05, + "loss": 47.7483, + "step": 124640 + }, + { + "epoch": 0.5036017728075244, + "grad_norm": 1239.581787109375, + "learning_rate": 2.356917527125219e-05, + "loss": 69.8066, + "step": 124650 + }, + { + "epoch": 0.5036421740728919, + "grad_norm": 381.1549377441406, + "learning_rate": 2.3566427537005305e-05, + "loss": 62.75, + "step": 124660 + }, + { + "epoch": 0.5036825753382596, + "grad_norm": 325.6667175292969, + "learning_rate": 2.356367973322908e-05, + "loss": 68.3107, + "step": 124670 + }, + { + "epoch": 0.5037229766036272, + "grad_norm": 581.4620971679688, + "learning_rate": 2.3560931859977086e-05, + "loss": 119.3219, + "step": 124680 + }, + { + "epoch": 0.5037633778689948, + "grad_norm": 406.7649230957031, + "learning_rate": 2.355818391730289e-05, + "loss": 65.1252, + "step": 124690 + }, + { + "epoch": 0.5038037791343625, + "grad_norm": 763.4819946289062, + "learning_rate": 2.3555435905260066e-05, + "loss": 99.085, + "step": 124700 + }, + { + "epoch": 0.5038441803997301, + "grad_norm": 901.76708984375, + "learning_rate": 2.3552687823902182e-05, + "loss": 68.3778, + "step": 124710 + }, + { + "epoch": 0.5038845816650978, + "grad_norm": 624.6129760742188, + "learning_rate": 2.3549939673282833e-05, + "loss": 73.1725, + "step": 124720 
+ }, + { + "epoch": 0.5039249829304654, + "grad_norm": 258.3861999511719, + "learning_rate": 2.3547191453455574e-05, + "loss": 77.3781, + "step": 124730 + }, + { + "epoch": 0.503965384195833, + "grad_norm": 660.4166870117188, + "learning_rate": 2.354444316447399e-05, + "loss": 92.0569, + "step": 124740 + }, + { + "epoch": 0.5040057854612007, + "grad_norm": 1185.4964599609375, + "learning_rate": 2.3541694806391672e-05, + "loss": 110.0241, + "step": 124750 + }, + { + "epoch": 0.5040461867265683, + "grad_norm": 1471.8436279296875, + "learning_rate": 2.353894637926218e-05, + "loss": 98.949, + "step": 124760 + }, + { + "epoch": 0.504086587991936, + "grad_norm": 306.51922607421875, + "learning_rate": 2.3536197883139114e-05, + "loss": 68.2959, + "step": 124770 + }, + { + "epoch": 0.5041269892573036, + "grad_norm": 959.779052734375, + "learning_rate": 2.3533449318076042e-05, + "loss": 82.6109, + "step": 124780 + }, + { + "epoch": 0.5041673905226711, + "grad_norm": 828.029052734375, + "learning_rate": 2.3530700684126558e-05, + "loss": 73.7941, + "step": 124790 + }, + { + "epoch": 0.5042077917880388, + "grad_norm": 626.31982421875, + "learning_rate": 2.352795198134425e-05, + "loss": 90.5079, + "step": 124800 + }, + { + "epoch": 0.5042481930534064, + "grad_norm": 492.9288635253906, + "learning_rate": 2.35252032097827e-05, + "loss": 57.474, + "step": 124810 + }, + { + "epoch": 0.504288594318774, + "grad_norm": 587.7888793945312, + "learning_rate": 2.3522454369495495e-05, + "loss": 50.5362, + "step": 124820 + }, + { + "epoch": 0.5043289955841417, + "grad_norm": 826.4014892578125, + "learning_rate": 2.3519705460536236e-05, + "loss": 50.674, + "step": 124830 + }, + { + "epoch": 0.5043693968495093, + "grad_norm": 868.8826904296875, + "learning_rate": 2.3516956482958503e-05, + "loss": 80.4411, + "step": 124840 + }, + { + "epoch": 0.504409798114877, + "grad_norm": 1008.8751831054688, + "learning_rate": 2.3514207436815896e-05, + "loss": 76.7026, + "step": 124850 + }, + { + "epoch": 0.5044501993802446, + "grad_norm": 1173.9139404296875, + "learning_rate": 2.3511458322162e-05, + "loss": 91.7644, + "step": 124860 + }, + { + "epoch": 0.5044906006456122, + "grad_norm": 732.0101318359375, + "learning_rate": 2.350870913905042e-05, + "loss": 101.7711, + "step": 124870 + }, + { + "epoch": 0.5045310019109799, + "grad_norm": 624.877685546875, + "learning_rate": 2.3505959887534752e-05, + "loss": 53.164, + "step": 124880 + }, + { + "epoch": 0.5045714031763475, + "grad_norm": 366.80926513671875, + "learning_rate": 2.3503210567668582e-05, + "loss": 65.904, + "step": 124890 + }, + { + "epoch": 0.5046118044417152, + "grad_norm": 770.7205810546875, + "learning_rate": 2.3500461179505526e-05, + "loss": 69.2114, + "step": 124900 + }, + { + "epoch": 0.5046522057070827, + "grad_norm": 1048.2762451171875, + "learning_rate": 2.349771172309917e-05, + "loss": 95.8542, + "step": 124910 + }, + { + "epoch": 0.5046926069724503, + "grad_norm": 586.1166381835938, + "learning_rate": 2.349496219850313e-05, + "loss": 68.0398, + "step": 124920 + }, + { + "epoch": 0.504733008237818, + "grad_norm": 1318.9345703125, + "learning_rate": 2.3492212605770996e-05, + "loss": 91.6393, + "step": 124930 + }, + { + "epoch": 0.5047734095031856, + "grad_norm": 365.20361328125, + "learning_rate": 2.348946294495639e-05, + "loss": 56.961, + "step": 124940 + }, + { + "epoch": 0.5048138107685533, + "grad_norm": 610.0076293945312, + "learning_rate": 2.3486713216112896e-05, + "loss": 74.2277, + "step": 124950 + }, + { + "epoch": 0.5048542120339209, + "grad_norm": 
998.9900512695312, + "learning_rate": 2.3483963419294146e-05, + "loss": 43.2463, + "step": 124960 + }, + { + "epoch": 0.5048946132992885, + "grad_norm": 721.56591796875, + "learning_rate": 2.3481213554553724e-05, + "loss": 49.8918, + "step": 124970 + }, + { + "epoch": 0.5049350145646562, + "grad_norm": 958.49853515625, + "learning_rate": 2.3478463621945252e-05, + "loss": 63.5593, + "step": 124980 + }, + { + "epoch": 0.5049754158300238, + "grad_norm": 702.3695068359375, + "learning_rate": 2.347571362152234e-05, + "loss": 82.9215, + "step": 124990 + }, + { + "epoch": 0.5050158170953915, + "grad_norm": 261.6616516113281, + "learning_rate": 2.3472963553338614e-05, + "loss": 66.7247, + "step": 125000 + }, + { + "epoch": 0.5050562183607591, + "grad_norm": 526.8492431640625, + "learning_rate": 2.3470213417447662e-05, + "loss": 94.0547, + "step": 125010 + }, + { + "epoch": 0.5050966196261267, + "grad_norm": 960.2244262695312, + "learning_rate": 2.3467463213903118e-05, + "loss": 98.6385, + "step": 125020 + }, + { + "epoch": 0.5051370208914944, + "grad_norm": 631.2443237304688, + "learning_rate": 2.3464712942758592e-05, + "loss": 62.8552, + "step": 125030 + }, + { + "epoch": 0.5051774221568619, + "grad_norm": 981.9104614257812, + "learning_rate": 2.346196260406771e-05, + "loss": 72.4889, + "step": 125040 + }, + { + "epoch": 0.5052178234222295, + "grad_norm": 1006.9649047851562, + "learning_rate": 2.3459212197884078e-05, + "loss": 69.4985, + "step": 125050 + }, + { + "epoch": 0.5052582246875972, + "grad_norm": 1121.0289306640625, + "learning_rate": 2.345646172426132e-05, + "loss": 86.2625, + "step": 125060 + }, + { + "epoch": 0.5052986259529648, + "grad_norm": 827.148193359375, + "learning_rate": 2.3453711183253063e-05, + "loss": 57.9993, + "step": 125070 + }, + { + "epoch": 0.5053390272183325, + "grad_norm": 916.59619140625, + "learning_rate": 2.3450960574912934e-05, + "loss": 84.352, + "step": 125080 + }, + { + "epoch": 0.5053794284837001, + "grad_norm": 1251.605712890625, + "learning_rate": 2.344820989929455e-05, + "loss": 80.2015, + "step": 125090 + }, + { + "epoch": 0.5054198297490677, + "grad_norm": 654.6368408203125, + "learning_rate": 2.3445459156451538e-05, + "loss": 78.3863, + "step": 125100 + }, + { + "epoch": 0.5054602310144354, + "grad_norm": 843.285888671875, + "learning_rate": 2.3442708346437523e-05, + "loss": 50.0393, + "step": 125110 + }, + { + "epoch": 0.505500632279803, + "grad_norm": 808.625244140625, + "learning_rate": 2.343995746930614e-05, + "loss": 75.8548, + "step": 125120 + }, + { + "epoch": 0.5055410335451707, + "grad_norm": 1086.694091796875, + "learning_rate": 2.3437206525111014e-05, + "loss": 64.0335, + "step": 125130 + }, + { + "epoch": 0.5055814348105383, + "grad_norm": 622.2830200195312, + "learning_rate": 2.343445551390578e-05, + "loss": 72.2689, + "step": 125140 + }, + { + "epoch": 0.5056218360759059, + "grad_norm": 915.3798828125, + "learning_rate": 2.343170443574406e-05, + "loss": 105.8572, + "step": 125150 + }, + { + "epoch": 0.5056622373412736, + "grad_norm": 1232.4456787109375, + "learning_rate": 2.34289532906795e-05, + "loss": 115.1442, + "step": 125160 + }, + { + "epoch": 0.5057026386066411, + "grad_norm": 453.69189453125, + "learning_rate": 2.342620207876573e-05, + "loss": 85.0402, + "step": 125170 + }, + { + "epoch": 0.5057430398720087, + "grad_norm": 861.4758911132812, + "learning_rate": 2.3423450800056385e-05, + "loss": 82.6469, + "step": 125180 + }, + { + "epoch": 0.5057834411373764, + "grad_norm": 308.0115966796875, + "learning_rate": 
2.3420699454605115e-05, + "loss": 55.9154, + "step": 125190 + }, + { + "epoch": 0.505823842402744, + "grad_norm": 1043.7642822265625, + "learning_rate": 2.3417948042465536e-05, + "loss": 81.0632, + "step": 125200 + }, + { + "epoch": 0.5058642436681117, + "grad_norm": 216.85911560058594, + "learning_rate": 2.3415196563691307e-05, + "loss": 56.1708, + "step": 125210 + }, + { + "epoch": 0.5059046449334793, + "grad_norm": 680.9257202148438, + "learning_rate": 2.3412445018336057e-05, + "loss": 68.3688, + "step": 125220 + }, + { + "epoch": 0.505945046198847, + "grad_norm": 884.7078857421875, + "learning_rate": 2.3409693406453437e-05, + "loss": 94.9706, + "step": 125230 + }, + { + "epoch": 0.5059854474642146, + "grad_norm": 912.4779052734375, + "learning_rate": 2.340694172809708e-05, + "loss": 69.5853, + "step": 125240 + }, + { + "epoch": 0.5060258487295822, + "grad_norm": 834.961181640625, + "learning_rate": 2.3404189983320652e-05, + "loss": 80.782, + "step": 125250 + }, + { + "epoch": 0.5060662499949499, + "grad_norm": 814.3042602539062, + "learning_rate": 2.3401438172177783e-05, + "loss": 69.3968, + "step": 125260 + }, + { + "epoch": 0.5061066512603175, + "grad_norm": 556.5704956054688, + "learning_rate": 2.3398686294722128e-05, + "loss": 46.1357, + "step": 125270 + }, + { + "epoch": 0.5061470525256851, + "grad_norm": 378.234130859375, + "learning_rate": 2.3395934351007333e-05, + "loss": 82.6538, + "step": 125280 + }, + { + "epoch": 0.5061874537910528, + "grad_norm": 701.1221313476562, + "learning_rate": 2.3393182341087055e-05, + "loss": 78.2324, + "step": 125290 + }, + { + "epoch": 0.5062278550564203, + "grad_norm": 870.015869140625, + "learning_rate": 2.339043026501493e-05, + "loss": 57.5684, + "step": 125300 + }, + { + "epoch": 0.506268256321788, + "grad_norm": 727.7157592773438, + "learning_rate": 2.3387678122844622e-05, + "loss": 60.8073, + "step": 125310 + }, + { + "epoch": 0.5063086575871556, + "grad_norm": 843.9540405273438, + "learning_rate": 2.3384925914629787e-05, + "loss": 56.7931, + "step": 125320 + }, + { + "epoch": 0.5063490588525232, + "grad_norm": 1043.8494873046875, + "learning_rate": 2.3382173640424085e-05, + "loss": 84.9701, + "step": 125330 + }, + { + "epoch": 0.5063894601178909, + "grad_norm": 648.1075439453125, + "learning_rate": 2.337942130028116e-05, + "loss": 72.0501, + "step": 125340 + }, + { + "epoch": 0.5064298613832585, + "grad_norm": 796.8192138671875, + "learning_rate": 2.3376668894254677e-05, + "loss": 61.5104, + "step": 125350 + }, + { + "epoch": 0.5064702626486262, + "grad_norm": 539.1845703125, + "learning_rate": 2.3373916422398302e-05, + "loss": 61.6045, + "step": 125360 + }, + { + "epoch": 0.5065106639139938, + "grad_norm": 597.8878173828125, + "learning_rate": 2.337116388476569e-05, + "loss": 63.0586, + "step": 125370 + }, + { + "epoch": 0.5065510651793614, + "grad_norm": 848.0739135742188, + "learning_rate": 2.3368411281410493e-05, + "loss": 90.3213, + "step": 125380 + }, + { + "epoch": 0.5065914664447291, + "grad_norm": 713.491943359375, + "learning_rate": 2.3365658612386387e-05, + "loss": 65.6314, + "step": 125390 + }, + { + "epoch": 0.5066318677100967, + "grad_norm": 533.8340454101562, + "learning_rate": 2.3362905877747033e-05, + "loss": 99.2521, + "step": 125400 + }, + { + "epoch": 0.5066722689754644, + "grad_norm": 1015.9691772460938, + "learning_rate": 2.3360153077546102e-05, + "loss": 63.573, + "step": 125410 + }, + { + "epoch": 0.506712670240832, + "grad_norm": 1385.2177734375, + "learning_rate": 2.3357400211837252e-05, + "loss": 73.7976, + 
"step": 125420 + }, + { + "epoch": 0.5067530715061995, + "grad_norm": 833.7890625, + "learning_rate": 2.3354647280674156e-05, + "loss": 58.1836, + "step": 125430 + }, + { + "epoch": 0.5067934727715672, + "grad_norm": 1086.9378662109375, + "learning_rate": 2.3351894284110483e-05, + "loss": 79.2673, + "step": 125440 + }, + { + "epoch": 0.5068338740369348, + "grad_norm": 659.2811279296875, + "learning_rate": 2.3349141222199912e-05, + "loss": 63.4763, + "step": 125450 + }, + { + "epoch": 0.5068742753023024, + "grad_norm": 566.5320434570312, + "learning_rate": 2.334638809499611e-05, + "loss": 63.4515, + "step": 125460 + }, + { + "epoch": 0.5069146765676701, + "grad_norm": 778.6336059570312, + "learning_rate": 2.3343634902552743e-05, + "loss": 73.9686, + "step": 125470 + }, + { + "epoch": 0.5069550778330377, + "grad_norm": 625.6480102539062, + "learning_rate": 2.3340881644923493e-05, + "loss": 83.409, + "step": 125480 + }, + { + "epoch": 0.5069954790984054, + "grad_norm": 719.005126953125, + "learning_rate": 2.333812832216204e-05, + "loss": 59.1163, + "step": 125490 + }, + { + "epoch": 0.507035880363773, + "grad_norm": 726.1068115234375, + "learning_rate": 2.333537493432205e-05, + "loss": 77.6968, + "step": 125500 + }, + { + "epoch": 0.5070762816291406, + "grad_norm": 553.2169189453125, + "learning_rate": 2.333262148145721e-05, + "loss": 97.6725, + "step": 125510 + }, + { + "epoch": 0.5071166828945083, + "grad_norm": 884.8190307617188, + "learning_rate": 2.3329867963621196e-05, + "loss": 53.486, + "step": 125520 + }, + { + "epoch": 0.5071570841598759, + "grad_norm": 468.8116149902344, + "learning_rate": 2.3327114380867698e-05, + "loss": 55.7225, + "step": 125530 + }, + { + "epoch": 0.5071974854252436, + "grad_norm": 659.50048828125, + "learning_rate": 2.3324360733250395e-05, + "loss": 46.4611, + "step": 125540 + }, + { + "epoch": 0.5072378866906111, + "grad_norm": 890.7202758789062, + "learning_rate": 2.332160702082296e-05, + "loss": 107.3208, + "step": 125550 + }, + { + "epoch": 0.5072782879559787, + "grad_norm": 650.31298828125, + "learning_rate": 2.3318853243639086e-05, + "loss": 71.8805, + "step": 125560 + }, + { + "epoch": 0.5073186892213464, + "grad_norm": 680.4561157226562, + "learning_rate": 2.3316099401752466e-05, + "loss": 90.6824, + "step": 125570 + }, + { + "epoch": 0.507359090486714, + "grad_norm": 628.091796875, + "learning_rate": 2.3313345495216778e-05, + "loss": 81.8013, + "step": 125580 + }, + { + "epoch": 0.5073994917520817, + "grad_norm": 584.0039672851562, + "learning_rate": 2.331059152408571e-05, + "loss": 59.8454, + "step": 125590 + }, + { + "epoch": 0.5074398930174493, + "grad_norm": 1087.4588623046875, + "learning_rate": 2.3307837488412955e-05, + "loss": 69.0356, + "step": 125600 + }, + { + "epoch": 0.5074802942828169, + "grad_norm": 599.2728881835938, + "learning_rate": 2.3305083388252214e-05, + "loss": 75.6856, + "step": 125610 + }, + { + "epoch": 0.5075206955481846, + "grad_norm": 281.5044860839844, + "learning_rate": 2.330232922365716e-05, + "loss": 97.1438, + "step": 125620 + }, + { + "epoch": 0.5075610968135522, + "grad_norm": 847.3013305664062, + "learning_rate": 2.329957499468151e-05, + "loss": 79.9245, + "step": 125630 + }, + { + "epoch": 0.5076014980789199, + "grad_norm": 451.21844482421875, + "learning_rate": 2.329682070137894e-05, + "loss": 90.5284, + "step": 125640 + }, + { + "epoch": 0.5076418993442875, + "grad_norm": 807.6908569335938, + "learning_rate": 2.3294066343803146e-05, + "loss": 98.9036, + "step": 125650 + }, + { + "epoch": 0.5076823006096551, + 
"grad_norm": 1211.083251953125, + "learning_rate": 2.3291311922007847e-05, + "loss": 71.2345, + "step": 125660 + }, + { + "epoch": 0.5077227018750228, + "grad_norm": 1298.0909423828125, + "learning_rate": 2.328855743604672e-05, + "loss": 62.0741, + "step": 125670 + }, + { + "epoch": 0.5077631031403903, + "grad_norm": 497.5022277832031, + "learning_rate": 2.3285802885973466e-05, + "loss": 71.454, + "step": 125680 + }, + { + "epoch": 0.5078035044057579, + "grad_norm": 1247.1212158203125, + "learning_rate": 2.3283048271841802e-05, + "loss": 61.6947, + "step": 125690 + }, + { + "epoch": 0.5078439056711256, + "grad_norm": 802.89111328125, + "learning_rate": 2.3280293593705422e-05, + "loss": 92.767, + "step": 125700 + }, + { + "epoch": 0.5078843069364932, + "grad_norm": 549.5624389648438, + "learning_rate": 2.327753885161803e-05, + "loss": 81.193, + "step": 125710 + }, + { + "epoch": 0.5079247082018609, + "grad_norm": 521.927734375, + "learning_rate": 2.327478404563333e-05, + "loss": 71.5996, + "step": 125720 + }, + { + "epoch": 0.5079651094672285, + "grad_norm": 333.1406555175781, + "learning_rate": 2.327202917580502e-05, + "loss": 81.1996, + "step": 125730 + }, + { + "epoch": 0.5080055107325961, + "grad_norm": 1065.0086669921875, + "learning_rate": 2.326927424218683e-05, + "loss": 136.4643, + "step": 125740 + }, + { + "epoch": 0.5080459119979638, + "grad_norm": 383.2929992675781, + "learning_rate": 2.3266519244832448e-05, + "loss": 78.21, + "step": 125750 + }, + { + "epoch": 0.5080863132633314, + "grad_norm": 529.945068359375, + "learning_rate": 2.3263764183795593e-05, + "loss": 73.6441, + "step": 125760 + }, + { + "epoch": 0.5081267145286991, + "grad_norm": 394.6263732910156, + "learning_rate": 2.3261009059129972e-05, + "loss": 61.8521, + "step": 125770 + }, + { + "epoch": 0.5081671157940667, + "grad_norm": 434.06817626953125, + "learning_rate": 2.325825387088931e-05, + "loss": 113.4853, + "step": 125780 + }, + { + "epoch": 0.5082075170594343, + "grad_norm": 824.635009765625, + "learning_rate": 2.32554986191273e-05, + "loss": 79.3799, + "step": 125790 + }, + { + "epoch": 0.508247918324802, + "grad_norm": 678.3859252929688, + "learning_rate": 2.3252743303897677e-05, + "loss": 65.091, + "step": 125800 + }, + { + "epoch": 0.5082883195901695, + "grad_norm": 839.47216796875, + "learning_rate": 2.3249987925254143e-05, + "loss": 72.7555, + "step": 125810 + }, + { + "epoch": 0.5083287208555372, + "grad_norm": 638.7647094726562, + "learning_rate": 2.3247232483250427e-05, + "loss": 74.8891, + "step": 125820 + }, + { + "epoch": 0.5083691221209048, + "grad_norm": 866.5538940429688, + "learning_rate": 2.3244476977940234e-05, + "loss": 58.449, + "step": 125830 + }, + { + "epoch": 0.5084095233862724, + "grad_norm": 488.7957458496094, + "learning_rate": 2.3241721409377297e-05, + "loss": 84.2317, + "step": 125840 + }, + { + "epoch": 0.5084499246516401, + "grad_norm": 664.722412109375, + "learning_rate": 2.3238965777615324e-05, + "loss": 84.2498, + "step": 125850 + }, + { + "epoch": 0.5084903259170077, + "grad_norm": 853.5700073242188, + "learning_rate": 2.3236210082708058e-05, + "loss": 74.8231, + "step": 125860 + }, + { + "epoch": 0.5085307271823754, + "grad_norm": 422.8149108886719, + "learning_rate": 2.3233454324709204e-05, + "loss": 72.7359, + "step": 125870 + }, + { + "epoch": 0.508571128447743, + "grad_norm": 428.75799560546875, + "learning_rate": 2.32306985036725e-05, + "loss": 67.1801, + "step": 125880 + }, + { + "epoch": 0.5086115297131106, + "grad_norm": 918.4115600585938, + "learning_rate": 
2.322794261965166e-05, + "loss": 79.5006, + "step": 125890 + }, + { + "epoch": 0.5086519309784783, + "grad_norm": 435.0486755371094, + "learning_rate": 2.3225186672700418e-05, + "loss": 44.2631, + "step": 125900 + }, + { + "epoch": 0.5086923322438459, + "grad_norm": 551.4952392578125, + "learning_rate": 2.3222430662872496e-05, + "loss": 78.3664, + "step": 125910 + }, + { + "epoch": 0.5087327335092136, + "grad_norm": 511.10919189453125, + "learning_rate": 2.321967459022163e-05, + "loss": 57.4601, + "step": 125920 + }, + { + "epoch": 0.5087731347745812, + "grad_norm": 1262.535400390625, + "learning_rate": 2.3216918454801555e-05, + "loss": 75.7196, + "step": 125930 + }, + { + "epoch": 0.5088135360399487, + "grad_norm": 572.989990234375, + "learning_rate": 2.3214162256666e-05, + "loss": 85.3681, + "step": 125940 + }, + { + "epoch": 0.5088539373053164, + "grad_norm": 1690.1878662109375, + "learning_rate": 2.321140599586869e-05, + "loss": 113.1842, + "step": 125950 + }, + { + "epoch": 0.508894338570684, + "grad_norm": 445.2865905761719, + "learning_rate": 2.3208649672463367e-05, + "loss": 95.7158, + "step": 125960 + }, + { + "epoch": 0.5089347398360516, + "grad_norm": 589.657958984375, + "learning_rate": 2.3205893286503777e-05, + "loss": 51.903, + "step": 125970 + }, + { + "epoch": 0.5089751411014193, + "grad_norm": 1367.8341064453125, + "learning_rate": 2.320313683804364e-05, + "loss": 80.1345, + "step": 125980 + }, + { + "epoch": 0.5090155423667869, + "grad_norm": 543.2908935546875, + "learning_rate": 2.3200380327136705e-05, + "loss": 80.5191, + "step": 125990 + }, + { + "epoch": 0.5090559436321546, + "grad_norm": 876.8602294921875, + "learning_rate": 2.31976237538367e-05, + "loss": 81.1524, + "step": 126000 + }, + { + "epoch": 0.5090963448975222, + "grad_norm": 1285.987060546875, + "learning_rate": 2.3194867118197376e-05, + "loss": 85.9419, + "step": 126010 + }, + { + "epoch": 0.5091367461628898, + "grad_norm": 447.32305908203125, + "learning_rate": 2.3192110420272478e-05, + "loss": 77.2044, + "step": 126020 + }, + { + "epoch": 0.5091771474282575, + "grad_norm": 643.3151245117188, + "learning_rate": 2.3189353660115737e-05, + "loss": 104.5755, + "step": 126030 + }, + { + "epoch": 0.5092175486936251, + "grad_norm": 349.8528747558594, + "learning_rate": 2.318659683778091e-05, + "loss": 77.9311, + "step": 126040 + }, + { + "epoch": 0.5092579499589928, + "grad_norm": 586.280029296875, + "learning_rate": 2.3183839953321735e-05, + "loss": 88.0744, + "step": 126050 + }, + { + "epoch": 0.5092983512243604, + "grad_norm": 1297.82421875, + "learning_rate": 2.3181083006791965e-05, + "loss": 82.8723, + "step": 126060 + }, + { + "epoch": 0.5093387524897279, + "grad_norm": 453.59326171875, + "learning_rate": 2.317832599824534e-05, + "loss": 63.267, + "step": 126070 + }, + { + "epoch": 0.5093791537550956, + "grad_norm": 987.6134643554688, + "learning_rate": 2.3175568927735612e-05, + "loss": 58.0137, + "step": 126080 + }, + { + "epoch": 0.5094195550204632, + "grad_norm": 1161.0086669921875, + "learning_rate": 2.3172811795316534e-05, + "loss": 115.2587, + "step": 126090 + }, + { + "epoch": 0.5094599562858309, + "grad_norm": 691.8253173828125, + "learning_rate": 2.317005460104186e-05, + "loss": 71.7124, + "step": 126100 + }, + { + "epoch": 0.5095003575511985, + "grad_norm": 501.4620361328125, + "learning_rate": 2.3167297344965334e-05, + "loss": 77.8389, + "step": 126110 + }, + { + "epoch": 0.5095407588165661, + "grad_norm": 483.8471374511719, + "learning_rate": 2.3164540027140715e-05, + "loss": 91.3925, + 
"step": 126120 + }, + { + "epoch": 0.5095811600819338, + "grad_norm": 658.811767578125, + "learning_rate": 2.3161782647621764e-05, + "loss": 78.8194, + "step": 126130 + }, + { + "epoch": 0.5096215613473014, + "grad_norm": 957.9981689453125, + "learning_rate": 2.3159025206462233e-05, + "loss": 60.8603, + "step": 126140 + }, + { + "epoch": 0.509661962612669, + "grad_norm": 678.26611328125, + "learning_rate": 2.315626770371588e-05, + "loss": 55.1557, + "step": 126150 + }, + { + "epoch": 0.5097023638780367, + "grad_norm": 1329.445556640625, + "learning_rate": 2.3153510139436458e-05, + "loss": 73.524, + "step": 126160 + }, + { + "epoch": 0.5097427651434043, + "grad_norm": 759.9807739257812, + "learning_rate": 2.315075251367773e-05, + "loss": 133.9603, + "step": 126170 + }, + { + "epoch": 0.509783166408772, + "grad_norm": 682.0528564453125, + "learning_rate": 2.314799482649346e-05, + "loss": 63.309, + "step": 126180 + }, + { + "epoch": 0.5098235676741395, + "grad_norm": 1163.62109375, + "learning_rate": 2.314523707793742e-05, + "loss": 65.3546, + "step": 126190 + }, + { + "epoch": 0.5098639689395071, + "grad_norm": 733.1469116210938, + "learning_rate": 2.314247926806336e-05, + "loss": 62.4089, + "step": 126200 + }, + { + "epoch": 0.5099043702048748, + "grad_norm": 963.3950805664062, + "learning_rate": 2.3139721396925042e-05, + "loss": 96.0767, + "step": 126210 + }, + { + "epoch": 0.5099447714702424, + "grad_norm": 557.8453369140625, + "learning_rate": 2.3136963464576248e-05, + "loss": 61.2593, + "step": 126220 + }, + { + "epoch": 0.5099851727356101, + "grad_norm": 3611.396728515625, + "learning_rate": 2.3134205471070733e-05, + "loss": 109.2723, + "step": 126230 + }, + { + "epoch": 0.5100255740009777, + "grad_norm": 423.0957336425781, + "learning_rate": 2.3131447416462267e-05, + "loss": 69.3753, + "step": 126240 + }, + { + "epoch": 0.5100659752663453, + "grad_norm": 967.181640625, + "learning_rate": 2.312868930080462e-05, + "loss": 79.8809, + "step": 126250 + }, + { + "epoch": 0.510106376531713, + "grad_norm": 1078.2294921875, + "learning_rate": 2.3125931124151567e-05, + "loss": 54.0354, + "step": 126260 + }, + { + "epoch": 0.5101467777970806, + "grad_norm": 982.7615356445312, + "learning_rate": 2.3123172886556876e-05, + "loss": 89.7015, + "step": 126270 + }, + { + "epoch": 0.5101871790624483, + "grad_norm": 867.3275756835938, + "learning_rate": 2.3120414588074326e-05, + "loss": 90.1115, + "step": 126280 + }, + { + "epoch": 0.5102275803278159, + "grad_norm": 772.1019897460938, + "learning_rate": 2.311765622875768e-05, + "loss": 80.3877, + "step": 126290 + }, + { + "epoch": 0.5102679815931835, + "grad_norm": 643.1610107421875, + "learning_rate": 2.3114897808660725e-05, + "loss": 102.8637, + "step": 126300 + }, + { + "epoch": 0.5103083828585512, + "grad_norm": 458.5145568847656, + "learning_rate": 2.3112139327837233e-05, + "loss": 88.0923, + "step": 126310 + }, + { + "epoch": 0.5103487841239187, + "grad_norm": 683.0382080078125, + "learning_rate": 2.3109380786340988e-05, + "loss": 105.4016, + "step": 126320 + }, + { + "epoch": 0.5103891853892863, + "grad_norm": 345.6849670410156, + "learning_rate": 2.3106622184225756e-05, + "loss": 64.033, + "step": 126330 + }, + { + "epoch": 0.510429586654654, + "grad_norm": 1464.1981201171875, + "learning_rate": 2.310386352154532e-05, + "loss": 100.0413, + "step": 126340 + }, + { + "epoch": 0.5104699879200216, + "grad_norm": 519.0050048828125, + "learning_rate": 2.310110479835348e-05, + "loss": 67.2137, + "step": 126350 + }, + { + "epoch": 0.5105103891853893, + 
"grad_norm": 361.1784973144531, + "learning_rate": 2.3098346014704e-05, + "loss": 100.8945, + "step": 126360 + }, + { + "epoch": 0.5105507904507569, + "grad_norm": 646.7041625976562, + "learning_rate": 2.3095587170650665e-05, + "loss": 106.2932, + "step": 126370 + }, + { + "epoch": 0.5105911917161245, + "grad_norm": 874.3350219726562, + "learning_rate": 2.3092828266247267e-05, + "loss": 90.0217, + "step": 126380 + }, + { + "epoch": 0.5106315929814922, + "grad_norm": 434.38287353515625, + "learning_rate": 2.309006930154759e-05, + "loss": 56.5771, + "step": 126390 + }, + { + "epoch": 0.5106719942468598, + "grad_norm": 465.22412109375, + "learning_rate": 2.3087310276605428e-05, + "loss": 60.274, + "step": 126400 + }, + { + "epoch": 0.5107123955122275, + "grad_norm": 580.7153930664062, + "learning_rate": 2.3084551191474554e-05, + "loss": 80.853, + "step": 126410 + }, + { + "epoch": 0.5107527967775951, + "grad_norm": 312.6272888183594, + "learning_rate": 2.308179204620877e-05, + "loss": 64.2978, + "step": 126420 + }, + { + "epoch": 0.5107931980429627, + "grad_norm": 858.8868408203125, + "learning_rate": 2.3079032840861863e-05, + "loss": 59.3323, + "step": 126430 + }, + { + "epoch": 0.5108335993083304, + "grad_norm": 766.7066650390625, + "learning_rate": 2.3076273575487626e-05, + "loss": 79.6948, + "step": 126440 + }, + { + "epoch": 0.5108740005736979, + "grad_norm": 3136.072998046875, + "learning_rate": 2.3073514250139848e-05, + "loss": 78.0294, + "step": 126450 + }, + { + "epoch": 0.5109144018390656, + "grad_norm": 1437.3848876953125, + "learning_rate": 2.3070754864872332e-05, + "loss": 125.8244, + "step": 126460 + }, + { + "epoch": 0.5109548031044332, + "grad_norm": 574.3046264648438, + "learning_rate": 2.3067995419738866e-05, + "loss": 72.5579, + "step": 126470 + }, + { + "epoch": 0.5109952043698008, + "grad_norm": 460.64715576171875, + "learning_rate": 2.3065235914793252e-05, + "loss": 59.1845, + "step": 126480 + }, + { + "epoch": 0.5110356056351685, + "grad_norm": 883.6168823242188, + "learning_rate": 2.3062476350089287e-05, + "loss": 70.5711, + "step": 126490 + }, + { + "epoch": 0.5110760069005361, + "grad_norm": 768.465576171875, + "learning_rate": 2.3059716725680766e-05, + "loss": 69.2414, + "step": 126500 + }, + { + "epoch": 0.5111164081659038, + "grad_norm": 1435.8023681640625, + "learning_rate": 2.3056957041621493e-05, + "loss": 87.235, + "step": 126510 + }, + { + "epoch": 0.5111568094312714, + "grad_norm": 949.3103637695312, + "learning_rate": 2.3054197297965268e-05, + "loss": 65.416, + "step": 126520 + }, + { + "epoch": 0.511197210696639, + "grad_norm": 661.1835327148438, + "learning_rate": 2.3051437494765892e-05, + "loss": 73.7557, + "step": 126530 + }, + { + "epoch": 0.5112376119620067, + "grad_norm": 1081.3138427734375, + "learning_rate": 2.3048677632077173e-05, + "loss": 84.0155, + "step": 126540 + }, + { + "epoch": 0.5112780132273743, + "grad_norm": 233.6175994873047, + "learning_rate": 2.3045917709952915e-05, + "loss": 50.919, + "step": 126550 + }, + { + "epoch": 0.511318414492742, + "grad_norm": 543.8875122070312, + "learning_rate": 2.3043157728446924e-05, + "loss": 100.4872, + "step": 126560 + }, + { + "epoch": 0.5113588157581096, + "grad_norm": 791.6467895507812, + "learning_rate": 2.304039768761301e-05, + "loss": 74.2042, + "step": 126570 + }, + { + "epoch": 0.5113992170234771, + "grad_norm": 1607.880859375, + "learning_rate": 2.303763758750497e-05, + "loss": 96.1971, + "step": 126580 + }, + { + "epoch": 0.5114396182888448, + "grad_norm": 483.92205810546875, + 
"learning_rate": 2.3034877428176628e-05, + "loss": 80.3329, + "step": 126590 + }, + { + "epoch": 0.5114800195542124, + "grad_norm": 500.1396484375, + "learning_rate": 2.3032117209681782e-05, + "loss": 69.0772, + "step": 126600 + }, + { + "epoch": 0.51152042081958, + "grad_norm": 1326.7479248046875, + "learning_rate": 2.3029356932074252e-05, + "loss": 56.5297, + "step": 126610 + }, + { + "epoch": 0.5115608220849477, + "grad_norm": 897.68310546875, + "learning_rate": 2.3026596595407848e-05, + "loss": 82.3797, + "step": 126620 + }, + { + "epoch": 0.5116012233503153, + "grad_norm": 356.3905944824219, + "learning_rate": 2.3023836199736386e-05, + "loss": 76.3198, + "step": 126630 + }, + { + "epoch": 0.511641624615683, + "grad_norm": 642.3408813476562, + "learning_rate": 2.3021075745113685e-05, + "loss": 76.1383, + "step": 126640 + }, + { + "epoch": 0.5116820258810506, + "grad_norm": 480.7047424316406, + "learning_rate": 2.301831523159355e-05, + "loss": 72.4777, + "step": 126650 + }, + { + "epoch": 0.5117224271464182, + "grad_norm": 377.42578125, + "learning_rate": 2.301555465922981e-05, + "loss": 74.129, + "step": 126660 + }, + { + "epoch": 0.5117628284117859, + "grad_norm": 796.801025390625, + "learning_rate": 2.3012794028076274e-05, + "loss": 129.8187, + "step": 126670 + }, + { + "epoch": 0.5118032296771535, + "grad_norm": 742.3406982421875, + "learning_rate": 2.301003333818678e-05, + "loss": 65.815, + "step": 126680 + }, + { + "epoch": 0.5118436309425212, + "grad_norm": 967.3101806640625, + "learning_rate": 2.300727258961512e-05, + "loss": 75.2797, + "step": 126690 + }, + { + "epoch": 0.5118840322078888, + "grad_norm": 1060.47314453125, + "learning_rate": 2.3004511782415145e-05, + "loss": 59.0808, + "step": 126700 + }, + { + "epoch": 0.5119244334732563, + "grad_norm": 402.1617431640625, + "learning_rate": 2.300175091664066e-05, + "loss": 66.7389, + "step": 126710 + }, + { + "epoch": 0.511964834738624, + "grad_norm": 1233.6219482421875, + "learning_rate": 2.2998989992345496e-05, + "loss": 54.5187, + "step": 126720 + }, + { + "epoch": 0.5120052360039916, + "grad_norm": 912.5408325195312, + "learning_rate": 2.299622900958348e-05, + "loss": 89.5903, + "step": 126730 + }, + { + "epoch": 0.5120456372693593, + "grad_norm": 804.4415283203125, + "learning_rate": 2.299346796840844e-05, + "loss": 58.4938, + "step": 126740 + }, + { + "epoch": 0.5120860385347269, + "grad_norm": 548.144775390625, + "learning_rate": 2.2990706868874196e-05, + "loss": 58.6598, + "step": 126750 + }, + { + "epoch": 0.5121264398000945, + "grad_norm": 1092.3233642578125, + "learning_rate": 2.298794571103458e-05, + "loss": 78.084, + "step": 126760 + }, + { + "epoch": 0.5121668410654622, + "grad_norm": 503.9816589355469, + "learning_rate": 2.298518449494343e-05, + "loss": 114.1756, + "step": 126770 + }, + { + "epoch": 0.5122072423308298, + "grad_norm": 753.3436889648438, + "learning_rate": 2.2982423220654565e-05, + "loss": 65.8313, + "step": 126780 + }, + { + "epoch": 0.5122476435961975, + "grad_norm": 500.0668640136719, + "learning_rate": 2.2979661888221828e-05, + "loss": 53.4223, + "step": 126790 + }, + { + "epoch": 0.5122880448615651, + "grad_norm": 1120.72119140625, + "learning_rate": 2.2976900497699047e-05, + "loss": 138.7726, + "step": 126800 + }, + { + "epoch": 0.5123284461269327, + "grad_norm": 868.3945922851562, + "learning_rate": 2.2974139049140058e-05, + "loss": 70.2053, + "step": 126810 + }, + { + "epoch": 0.5123688473923004, + "grad_norm": 670.7530517578125, + "learning_rate": 2.2971377542598696e-05, + "loss": 61.1059, 
+ "step": 126820 + }, + { + "epoch": 0.5124092486576679, + "grad_norm": 551.9815673828125, + "learning_rate": 2.2968615978128802e-05, + "loss": 96.6111, + "step": 126830 + }, + { + "epoch": 0.5124496499230355, + "grad_norm": 730.4855346679688, + "learning_rate": 2.2965854355784213e-05, + "loss": 60.9909, + "step": 126840 + }, + { + "epoch": 0.5124900511884032, + "grad_norm": 863.8174438476562, + "learning_rate": 2.2963092675618763e-05, + "loss": 73.7566, + "step": 126850 + }, + { + "epoch": 0.5125304524537708, + "grad_norm": 735.92431640625, + "learning_rate": 2.2960330937686296e-05, + "loss": 66.0734, + "step": 126860 + }, + { + "epoch": 0.5125708537191385, + "grad_norm": 776.1450805664062, + "learning_rate": 2.295756914204065e-05, + "loss": 83.421, + "step": 126870 + }, + { + "epoch": 0.5126112549845061, + "grad_norm": 528.0159301757812, + "learning_rate": 2.2954807288735678e-05, + "loss": 65.7211, + "step": 126880 + }, + { + "epoch": 0.5126516562498737, + "grad_norm": 470.9817199707031, + "learning_rate": 2.295204537782521e-05, + "loss": 70.8748, + "step": 126890 + }, + { + "epoch": 0.5126920575152414, + "grad_norm": 415.53778076171875, + "learning_rate": 2.2949283409363103e-05, + "loss": 55.8081, + "step": 126900 + }, + { + "epoch": 0.512732458780609, + "grad_norm": 520.0337524414062, + "learning_rate": 2.2946521383403194e-05, + "loss": 65.0671, + "step": 126910 + }, + { + "epoch": 0.5127728600459767, + "grad_norm": 1074.77978515625, + "learning_rate": 2.2943759299999334e-05, + "loss": 58.5867, + "step": 126920 + }, + { + "epoch": 0.5128132613113443, + "grad_norm": 715.8361206054688, + "learning_rate": 2.2940997159205373e-05, + "loss": 106.8144, + "step": 126930 + }, + { + "epoch": 0.5128536625767119, + "grad_norm": 641.4935913085938, + "learning_rate": 2.293823496107516e-05, + "loss": 69.8603, + "step": 126940 + }, + { + "epoch": 0.5128940638420796, + "grad_norm": 725.38134765625, + "learning_rate": 2.2935472705662533e-05, + "loss": 92.3135, + "step": 126950 + }, + { + "epoch": 0.5129344651074471, + "grad_norm": 421.2032470703125, + "learning_rate": 2.2932710393021365e-05, + "loss": 86.1892, + "step": 126960 + }, + { + "epoch": 0.5129748663728148, + "grad_norm": 726.4468994140625, + "learning_rate": 2.2929948023205494e-05, + "loss": 100.9282, + "step": 126970 + }, + { + "epoch": 0.5130152676381824, + "grad_norm": 565.4813232421875, + "learning_rate": 2.2927185596268777e-05, + "loss": 67.3805, + "step": 126980 + }, + { + "epoch": 0.51305566890355, + "grad_norm": 572.8099365234375, + "learning_rate": 2.2924423112265072e-05, + "loss": 68.5574, + "step": 126990 + }, + { + "epoch": 0.5130960701689177, + "grad_norm": 692.7301635742188, + "learning_rate": 2.2921660571248237e-05, + "loss": 94.6221, + "step": 127000 + }, + { + "epoch": 0.5131364714342853, + "grad_norm": 348.12799072265625, + "learning_rate": 2.2918897973272122e-05, + "loss": 56.6276, + "step": 127010 + }, + { + "epoch": 0.513176872699653, + "grad_norm": 1107.42626953125, + "learning_rate": 2.2916135318390582e-05, + "loss": 82.4434, + "step": 127020 + }, + { + "epoch": 0.5132172739650206, + "grad_norm": 713.937255859375, + "learning_rate": 2.2913372606657485e-05, + "loss": 42.596, + "step": 127030 + }, + { + "epoch": 0.5132576752303882, + "grad_norm": 1060.7271728515625, + "learning_rate": 2.2910609838126696e-05, + "loss": 105.1253, + "step": 127040 + }, + { + "epoch": 0.5132980764957559, + "grad_norm": 730.0079345703125, + "learning_rate": 2.2907847012852064e-05, + "loss": 72.2916, + "step": 127050 + }, + { + "epoch": 
0.5133384777611235, + "grad_norm": 425.6758728027344, + "learning_rate": 2.2905084130887456e-05, + "loss": 59.6958, + "step": 127060 + }, + { + "epoch": 0.5133788790264912, + "grad_norm": 944.4654541015625, + "learning_rate": 2.290232119228674e-05, + "loss": 57.6145, + "step": 127070 + }, + { + "epoch": 0.5134192802918588, + "grad_norm": 885.0126953125, + "learning_rate": 2.289955819710378e-05, + "loss": 82.7746, + "step": 127080 + }, + { + "epoch": 0.5134596815572263, + "grad_norm": 849.8751220703125, + "learning_rate": 2.289679514539244e-05, + "loss": 66.4935, + "step": 127090 + }, + { + "epoch": 0.513500082822594, + "grad_norm": 845.4713134765625, + "learning_rate": 2.2894032037206585e-05, + "loss": 75.5988, + "step": 127100 + }, + { + "epoch": 0.5135404840879616, + "grad_norm": 577.295166015625, + "learning_rate": 2.2891268872600082e-05, + "loss": 60.9919, + "step": 127110 + }, + { + "epoch": 0.5135808853533292, + "grad_norm": 1307.258544921875, + "learning_rate": 2.2888505651626813e-05, + "loss": 65.9796, + "step": 127120 + }, + { + "epoch": 0.5136212866186969, + "grad_norm": 533.4025268554688, + "learning_rate": 2.2885742374340632e-05, + "loss": 83.9973, + "step": 127130 + }, + { + "epoch": 0.5136616878840645, + "grad_norm": 453.2850646972656, + "learning_rate": 2.288297904079542e-05, + "loss": 77.2695, + "step": 127140 + }, + { + "epoch": 0.5137020891494322, + "grad_norm": 1510.9627685546875, + "learning_rate": 2.2880215651045044e-05, + "loss": 103.83, + "step": 127150 + }, + { + "epoch": 0.5137424904147998, + "grad_norm": 444.4743957519531, + "learning_rate": 2.287745220514339e-05, + "loss": 85.5242, + "step": 127160 + }, + { + "epoch": 0.5137828916801674, + "grad_norm": 676.6897583007812, + "learning_rate": 2.2874688703144313e-05, + "loss": 78.0316, + "step": 127170 + }, + { + "epoch": 0.5138232929455351, + "grad_norm": 791.8211669921875, + "learning_rate": 2.2871925145101706e-05, + "loss": 71.0267, + "step": 127180 + }, + { + "epoch": 0.5138636942109027, + "grad_norm": 413.62518310546875, + "learning_rate": 2.286916153106944e-05, + "loss": 94.3967, + "step": 127190 + }, + { + "epoch": 0.5139040954762704, + "grad_norm": 1433.3804931640625, + "learning_rate": 2.286639786110139e-05, + "loss": 92.434, + "step": 127200 + }, + { + "epoch": 0.513944496741638, + "grad_norm": 291.98028564453125, + "learning_rate": 2.2863634135251438e-05, + "loss": 63.7404, + "step": 127210 + }, + { + "epoch": 0.5139848980070055, + "grad_norm": 951.303955078125, + "learning_rate": 2.2860870353573468e-05, + "loss": 67.4417, + "step": 127220 + }, + { + "epoch": 0.5140252992723732, + "grad_norm": 540.9479370117188, + "learning_rate": 2.2858106516121353e-05, + "loss": 67.1702, + "step": 127230 + }, + { + "epoch": 0.5140657005377408, + "grad_norm": 824.615234375, + "learning_rate": 2.2855342622948978e-05, + "loss": 62.1932, + "step": 127240 + }, + { + "epoch": 0.5141061018031085, + "grad_norm": 517.2991943359375, + "learning_rate": 2.2852578674110238e-05, + "loss": 108.4044, + "step": 127250 + }, + { + "epoch": 0.5141465030684761, + "grad_norm": 950.1973876953125, + "learning_rate": 2.2849814669659e-05, + "loss": 81.2345, + "step": 127260 + }, + { + "epoch": 0.5141869043338437, + "grad_norm": 330.2584533691406, + "learning_rate": 2.2847050609649162e-05, + "loss": 95.8909, + "step": 127270 + }, + { + "epoch": 0.5142273055992114, + "grad_norm": 602.4452514648438, + "learning_rate": 2.2844286494134602e-05, + "loss": 80.0448, + "step": 127280 + }, + { + "epoch": 0.514267706864579, + "grad_norm": 505.6949462890625, 
+ "learning_rate": 2.284152232316922e-05, + "loss": 61.7532, + "step": 127290 + }, + { + "epoch": 0.5143081081299467, + "grad_norm": 604.4889526367188, + "learning_rate": 2.283875809680689e-05, + "loss": 77.5452, + "step": 127300 + }, + { + "epoch": 0.5143485093953143, + "grad_norm": 1889.63671875, + "learning_rate": 2.2835993815101513e-05, + "loss": 94.6216, + "step": 127310 + }, + { + "epoch": 0.5143889106606819, + "grad_norm": 258.3216857910156, + "learning_rate": 2.2833229478106974e-05, + "loss": 61.9295, + "step": 127320 + }, + { + "epoch": 0.5144293119260496, + "grad_norm": 895.0501098632812, + "learning_rate": 2.2830465085877173e-05, + "loss": 97.6931, + "step": 127330 + }, + { + "epoch": 0.5144697131914172, + "grad_norm": 709.9806518554688, + "learning_rate": 2.282770063846599e-05, + "loss": 91.0569, + "step": 127340 + }, + { + "epoch": 0.5145101144567847, + "grad_norm": 381.3973388671875, + "learning_rate": 2.282493613592734e-05, + "loss": 55.216, + "step": 127350 + }, + { + "epoch": 0.5145505157221524, + "grad_norm": 479.5680236816406, + "learning_rate": 2.2822171578315093e-05, + "loss": 75.1297, + "step": 127360 + }, + { + "epoch": 0.51459091698752, + "grad_norm": 357.3561096191406, + "learning_rate": 2.2819406965683168e-05, + "loss": 115.3988, + "step": 127370 + }, + { + "epoch": 0.5146313182528877, + "grad_norm": 1215.04638671875, + "learning_rate": 2.2816642298085442e-05, + "loss": 142.3054, + "step": 127380 + }, + { + "epoch": 0.5146717195182553, + "grad_norm": 855.3469848632812, + "learning_rate": 2.2813877575575827e-05, + "loss": 61.1598, + "step": 127390 + }, + { + "epoch": 0.5147121207836229, + "grad_norm": 1116.9388427734375, + "learning_rate": 2.281111279820822e-05, + "loss": 97.4564, + "step": 127400 + }, + { + "epoch": 0.5147525220489906, + "grad_norm": 546.7578735351562, + "learning_rate": 2.280834796603653e-05, + "loss": 59.1521, + "step": 127410 + }, + { + "epoch": 0.5147929233143582, + "grad_norm": 530.3707275390625, + "learning_rate": 2.280558307911464e-05, + "loss": 69.4647, + "step": 127420 + }, + { + "epoch": 0.5148333245797259, + "grad_norm": 910.5916748046875, + "learning_rate": 2.280281813749647e-05, + "loss": 74.3504, + "step": 127430 + }, + { + "epoch": 0.5148737258450935, + "grad_norm": 869.7164916992188, + "learning_rate": 2.2800053141235917e-05, + "loss": 117.7524, + "step": 127440 + }, + { + "epoch": 0.5149141271104611, + "grad_norm": 1334.7384033203125, + "learning_rate": 2.2797288090386886e-05, + "loss": 94.5149, + "step": 127450 + }, + { + "epoch": 0.5149545283758288, + "grad_norm": 496.0525207519531, + "learning_rate": 2.279452298500328e-05, + "loss": 61.4322, + "step": 127460 + }, + { + "epoch": 0.5149949296411963, + "grad_norm": 1261.5916748046875, + "learning_rate": 2.2791757825139012e-05, + "loss": 76.0967, + "step": 127470 + }, + { + "epoch": 0.515035330906564, + "grad_norm": 1322.6468505859375, + "learning_rate": 2.2788992610847987e-05, + "loss": 113.1507, + "step": 127480 + }, + { + "epoch": 0.5150757321719316, + "grad_norm": 1733.2835693359375, + "learning_rate": 2.278622734218412e-05, + "loss": 108.0501, + "step": 127490 + }, + { + "epoch": 0.5151161334372992, + "grad_norm": 743.2252197265625, + "learning_rate": 2.278346201920131e-05, + "loss": 61.0121, + "step": 127500 + }, + { + "epoch": 0.5151565347026669, + "grad_norm": 777.6689453125, + "learning_rate": 2.2780696641953476e-05, + "loss": 60.5815, + "step": 127510 + }, + { + "epoch": 0.5151969359680345, + "grad_norm": 386.90191650390625, + "learning_rate": 2.2777931210494537e-05, + 
"loss": 53.062, + "step": 127520 + }, + { + "epoch": 0.5152373372334021, + "grad_norm": 746.7528686523438, + "learning_rate": 2.2775165724878395e-05, + "loss": 58.999, + "step": 127530 + }, + { + "epoch": 0.5152777384987698, + "grad_norm": 1117.9468994140625, + "learning_rate": 2.2772400185158965e-05, + "loss": 105.3529, + "step": 127540 + }, + { + "epoch": 0.5153181397641374, + "grad_norm": 936.1934814453125, + "learning_rate": 2.2769634591390167e-05, + "loss": 77.7949, + "step": 127550 + }, + { + "epoch": 0.5153585410295051, + "grad_norm": 316.4607238769531, + "learning_rate": 2.2766868943625914e-05, + "loss": 68.1341, + "step": 127560 + }, + { + "epoch": 0.5153989422948727, + "grad_norm": 641.5528564453125, + "learning_rate": 2.2764103241920134e-05, + "loss": 63.4791, + "step": 127570 + }, + { + "epoch": 0.5154393435602403, + "grad_norm": 840.3245849609375, + "learning_rate": 2.276133748632673e-05, + "loss": 68.67, + "step": 127580 + }, + { + "epoch": 0.515479744825608, + "grad_norm": 986.2763061523438, + "learning_rate": 2.2758571676899634e-05, + "loss": 126.6323, + "step": 127590 + }, + { + "epoch": 0.5155201460909755, + "grad_norm": 719.0609130859375, + "learning_rate": 2.275580581369276e-05, + "loss": 59.6182, + "step": 127600 + }, + { + "epoch": 0.5155605473563432, + "grad_norm": 808.1866455078125, + "learning_rate": 2.275303989676004e-05, + "loss": 53.4708, + "step": 127610 + }, + { + "epoch": 0.5156009486217108, + "grad_norm": 620.6580810546875, + "learning_rate": 2.275027392615539e-05, + "loss": 79.4879, + "step": 127620 + }, + { + "epoch": 0.5156413498870784, + "grad_norm": 432.0035400390625, + "learning_rate": 2.2747507901932728e-05, + "loss": 63.1072, + "step": 127630 + }, + { + "epoch": 0.5156817511524461, + "grad_norm": 259.4426574707031, + "learning_rate": 2.2744741824145983e-05, + "loss": 62.2793, + "step": 127640 + }, + { + "epoch": 0.5157221524178137, + "grad_norm": 1303.3416748046875, + "learning_rate": 2.2741975692849093e-05, + "loss": 73.9436, + "step": 127650 + }, + { + "epoch": 0.5157625536831814, + "grad_norm": 855.0846557617188, + "learning_rate": 2.2739209508095965e-05, + "loss": 62.7912, + "step": 127660 + }, + { + "epoch": 0.515802954948549, + "grad_norm": 450.9022216796875, + "learning_rate": 2.273644326994054e-05, + "loss": 72.851, + "step": 127670 + }, + { + "epoch": 0.5158433562139166, + "grad_norm": 602.1485595703125, + "learning_rate": 2.273367697843674e-05, + "loss": 73.9034, + "step": 127680 + }, + { + "epoch": 0.5158837574792843, + "grad_norm": 704.0927124023438, + "learning_rate": 2.273091063363851e-05, + "loss": 54.9359, + "step": 127690 + }, + { + "epoch": 0.5159241587446519, + "grad_norm": 334.3456726074219, + "learning_rate": 2.272814423559977e-05, + "loss": 80.3695, + "step": 127700 + }, + { + "epoch": 0.5159645600100196, + "grad_norm": 875.7000732421875, + "learning_rate": 2.2725377784374452e-05, + "loss": 86.2559, + "step": 127710 + }, + { + "epoch": 0.5160049612753872, + "grad_norm": 346.68682861328125, + "learning_rate": 2.272261128001649e-05, + "loss": 68.9791, + "step": 127720 + }, + { + "epoch": 0.5160453625407547, + "grad_norm": 774.8905029296875, + "learning_rate": 2.2719844722579815e-05, + "loss": 46.5582, + "step": 127730 + }, + { + "epoch": 0.5160857638061224, + "grad_norm": 1408.300537109375, + "learning_rate": 2.2717078112118373e-05, + "loss": 78.7004, + "step": 127740 + }, + { + "epoch": 0.51612616507149, + "grad_norm": 625.7191162109375, + "learning_rate": 2.2714311448686092e-05, + "loss": 65.4093, + "step": 127750 + }, + { + 
"epoch": 0.5161665663368576, + "grad_norm": 539.7301025390625, + "learning_rate": 2.271154473233691e-05, + "loss": 99.0346, + "step": 127760 + }, + { + "epoch": 0.5162069676022253, + "grad_norm": 779.3140258789062, + "learning_rate": 2.2708777963124768e-05, + "loss": 69.3211, + "step": 127770 + }, + { + "epoch": 0.5162473688675929, + "grad_norm": 2674.43505859375, + "learning_rate": 2.2706011141103606e-05, + "loss": 119.8558, + "step": 127780 + }, + { + "epoch": 0.5162877701329606, + "grad_norm": 1058.7064208984375, + "learning_rate": 2.270324426632737e-05, + "loss": 79.601, + "step": 127790 + }, + { + "epoch": 0.5163281713983282, + "grad_norm": 787.1674194335938, + "learning_rate": 2.2700477338849985e-05, + "loss": 89.7437, + "step": 127800 + }, + { + "epoch": 0.5163685726636958, + "grad_norm": 1254.3388671875, + "learning_rate": 2.269771035872541e-05, + "loss": 104.1269, + "step": 127810 + }, + { + "epoch": 0.5164089739290635, + "grad_norm": 896.8914184570312, + "learning_rate": 2.2694943326007582e-05, + "loss": 96.4289, + "step": 127820 + }, + { + "epoch": 0.5164493751944311, + "grad_norm": 776.43359375, + "learning_rate": 2.2692176240750447e-05, + "loss": 61.7469, + "step": 127830 + }, + { + "epoch": 0.5164897764597988, + "grad_norm": 1093.8990478515625, + "learning_rate": 2.2689409103007946e-05, + "loss": 66.2826, + "step": 127840 + }, + { + "epoch": 0.5165301777251664, + "grad_norm": 975.5880126953125, + "learning_rate": 2.268664191283403e-05, + "loss": 70.9319, + "step": 127850 + }, + { + "epoch": 0.5165705789905339, + "grad_norm": 368.46185302734375, + "learning_rate": 2.2683874670282654e-05, + "loss": 102.824, + "step": 127860 + }, + { + "epoch": 0.5166109802559016, + "grad_norm": 808.557373046875, + "learning_rate": 2.2681107375407755e-05, + "loss": 103.2197, + "step": 127870 + }, + { + "epoch": 0.5166513815212692, + "grad_norm": 719.295166015625, + "learning_rate": 2.267834002826329e-05, + "loss": 100.2938, + "step": 127880 + }, + { + "epoch": 0.5166917827866369, + "grad_norm": 1576.9075927734375, + "learning_rate": 2.26755726289032e-05, + "loss": 85.8429, + "step": 127890 + }, + { + "epoch": 0.5167321840520045, + "grad_norm": 560.0940551757812, + "learning_rate": 2.2672805177381453e-05, + "loss": 51.7901, + "step": 127900 + }, + { + "epoch": 0.5167725853173721, + "grad_norm": 959.1209106445312, + "learning_rate": 2.2670037673751984e-05, + "loss": 105.1194, + "step": 127910 + }, + { + "epoch": 0.5168129865827398, + "grad_norm": 918.0587158203125, + "learning_rate": 2.266727011806876e-05, + "loss": 81.2816, + "step": 127920 + }, + { + "epoch": 0.5168533878481074, + "grad_norm": 498.4952697753906, + "learning_rate": 2.266450251038573e-05, + "loss": 78.1654, + "step": 127930 + }, + { + "epoch": 0.516893789113475, + "grad_norm": 1018.7554931640625, + "learning_rate": 2.2661734850756856e-05, + "loss": 77.2383, + "step": 127940 + }, + { + "epoch": 0.5169341903788427, + "grad_norm": 644.2182006835938, + "learning_rate": 2.265896713923609e-05, + "loss": 61.4236, + "step": 127950 + }, + { + "epoch": 0.5169745916442103, + "grad_norm": 926.2988891601562, + "learning_rate": 2.265619937587739e-05, + "loss": 66.1119, + "step": 127960 + }, + { + "epoch": 0.517014992909578, + "grad_norm": 730.2682495117188, + "learning_rate": 2.2653431560734717e-05, + "loss": 82.3278, + "step": 127970 + }, + { + "epoch": 0.5170553941749455, + "grad_norm": 1155.07470703125, + "learning_rate": 2.2650663693862033e-05, + "loss": 146.973, + "step": 127980 + }, + { + "epoch": 0.5170957954403131, + "grad_norm": 
354.4833679199219, + "learning_rate": 2.2647895775313285e-05, + "loss": 94.4936, + "step": 127990 + }, + { + "epoch": 0.5171361967056808, + "grad_norm": 505.5339050292969, + "learning_rate": 2.2645127805142453e-05, + "loss": 102.7702, + "step": 128000 + }, + { + "epoch": 0.5171765979710484, + "grad_norm": 660.2139282226562, + "learning_rate": 2.2642359783403486e-05, + "loss": 53.3763, + "step": 128010 + }, + { + "epoch": 0.5172169992364161, + "grad_norm": 745.9378662109375, + "learning_rate": 2.2639591710150364e-05, + "loss": 63.9682, + "step": 128020 + }, + { + "epoch": 0.5172574005017837, + "grad_norm": 613.600341796875, + "learning_rate": 2.2636823585437036e-05, + "loss": 81.6483, + "step": 128030 + }, + { + "epoch": 0.5172978017671513, + "grad_norm": 5011.29296875, + "learning_rate": 2.263405540931748e-05, + "loss": 85.7408, + "step": 128040 + }, + { + "epoch": 0.517338203032519, + "grad_norm": 756.796142578125, + "learning_rate": 2.2631287181845654e-05, + "loss": 70.4548, + "step": 128050 + }, + { + "epoch": 0.5173786042978866, + "grad_norm": 494.209228515625, + "learning_rate": 2.262851890307553e-05, + "loss": 79.1947, + "step": 128060 + }, + { + "epoch": 0.5174190055632543, + "grad_norm": 1440.4775390625, + "learning_rate": 2.2625750573061075e-05, + "loss": 68.5257, + "step": 128070 + }, + { + "epoch": 0.5174594068286219, + "grad_norm": 477.4607238769531, + "learning_rate": 2.2622982191856267e-05, + "loss": 63.6709, + "step": 128080 + }, + { + "epoch": 0.5174998080939895, + "grad_norm": 1164.52978515625, + "learning_rate": 2.2620213759515063e-05, + "loss": 130.53, + "step": 128090 + }, + { + "epoch": 0.5175402093593572, + "grad_norm": 1201.39013671875, + "learning_rate": 2.2617445276091447e-05, + "loss": 64.4667, + "step": 128100 + }, + { + "epoch": 0.5175806106247247, + "grad_norm": 818.1260375976562, + "learning_rate": 2.261467674163939e-05, + "loss": 81.6477, + "step": 128110 + }, + { + "epoch": 0.5176210118900924, + "grad_norm": 812.8392944335938, + "learning_rate": 2.2611908156212853e-05, + "loss": 81.9108, + "step": 128120 + }, + { + "epoch": 0.51766141315546, + "grad_norm": 613.9247436523438, + "learning_rate": 2.2609139519865835e-05, + "loss": 62.6281, + "step": 128130 + }, + { + "epoch": 0.5177018144208276, + "grad_norm": 619.42626953125, + "learning_rate": 2.2606370832652295e-05, + "loss": 55.0663, + "step": 128140 + }, + { + "epoch": 0.5177422156861953, + "grad_norm": 728.260009765625, + "learning_rate": 2.260360209462621e-05, + "loss": 58.693, + "step": 128150 + }, + { + "epoch": 0.5177826169515629, + "grad_norm": 1686.051025390625, + "learning_rate": 2.2600833305841567e-05, + "loss": 107.1985, + "step": 128160 + }, + { + "epoch": 0.5178230182169306, + "grad_norm": 493.4838562011719, + "learning_rate": 2.259806446635233e-05, + "loss": 81.2814, + "step": 128170 + }, + { + "epoch": 0.5178634194822982, + "grad_norm": 844.499755859375, + "learning_rate": 2.2595295576212502e-05, + "loss": 84.5315, + "step": 128180 + }, + { + "epoch": 0.5179038207476658, + "grad_norm": 1077.319091796875, + "learning_rate": 2.2592526635476042e-05, + "loss": 66.0768, + "step": 128190 + }, + { + "epoch": 0.5179442220130335, + "grad_norm": 773.546630859375, + "learning_rate": 2.258975764419694e-05, + "loss": 65.626, + "step": 128200 + }, + { + "epoch": 0.5179846232784011, + "grad_norm": 5192.1201171875, + "learning_rate": 2.2586988602429188e-05, + "loss": 131.22, + "step": 128210 + }, + { + "epoch": 0.5180250245437688, + "grad_norm": 634.14111328125, + "learning_rate": 2.2584219510226754e-05, + 
"loss": 64.264, + "step": 128220 + }, + { + "epoch": 0.5180654258091364, + "grad_norm": 1268.67578125, + "learning_rate": 2.2581450367643633e-05, + "loss": 80.8711, + "step": 128230 + }, + { + "epoch": 0.5181058270745039, + "grad_norm": 709.5951538085938, + "learning_rate": 2.2578681174733802e-05, + "loss": 66.9248, + "step": 128240 + }, + { + "epoch": 0.5181462283398716, + "grad_norm": 496.3175048828125, + "learning_rate": 2.257591193155126e-05, + "loss": 71.1901, + "step": 128250 + }, + { + "epoch": 0.5181866296052392, + "grad_norm": 785.0675048828125, + "learning_rate": 2.2573142638149985e-05, + "loss": 103.4522, + "step": 128260 + }, + { + "epoch": 0.5182270308706068, + "grad_norm": 454.5794677734375, + "learning_rate": 2.257037329458397e-05, + "loss": 127.8294, + "step": 128270 + }, + { + "epoch": 0.5182674321359745, + "grad_norm": 506.7137756347656, + "learning_rate": 2.2567603900907206e-05, + "loss": 56.733, + "step": 128280 + }, + { + "epoch": 0.5183078334013421, + "grad_norm": 493.1957092285156, + "learning_rate": 2.2564834457173684e-05, + "loss": 55.2938, + "step": 128290 + }, + { + "epoch": 0.5183482346667098, + "grad_norm": 802.1942749023438, + "learning_rate": 2.2562064963437392e-05, + "loss": 87.8438, + "step": 128300 + }, + { + "epoch": 0.5183886359320774, + "grad_norm": 933.9495849609375, + "learning_rate": 2.255929541975233e-05, + "loss": 123.9426, + "step": 128310 + }, + { + "epoch": 0.518429037197445, + "grad_norm": 721.01220703125, + "learning_rate": 2.2556525826172476e-05, + "loss": 51.2553, + "step": 128320 + }, + { + "epoch": 0.5184694384628127, + "grad_norm": 769.9254760742188, + "learning_rate": 2.255375618275184e-05, + "loss": 57.4941, + "step": 128330 + }, + { + "epoch": 0.5185098397281803, + "grad_norm": 599.5415649414062, + "learning_rate": 2.255098648954441e-05, + "loss": 74.4087, + "step": 128340 + }, + { + "epoch": 0.518550240993548, + "grad_norm": 708.5186767578125, + "learning_rate": 2.254821674660419e-05, + "loss": 62.7068, + "step": 128350 + }, + { + "epoch": 0.5185906422589156, + "grad_norm": 286.6327819824219, + "learning_rate": 2.2545446953985163e-05, + "loss": 65.6787, + "step": 128360 + }, + { + "epoch": 0.5186310435242831, + "grad_norm": 787.7909545898438, + "learning_rate": 2.2542677111741346e-05, + "loss": 70.381, + "step": 128370 + }, + { + "epoch": 0.5186714447896508, + "grad_norm": 1032.311279296875, + "learning_rate": 2.2539907219926728e-05, + "loss": 84.1738, + "step": 128380 + }, + { + "epoch": 0.5187118460550184, + "grad_norm": 1512.6717529296875, + "learning_rate": 2.253713727859531e-05, + "loss": 158.7189, + "step": 128390 + }, + { + "epoch": 0.518752247320386, + "grad_norm": 586.01123046875, + "learning_rate": 2.2534367287801096e-05, + "loss": 96.2008, + "step": 128400 + }, + { + "epoch": 0.5187926485857537, + "grad_norm": 846.8231201171875, + "learning_rate": 2.2531597247598086e-05, + "loss": 130.8776, + "step": 128410 + }, + { + "epoch": 0.5188330498511213, + "grad_norm": 1047.3594970703125, + "learning_rate": 2.2528827158040282e-05, + "loss": 52.7548, + "step": 128420 + }, + { + "epoch": 0.518873451116489, + "grad_norm": 755.9702758789062, + "learning_rate": 2.2526057019181695e-05, + "loss": 68.2876, + "step": 128430 + }, + { + "epoch": 0.5189138523818566, + "grad_norm": 1486.72705078125, + "learning_rate": 2.2523286831076324e-05, + "loss": 82.5156, + "step": 128440 + }, + { + "epoch": 0.5189542536472243, + "grad_norm": 896.5536499023438, + "learning_rate": 2.2520516593778173e-05, + "loss": 63.2248, + "step": 128450 + }, + { + 
"epoch": 0.5189946549125919, + "grad_norm": 572.5785522460938, + "learning_rate": 2.251774630734126e-05, + "loss": 49.2412, + "step": 128460 + }, + { + "epoch": 0.5190350561779595, + "grad_norm": 748.6103515625, + "learning_rate": 2.2514975971819585e-05, + "loss": 70.3511, + "step": 128470 + }, + { + "epoch": 0.5190754574433272, + "grad_norm": 1040.3345947265625, + "learning_rate": 2.2512205587267158e-05, + "loss": 104.3674, + "step": 128480 + }, + { + "epoch": 0.5191158587086948, + "grad_norm": 1297.327880859375, + "learning_rate": 2.250943515373799e-05, + "loss": 85.9068, + "step": 128490 + }, + { + "epoch": 0.5191562599740623, + "grad_norm": 887.408935546875, + "learning_rate": 2.2506664671286087e-05, + "loss": 123.0906, + "step": 128500 + }, + { + "epoch": 0.51919666123943, + "grad_norm": 922.3358154296875, + "learning_rate": 2.2503894139965473e-05, + "loss": 126.5924, + "step": 128510 + }, + { + "epoch": 0.5192370625047976, + "grad_norm": 907.76806640625, + "learning_rate": 2.250112355983015e-05, + "loss": 67.1993, + "step": 128520 + }, + { + "epoch": 0.5192774637701653, + "grad_norm": 760.2096557617188, + "learning_rate": 2.2498352930934132e-05, + "loss": 68.7209, + "step": 128530 + }, + { + "epoch": 0.5193178650355329, + "grad_norm": 611.1083984375, + "learning_rate": 2.249558225333144e-05, + "loss": 71.3295, + "step": 128540 + }, + { + "epoch": 0.5193582663009005, + "grad_norm": 591.5237426757812, + "learning_rate": 2.2492811527076093e-05, + "loss": 67.282, + "step": 128550 + }, + { + "epoch": 0.5193986675662682, + "grad_norm": 984.5997314453125, + "learning_rate": 2.2490040752222092e-05, + "loss": 92.6327, + "step": 128560 + }, + { + "epoch": 0.5194390688316358, + "grad_norm": 287.62396240234375, + "learning_rate": 2.248726992882347e-05, + "loss": 117.1928, + "step": 128570 + }, + { + "epoch": 0.5194794700970035, + "grad_norm": 635.9156494140625, + "learning_rate": 2.2484499056934243e-05, + "loss": 83.9338, + "step": 128580 + }, + { + "epoch": 0.5195198713623711, + "grad_norm": 672.2274780273438, + "learning_rate": 2.248172813660843e-05, + "loss": 54.8157, + "step": 128590 + }, + { + "epoch": 0.5195602726277387, + "grad_norm": 872.3346557617188, + "learning_rate": 2.2478957167900038e-05, + "loss": 84.9092, + "step": 128600 + }, + { + "epoch": 0.5196006738931064, + "grad_norm": 632.5486450195312, + "learning_rate": 2.2476186150863105e-05, + "loss": 72.527, + "step": 128610 + }, + { + "epoch": 0.5196410751584739, + "grad_norm": 873.955322265625, + "learning_rate": 2.2473415085551647e-05, + "loss": 123.8708, + "step": 128620 + }, + { + "epoch": 0.5196814764238415, + "grad_norm": 508.9787292480469, + "learning_rate": 2.247064397201969e-05, + "loss": 73.2428, + "step": 128630 + }, + { + "epoch": 0.5197218776892092, + "grad_norm": 605.8071899414062, + "learning_rate": 2.246787281032126e-05, + "loss": 46.5398, + "step": 128640 + }, + { + "epoch": 0.5197622789545768, + "grad_norm": 271.5496520996094, + "learning_rate": 2.2465101600510376e-05, + "loss": 48.413, + "step": 128650 + }, + { + "epoch": 0.5198026802199445, + "grad_norm": 953.044677734375, + "learning_rate": 2.2462330342641065e-05, + "loss": 77.1462, + "step": 128660 + }, + { + "epoch": 0.5198430814853121, + "grad_norm": 594.0732421875, + "learning_rate": 2.245955903676736e-05, + "loss": 97.3221, + "step": 128670 + }, + { + "epoch": 0.5198834827506797, + "grad_norm": 686.4603881835938, + "learning_rate": 2.2456787682943288e-05, + "loss": 53.9137, + "step": 128680 + }, + { + "epoch": 0.5199238840160474, + "grad_norm": 
472.2752990722656, + "learning_rate": 2.245401628122287e-05, + "loss": 79.6793, + "step": 128690 + }, + { + "epoch": 0.519964285281415, + "grad_norm": 721.8251953125, + "learning_rate": 2.2451244831660136e-05, + "loss": 90.9087, + "step": 128700 + }, + { + "epoch": 0.5200046865467827, + "grad_norm": 1281.31298828125, + "learning_rate": 2.2448473334309132e-05, + "loss": 118.4524, + "step": 128710 + }, + { + "epoch": 0.5200450878121503, + "grad_norm": 858.774658203125, + "learning_rate": 2.2445701789223877e-05, + "loss": 61.8809, + "step": 128720 + }, + { + "epoch": 0.520085489077518, + "grad_norm": 893.1869506835938, + "learning_rate": 2.244293019645841e-05, + "loss": 66.9678, + "step": 128730 + }, + { + "epoch": 0.5201258903428856, + "grad_norm": 612.7135620117188, + "learning_rate": 2.2440158556066756e-05, + "loss": 78.6322, + "step": 128740 + }, + { + "epoch": 0.5201662916082531, + "grad_norm": 324.9839782714844, + "learning_rate": 2.243738686810295e-05, + "loss": 102.6982, + "step": 128750 + }, + { + "epoch": 0.5202066928736208, + "grad_norm": 853.4454956054688, + "learning_rate": 2.243461513262104e-05, + "loss": 73.0453, + "step": 128760 + }, + { + "epoch": 0.5202470941389884, + "grad_norm": 693.8712158203125, + "learning_rate": 2.2431843349675054e-05, + "loss": 73.9218, + "step": 128770 + }, + { + "epoch": 0.520287495404356, + "grad_norm": 992.0349731445312, + "learning_rate": 2.2429071519319026e-05, + "loss": 75.3314, + "step": 128780 + }, + { + "epoch": 0.5203278966697237, + "grad_norm": 241.26255798339844, + "learning_rate": 2.2426299641607e-05, + "loss": 165.7581, + "step": 128790 + }, + { + "epoch": 0.5203682979350913, + "grad_norm": 1119.7099609375, + "learning_rate": 2.2423527716593014e-05, + "loss": 78.4267, + "step": 128800 + }, + { + "epoch": 0.520408699200459, + "grad_norm": 378.37042236328125, + "learning_rate": 2.2420755744331104e-05, + "loss": 134.5848, + "step": 128810 + }, + { + "epoch": 0.5204491004658266, + "grad_norm": 610.6124267578125, + "learning_rate": 2.2417983724875324e-05, + "loss": 68.131, + "step": 128820 + }, + { + "epoch": 0.5204895017311942, + "grad_norm": 897.3707275390625, + "learning_rate": 2.2415211658279697e-05, + "loss": 68.1885, + "step": 128830 + }, + { + "epoch": 0.5205299029965619, + "grad_norm": 964.7876586914062, + "learning_rate": 2.241243954459828e-05, + "loss": 71.7243, + "step": 128840 + }, + { + "epoch": 0.5205703042619295, + "grad_norm": 772.0202026367188, + "learning_rate": 2.2409667383885106e-05, + "loss": 69.2844, + "step": 128850 + }, + { + "epoch": 0.5206107055272972, + "grad_norm": 445.08056640625, + "learning_rate": 2.240689517619423e-05, + "loss": 78.2768, + "step": 128860 + }, + { + "epoch": 0.5206511067926648, + "grad_norm": 670.1382446289062, + "learning_rate": 2.240412292157969e-05, + "loss": 96.4186, + "step": 128870 + }, + { + "epoch": 0.5206915080580323, + "grad_norm": 1261.2335205078125, + "learning_rate": 2.2401350620095547e-05, + "loss": 94.6326, + "step": 128880 + }, + { + "epoch": 0.5207319093234, + "grad_norm": 716.9602661132812, + "learning_rate": 2.2398578271795826e-05, + "loss": 63.716, + "step": 128890 + }, + { + "epoch": 0.5207723105887676, + "grad_norm": 1226.42724609375, + "learning_rate": 2.2395805876734595e-05, + "loss": 75.5557, + "step": 128900 + }, + { + "epoch": 0.5208127118541352, + "grad_norm": 1476.7620849609375, + "learning_rate": 2.2393033434965887e-05, + "loss": 51.694, + "step": 128910 + }, + { + "epoch": 0.5208531131195029, + "grad_norm": 1644.484130859375, + "learning_rate": 
2.239026094654377e-05, + "loss": 89.6839, + "step": 128920 + }, + { + "epoch": 0.5208935143848705, + "grad_norm": 474.09320068359375, + "learning_rate": 2.2387488411522273e-05, + "loss": 62.8448, + "step": 128930 + }, + { + "epoch": 0.5209339156502382, + "grad_norm": 1125.1142578125, + "learning_rate": 2.238471582995547e-05, + "loss": 95.4031, + "step": 128940 + }, + { + "epoch": 0.5209743169156058, + "grad_norm": 624.5740966796875, + "learning_rate": 2.23819432018974e-05, + "loss": 58.0397, + "step": 128950 + }, + { + "epoch": 0.5210147181809734, + "grad_norm": 690.0698852539062, + "learning_rate": 2.237917052740212e-05, + "loss": 142.9776, + "step": 128960 + }, + { + "epoch": 0.5210551194463411, + "grad_norm": 699.4928588867188, + "learning_rate": 2.2376397806523685e-05, + "loss": 73.5402, + "step": 128970 + }, + { + "epoch": 0.5210955207117087, + "grad_norm": 872.5759887695312, + "learning_rate": 2.2373625039316153e-05, + "loss": 66.9392, + "step": 128980 + }, + { + "epoch": 0.5211359219770764, + "grad_norm": 454.537841796875, + "learning_rate": 2.237085222583358e-05, + "loss": 60.3344, + "step": 128990 + }, + { + "epoch": 0.521176323242444, + "grad_norm": 574.2120361328125, + "learning_rate": 2.2368079366130028e-05, + "loss": 70.2295, + "step": 129000 + }, + { + "epoch": 0.5212167245078115, + "grad_norm": 437.72686767578125, + "learning_rate": 2.236530646025954e-05, + "loss": 80.5022, + "step": 129010 + }, + { + "epoch": 0.5212571257731792, + "grad_norm": 942.4771728515625, + "learning_rate": 2.2362533508276182e-05, + "loss": 91.3278, + "step": 129020 + }, + { + "epoch": 0.5212975270385468, + "grad_norm": 579.5571899414062, + "learning_rate": 2.2359760510234026e-05, + "loss": 93.6877, + "step": 129030 + }, + { + "epoch": 0.5213379283039145, + "grad_norm": 728.0989379882812, + "learning_rate": 2.235698746618712e-05, + "loss": 90.9058, + "step": 129040 + }, + { + "epoch": 0.5213783295692821, + "grad_norm": 743.5670166015625, + "learning_rate": 2.235421437618953e-05, + "loss": 67.091, + "step": 129050 + }, + { + "epoch": 0.5214187308346497, + "grad_norm": 446.174072265625, + "learning_rate": 2.235144124029532e-05, + "loss": 75.2549, + "step": 129060 + }, + { + "epoch": 0.5214591321000174, + "grad_norm": 636.5354614257812, + "learning_rate": 2.2348668058558553e-05, + "loss": 44.6371, + "step": 129070 + }, + { + "epoch": 0.521499533365385, + "grad_norm": 735.9398193359375, + "learning_rate": 2.2345894831033287e-05, + "loss": 76.0227, + "step": 129080 + }, + { + "epoch": 0.5215399346307527, + "grad_norm": 734.9454345703125, + "learning_rate": 2.2343121557773605e-05, + "loss": 82.7572, + "step": 129090 + }, + { + "epoch": 0.5215803358961203, + "grad_norm": 716.6236572265625, + "learning_rate": 2.2340348238833555e-05, + "loss": 74.5575, + "step": 129100 + }, + { + "epoch": 0.5216207371614879, + "grad_norm": 884.352783203125, + "learning_rate": 2.2337574874267208e-05, + "loss": 75.2737, + "step": 129110 + }, + { + "epoch": 0.5216611384268556, + "grad_norm": 734.054931640625, + "learning_rate": 2.2334801464128638e-05, + "loss": 130.2802, + "step": 129120 + }, + { + "epoch": 0.5217015396922232, + "grad_norm": 867.4472045898438, + "learning_rate": 2.2332028008471914e-05, + "loss": 72.7202, + "step": 129130 + }, + { + "epoch": 0.5217419409575907, + "grad_norm": 275.8939514160156, + "learning_rate": 2.2329254507351103e-05, + "loss": 104.9467, + "step": 129140 + }, + { + "epoch": 0.5217823422229584, + "grad_norm": 1158.783447265625, + "learning_rate": 2.2326480960820273e-05, + "loss": 63.4585, + 
"step": 129150 + }, + { + "epoch": 0.521822743488326, + "grad_norm": 522.4434204101562, + "learning_rate": 2.23237073689335e-05, + "loss": 72.5293, + "step": 129160 + }, + { + "epoch": 0.5218631447536937, + "grad_norm": 889.4488525390625, + "learning_rate": 2.232093373174486e-05, + "loss": 101.92, + "step": 129170 + }, + { + "epoch": 0.5219035460190613, + "grad_norm": 1106.51611328125, + "learning_rate": 2.2318160049308413e-05, + "loss": 67.9078, + "step": 129180 + }, + { + "epoch": 0.5219439472844289, + "grad_norm": 379.05609130859375, + "learning_rate": 2.231538632167825e-05, + "loss": 61.8045, + "step": 129190 + }, + { + "epoch": 0.5219843485497966, + "grad_norm": 437.3791198730469, + "learning_rate": 2.2312612548908436e-05, + "loss": 70.573, + "step": 129200 + }, + { + "epoch": 0.5220247498151642, + "grad_norm": 344.8139953613281, + "learning_rate": 2.2309838731053047e-05, + "loss": 42.0089, + "step": 129210 + }, + { + "epoch": 0.5220651510805319, + "grad_norm": 621.1980590820312, + "learning_rate": 2.2307064868166165e-05, + "loss": 84.6923, + "step": 129220 + }, + { + "epoch": 0.5221055523458995, + "grad_norm": 614.956787109375, + "learning_rate": 2.2304290960301868e-05, + "loss": 68.0823, + "step": 129230 + }, + { + "epoch": 0.5221459536112671, + "grad_norm": 707.105712890625, + "learning_rate": 2.2301517007514232e-05, + "loss": 94.1344, + "step": 129240 + }, + { + "epoch": 0.5221863548766348, + "grad_norm": 244.5414276123047, + "learning_rate": 2.2298743009857334e-05, + "loss": 59.8514, + "step": 129250 + }, + { + "epoch": 0.5222267561420023, + "grad_norm": 428.171142578125, + "learning_rate": 2.229596896738526e-05, + "loss": 49.3736, + "step": 129260 + }, + { + "epoch": 0.52226715740737, + "grad_norm": 826.0242309570312, + "learning_rate": 2.2293194880152087e-05, + "loss": 55.8026, + "step": 129270 + }, + { + "epoch": 0.5223075586727376, + "grad_norm": 1003.6716918945312, + "learning_rate": 2.22904207482119e-05, + "loss": 54.8458, + "step": 129280 + }, + { + "epoch": 0.5223479599381052, + "grad_norm": 981.568359375, + "learning_rate": 2.2287646571618784e-05, + "loss": 65.3426, + "step": 129290 + }, + { + "epoch": 0.5223883612034729, + "grad_norm": 271.6084899902344, + "learning_rate": 2.2284872350426818e-05, + "loss": 51.8198, + "step": 129300 + }, + { + "epoch": 0.5224287624688405, + "grad_norm": 544.5498657226562, + "learning_rate": 2.228209808469009e-05, + "loss": 60.454, + "step": 129310 + }, + { + "epoch": 0.5224691637342082, + "grad_norm": 503.55364990234375, + "learning_rate": 2.227932377446268e-05, + "loss": 52.9032, + "step": 129320 + }, + { + "epoch": 0.5225095649995758, + "grad_norm": 493.88714599609375, + "learning_rate": 2.2276549419798687e-05, + "loss": 98.1219, + "step": 129330 + }, + { + "epoch": 0.5225499662649434, + "grad_norm": 512.6405639648438, + "learning_rate": 2.227377502075219e-05, + "loss": 53.2583, + "step": 129340 + }, + { + "epoch": 0.5225903675303111, + "grad_norm": 684.5833740234375, + "learning_rate": 2.227100057737727e-05, + "loss": 63.6025, + "step": 129350 + }, + { + "epoch": 0.5226307687956787, + "grad_norm": 314.4063415527344, + "learning_rate": 2.2268226089728032e-05, + "loss": 57.9359, + "step": 129360 + }, + { + "epoch": 0.5226711700610464, + "grad_norm": 376.1653747558594, + "learning_rate": 2.226545155785856e-05, + "loss": 111.1591, + "step": 129370 + }, + { + "epoch": 0.522711571326414, + "grad_norm": 774.7371215820312, + "learning_rate": 2.226267698182294e-05, + "loss": 70.9777, + "step": 129380 + }, + { + "epoch": 0.5227519725917815, + 
"grad_norm": 911.4149780273438, + "learning_rate": 2.2259902361675265e-05, + "loss": 97.262, + "step": 129390 + }, + { + "epoch": 0.5227923738571492, + "grad_norm": 773.6610717773438, + "learning_rate": 2.2257127697469634e-05, + "loss": 110.1158, + "step": 129400 + }, + { + "epoch": 0.5228327751225168, + "grad_norm": 970.8236083984375, + "learning_rate": 2.225435298926014e-05, + "loss": 74.0721, + "step": 129410 + }, + { + "epoch": 0.5228731763878844, + "grad_norm": 600.5000610351562, + "learning_rate": 2.2251578237100865e-05, + "loss": 72.1788, + "step": 129420 + }, + { + "epoch": 0.5229135776532521, + "grad_norm": 804.3012084960938, + "learning_rate": 2.2248803441045923e-05, + "loss": 76.1332, + "step": 129430 + }, + { + "epoch": 0.5229539789186197, + "grad_norm": 953.8740844726562, + "learning_rate": 2.224602860114939e-05, + "loss": 56.3753, + "step": 129440 + }, + { + "epoch": 0.5229943801839874, + "grad_norm": 998.566650390625, + "learning_rate": 2.224325371746538e-05, + "loss": 93.7486, + "step": 129450 + }, + { + "epoch": 0.523034781449355, + "grad_norm": 1067.3629150390625, + "learning_rate": 2.224047879004798e-05, + "loss": 61.606, + "step": 129460 + }, + { + "epoch": 0.5230751827147226, + "grad_norm": 243.81695556640625, + "learning_rate": 2.2237703818951295e-05, + "loss": 70.8039, + "step": 129470 + }, + { + "epoch": 0.5231155839800903, + "grad_norm": 696.9252319335938, + "learning_rate": 2.223492880422942e-05, + "loss": 63.0593, + "step": 129480 + }, + { + "epoch": 0.5231559852454579, + "grad_norm": 355.03466796875, + "learning_rate": 2.2232153745936463e-05, + "loss": 67.2347, + "step": 129490 + }, + { + "epoch": 0.5231963865108256, + "grad_norm": 644.423828125, + "learning_rate": 2.222937864412651e-05, + "loss": 48.168, + "step": 129500 + }, + { + "epoch": 0.5232367877761932, + "grad_norm": 458.513427734375, + "learning_rate": 2.2226603498853684e-05, + "loss": 114.4949, + "step": 129510 + }, + { + "epoch": 0.5232771890415607, + "grad_norm": 828.351806640625, + "learning_rate": 2.2223828310172063e-05, + "loss": 63.5915, + "step": 129520 + }, + { + "epoch": 0.5233175903069284, + "grad_norm": 400.17950439453125, + "learning_rate": 2.222105307813578e-05, + "loss": 72.5562, + "step": 129530 + }, + { + "epoch": 0.523357991572296, + "grad_norm": 727.1536254882812, + "learning_rate": 2.221827780279891e-05, + "loss": 52.5311, + "step": 129540 + }, + { + "epoch": 0.5233983928376637, + "grad_norm": 723.8331909179688, + "learning_rate": 2.2215502484215573e-05, + "loss": 93.196, + "step": 129550 + }, + { + "epoch": 0.5234387941030313, + "grad_norm": 1002.9207153320312, + "learning_rate": 2.221272712243988e-05, + "loss": 57.3572, + "step": 129560 + }, + { + "epoch": 0.5234791953683989, + "grad_norm": 510.4528503417969, + "learning_rate": 2.2209951717525935e-05, + "loss": 59.6779, + "step": 129570 + }, + { + "epoch": 0.5235195966337666, + "grad_norm": 187.4142303466797, + "learning_rate": 2.2207176269527836e-05, + "loss": 71.1823, + "step": 129580 + }, + { + "epoch": 0.5235599978991342, + "grad_norm": 770.1105346679688, + "learning_rate": 2.22044007784997e-05, + "loss": 93.6378, + "step": 129590 + }, + { + "epoch": 0.5236003991645019, + "grad_norm": 569.3509521484375, + "learning_rate": 2.2201625244495646e-05, + "loss": 80.991, + "step": 129600 + }, + { + "epoch": 0.5236408004298695, + "grad_norm": 582.7366943359375, + "learning_rate": 2.219884966756977e-05, + "loss": 52.9316, + "step": 129610 + }, + { + "epoch": 0.5236812016952371, + "grad_norm": 541.1594848632812, + "learning_rate": 
2.2196074047776183e-05, + "loss": 99.7191, + "step": 129620 + }, + { + "epoch": 0.5237216029606048, + "grad_norm": 735.7413330078125, + "learning_rate": 2.2193298385169003e-05, + "loss": 157.8393, + "step": 129630 + }, + { + "epoch": 0.5237620042259724, + "grad_norm": 644.6112670898438, + "learning_rate": 2.219052267980234e-05, + "loss": 76.4525, + "step": 129640 + }, + { + "epoch": 0.5238024054913399, + "grad_norm": 412.6915588378906, + "learning_rate": 2.2187746931730313e-05, + "loss": 50.2999, + "step": 129650 + }, + { + "epoch": 0.5238428067567076, + "grad_norm": 733.0255737304688, + "learning_rate": 2.2184971141007034e-05, + "loss": 99.5797, + "step": 129660 + }, + { + "epoch": 0.5238832080220752, + "grad_norm": 684.810302734375, + "learning_rate": 2.218219530768661e-05, + "loss": 64.0528, + "step": 129670 + }, + { + "epoch": 0.5239236092874429, + "grad_norm": 500.62322998046875, + "learning_rate": 2.217941943182318e-05, + "loss": 65.1142, + "step": 129680 + }, + { + "epoch": 0.5239640105528105, + "grad_norm": 854.3031005859375, + "learning_rate": 2.2176643513470835e-05, + "loss": 73.6697, + "step": 129690 + }, + { + "epoch": 0.5240044118181781, + "grad_norm": 1076.4306640625, + "learning_rate": 2.2173867552683707e-05, + "loss": 105.0617, + "step": 129700 + }, + { + "epoch": 0.5240448130835458, + "grad_norm": 901.8267211914062, + "learning_rate": 2.217109154951591e-05, + "loss": 43.9814, + "step": 129710 + }, + { + "epoch": 0.5240852143489134, + "grad_norm": 1354.4957275390625, + "learning_rate": 2.2168315504021565e-05, + "loss": 113.1171, + "step": 129720 + }, + { + "epoch": 0.5241256156142811, + "grad_norm": 375.463134765625, + "learning_rate": 2.21655394162548e-05, + "loss": 74.7126, + "step": 129730 + }, + { + "epoch": 0.5241660168796487, + "grad_norm": 352.748291015625, + "learning_rate": 2.2162763286269722e-05, + "loss": 73.3717, + "step": 129740 + }, + { + "epoch": 0.5242064181450163, + "grad_norm": 698.4041137695312, + "learning_rate": 2.2159987114120457e-05, + "loss": 80.4633, + "step": 129750 + }, + { + "epoch": 0.524246819410384, + "grad_norm": 1083.1429443359375, + "learning_rate": 2.2157210899861138e-05, + "loss": 104.8227, + "step": 129760 + }, + { + "epoch": 0.5242872206757516, + "grad_norm": 313.2508544921875, + "learning_rate": 2.2154434643545884e-05, + "loss": 63.891, + "step": 129770 + }, + { + "epoch": 0.5243276219411191, + "grad_norm": 237.42657470703125, + "learning_rate": 2.2151658345228815e-05, + "loss": 76.093, + "step": 129780 + }, + { + "epoch": 0.5243680232064868, + "grad_norm": 236.59085083007812, + "learning_rate": 2.2148882004964055e-05, + "loss": 69.6301, + "step": 129790 + }, + { + "epoch": 0.5244084244718544, + "grad_norm": 930.1708374023438, + "learning_rate": 2.2146105622805733e-05, + "loss": 50.7523, + "step": 129800 + }, + { + "epoch": 0.5244488257372221, + "grad_norm": 425.23431396484375, + "learning_rate": 2.2143329198807982e-05, + "loss": 35.9424, + "step": 129810 + }, + { + "epoch": 0.5244892270025897, + "grad_norm": 472.409423828125, + "learning_rate": 2.2140552733024926e-05, + "loss": 72.4656, + "step": 129820 + }, + { + "epoch": 0.5245296282679573, + "grad_norm": 361.20123291015625, + "learning_rate": 2.2137776225510685e-05, + "loss": 108.2169, + "step": 129830 + }, + { + "epoch": 0.524570029533325, + "grad_norm": 941.8109741210938, + "learning_rate": 2.21349996763194e-05, + "loss": 89.118, + "step": 129840 + }, + { + "epoch": 0.5246104307986926, + "grad_norm": 505.551513671875, + "learning_rate": 2.21322230855052e-05, + "loss": 105.4398, + 
"step": 129850 + }, + { + "epoch": 0.5246508320640603, + "grad_norm": 630.2537841796875, + "learning_rate": 2.2129446453122214e-05, + "loss": 65.7776, + "step": 129860 + }, + { + "epoch": 0.5246912333294279, + "grad_norm": 799.0986328125, + "learning_rate": 2.212666977922457e-05, + "loss": 99.204, + "step": 129870 + }, + { + "epoch": 0.5247316345947955, + "grad_norm": 535.8695678710938, + "learning_rate": 2.21238930638664e-05, + "loss": 50.7148, + "step": 129880 + }, + { + "epoch": 0.5247720358601632, + "grad_norm": 707.9243774414062, + "learning_rate": 2.2121116307101845e-05, + "loss": 68.4754, + "step": 129890 + }, + { + "epoch": 0.5248124371255307, + "grad_norm": 924.9171142578125, + "learning_rate": 2.211833950898504e-05, + "loss": 89.6845, + "step": 129900 + }, + { + "epoch": 0.5248528383908984, + "grad_norm": 396.11541748046875, + "learning_rate": 2.2115562669570107e-05, + "loss": 142.1989, + "step": 129910 + }, + { + "epoch": 0.524893239656266, + "grad_norm": 603.73681640625, + "learning_rate": 2.211278578891119e-05, + "loss": 60.7227, + "step": 129920 + }, + { + "epoch": 0.5249336409216336, + "grad_norm": 1084.658203125, + "learning_rate": 2.2110008867062436e-05, + "loss": 93.8327, + "step": 129930 + }, + { + "epoch": 0.5249740421870013, + "grad_norm": 521.28564453125, + "learning_rate": 2.210723190407797e-05, + "loss": 69.4505, + "step": 129940 + }, + { + "epoch": 0.5250144434523689, + "grad_norm": 598.28369140625, + "learning_rate": 2.2104454900011937e-05, + "loss": 62.5692, + "step": 129950 + }, + { + "epoch": 0.5250548447177366, + "grad_norm": 429.80621337890625, + "learning_rate": 2.210167785491846e-05, + "loss": 50.178, + "step": 129960 + }, + { + "epoch": 0.5250952459831042, + "grad_norm": 979.90625, + "learning_rate": 2.2098900768851697e-05, + "loss": 76.9549, + "step": 129970 + }, + { + "epoch": 0.5251356472484718, + "grad_norm": 450.59063720703125, + "learning_rate": 2.209612364186579e-05, + "loss": 66.1917, + "step": 129980 + }, + { + "epoch": 0.5251760485138395, + "grad_norm": 483.9956359863281, + "learning_rate": 2.209334647401486e-05, + "loss": 71.4369, + "step": 129990 + }, + { + "epoch": 0.5252164497792071, + "grad_norm": 509.35540771484375, + "learning_rate": 2.209056926535307e-05, + "loss": 84.6931, + "step": 130000 + }, + { + "epoch": 0.5252568510445748, + "grad_norm": 511.721435546875, + "learning_rate": 2.2087792015934557e-05, + "loss": 72.1604, + "step": 130010 + }, + { + "epoch": 0.5252972523099424, + "grad_norm": 651.770263671875, + "learning_rate": 2.2085014725813464e-05, + "loss": 88.4858, + "step": 130020 + }, + { + "epoch": 0.5253376535753099, + "grad_norm": 676.105712890625, + "learning_rate": 2.2082237395043937e-05, + "loss": 67.5345, + "step": 130030 + }, + { + "epoch": 0.5253780548406776, + "grad_norm": 851.2920532226562, + "learning_rate": 2.2079460023680115e-05, + "loss": 33.0275, + "step": 130040 + }, + { + "epoch": 0.5254184561060452, + "grad_norm": 910.7112426757812, + "learning_rate": 2.207668261177615e-05, + "loss": 78.1401, + "step": 130050 + }, + { + "epoch": 0.5254588573714128, + "grad_norm": 718.9505615234375, + "learning_rate": 2.2073905159386192e-05, + "loss": 81.2251, + "step": 130060 + }, + { + "epoch": 0.5254992586367805, + "grad_norm": 643.8690185546875, + "learning_rate": 2.2071127666564382e-05, + "loss": 71.4558, + "step": 130070 + }, + { + "epoch": 0.5255396599021481, + "grad_norm": 663.6620483398438, + "learning_rate": 2.206835013336487e-05, + "loss": 116.1444, + "step": 130080 + }, + { + "epoch": 0.5255800611675158, + 
"grad_norm": 576.166015625, + "learning_rate": 2.206557255984181e-05, + "loss": 68.6327, + "step": 130090 + }, + { + "epoch": 0.5256204624328834, + "grad_norm": 1523.9271240234375, + "learning_rate": 2.206279494604935e-05, + "loss": 107.1576, + "step": 130100 + }, + { + "epoch": 0.525660863698251, + "grad_norm": 944.7152099609375, + "learning_rate": 2.206001729204164e-05, + "loss": 83.4681, + "step": 130110 + }, + { + "epoch": 0.5257012649636187, + "grad_norm": 667.1321411132812, + "learning_rate": 2.2057239597872835e-05, + "loss": 90.3643, + "step": 130120 + }, + { + "epoch": 0.5257416662289863, + "grad_norm": 1068.7344970703125, + "learning_rate": 2.205446186359708e-05, + "loss": 85.3819, + "step": 130130 + }, + { + "epoch": 0.525782067494354, + "grad_norm": 1033.1234130859375, + "learning_rate": 2.205168408926854e-05, + "loss": 66.6255, + "step": 130140 + }, + { + "epoch": 0.5258224687597216, + "grad_norm": 598.55322265625, + "learning_rate": 2.2048906274941353e-05, + "loss": 70.7171, + "step": 130150 + }, + { + "epoch": 0.5258628700250891, + "grad_norm": 594.4247436523438, + "learning_rate": 2.204612842066969e-05, + "loss": 72.4857, + "step": 130160 + }, + { + "epoch": 0.5259032712904568, + "grad_norm": 362.4100341796875, + "learning_rate": 2.204335052650769e-05, + "loss": 58.3671, + "step": 130170 + }, + { + "epoch": 0.5259436725558244, + "grad_norm": 474.4010925292969, + "learning_rate": 2.2040572592509535e-05, + "loss": 63.3108, + "step": 130180 + }, + { + "epoch": 0.525984073821192, + "grad_norm": 929.5862426757812, + "learning_rate": 2.2037794618729356e-05, + "loss": 85.4152, + "step": 130190 + }, + { + "epoch": 0.5260244750865597, + "grad_norm": 591.5691528320312, + "learning_rate": 2.203501660522133e-05, + "loss": 66.0489, + "step": 130200 + }, + { + "epoch": 0.5260648763519273, + "grad_norm": 658.8538208007812, + "learning_rate": 2.20322385520396e-05, + "loss": 80.3902, + "step": 130210 + }, + { + "epoch": 0.526105277617295, + "grad_norm": 685.9319458007812, + "learning_rate": 2.202946045923834e-05, + "loss": 75.9582, + "step": 130220 + }, + { + "epoch": 0.5261456788826626, + "grad_norm": 737.0738525390625, + "learning_rate": 2.20266823268717e-05, + "loss": 56.7044, + "step": 130230 + }, + { + "epoch": 0.5261860801480303, + "grad_norm": 437.1549987792969, + "learning_rate": 2.2023904154993838e-05, + "loss": 41.649, + "step": 130240 + }, + { + "epoch": 0.5262264814133979, + "grad_norm": 467.7385559082031, + "learning_rate": 2.202112594365893e-05, + "loss": 75.6985, + "step": 130250 + }, + { + "epoch": 0.5262668826787655, + "grad_norm": 729.7138671875, + "learning_rate": 2.201834769292113e-05, + "loss": 75.9901, + "step": 130260 + }, + { + "epoch": 0.5263072839441332, + "grad_norm": 369.44903564453125, + "learning_rate": 2.2015569402834604e-05, + "loss": 83.2877, + "step": 130270 + }, + { + "epoch": 0.5263476852095008, + "grad_norm": 476.76904296875, + "learning_rate": 2.201279107345351e-05, + "loss": 88.1615, + "step": 130280 + }, + { + "epoch": 0.5263880864748683, + "grad_norm": 364.17120361328125, + "learning_rate": 2.2010012704832023e-05, + "loss": 67.8906, + "step": 130290 + }, + { + "epoch": 0.526428487740236, + "grad_norm": 585.188232421875, + "learning_rate": 2.2007234297024298e-05, + "loss": 66.1378, + "step": 130300 + }, + { + "epoch": 0.5264688890056036, + "grad_norm": 1681.9949951171875, + "learning_rate": 2.2004455850084515e-05, + "loss": 71.7252, + "step": 130310 + }, + { + "epoch": 0.5265092902709713, + "grad_norm": 503.1661682128906, + "learning_rate": 
2.200167736406683e-05, + "loss": 91.7242, + "step": 130320 + }, + { + "epoch": 0.5265496915363389, + "grad_norm": 583.17041015625, + "learning_rate": 2.1998898839025408e-05, + "loss": 66.8144, + "step": 130330 + }, + { + "epoch": 0.5265900928017065, + "grad_norm": 625.3662719726562, + "learning_rate": 2.1996120275014426e-05, + "loss": 84.3749, + "step": 130340 + }, + { + "epoch": 0.5266304940670742, + "grad_norm": 643.1946411132812, + "learning_rate": 2.199334167208806e-05, + "loss": 62.8451, + "step": 130350 + }, + { + "epoch": 0.5266708953324418, + "grad_norm": 1496.9039306640625, + "learning_rate": 2.1990563030300464e-05, + "loss": 78.2166, + "step": 130360 + }, + { + "epoch": 0.5267112965978095, + "grad_norm": 618.0537109375, + "learning_rate": 2.1987784349705822e-05, + "loss": 59.7724, + "step": 130370 + }, + { + "epoch": 0.5267516978631771, + "grad_norm": 501.614013671875, + "learning_rate": 2.1985005630358298e-05, + "loss": 59.524, + "step": 130380 + }, + { + "epoch": 0.5267920991285447, + "grad_norm": 823.8161010742188, + "learning_rate": 2.198222687231207e-05, + "loss": 85.6695, + "step": 130390 + }, + { + "epoch": 0.5268325003939124, + "grad_norm": 602.433837890625, + "learning_rate": 2.1979448075621302e-05, + "loss": 60.8119, + "step": 130400 + }, + { + "epoch": 0.52687290165928, + "grad_norm": 1228.8096923828125, + "learning_rate": 2.197666924034018e-05, + "loss": 84.7259, + "step": 130410 + }, + { + "epoch": 0.5269133029246476, + "grad_norm": 994.2683715820312, + "learning_rate": 2.1973890366522868e-05, + "loss": 123.5206, + "step": 130420 + }, + { + "epoch": 0.5269537041900152, + "grad_norm": 1388.2764892578125, + "learning_rate": 2.1971111454223556e-05, + "loss": 77.037, + "step": 130430 + }, + { + "epoch": 0.5269941054553828, + "grad_norm": 1634.232666015625, + "learning_rate": 2.1968332503496406e-05, + "loss": 77.2116, + "step": 130440 + }, + { + "epoch": 0.5270345067207505, + "grad_norm": 690.97705078125, + "learning_rate": 2.1965553514395597e-05, + "loss": 76.6827, + "step": 130450 + }, + { + "epoch": 0.5270749079861181, + "grad_norm": 1038.2313232421875, + "learning_rate": 2.1962774486975316e-05, + "loss": 89.5939, + "step": 130460 + }, + { + "epoch": 0.5271153092514858, + "grad_norm": 392.66497802734375, + "learning_rate": 2.195999542128974e-05, + "loss": 90.462, + "step": 130470 + }, + { + "epoch": 0.5271557105168534, + "grad_norm": 676.7084350585938, + "learning_rate": 2.1957216317393035e-05, + "loss": 67.761, + "step": 130480 + }, + { + "epoch": 0.527196111782221, + "grad_norm": 1924.166748046875, + "learning_rate": 2.195443717533939e-05, + "loss": 115.8155, + "step": 130490 + }, + { + "epoch": 0.5272365130475887, + "grad_norm": 445.7773132324219, + "learning_rate": 2.1951657995182992e-05, + "loss": 71.4141, + "step": 130500 + }, + { + "epoch": 0.5272769143129563, + "grad_norm": 904.7865600585938, + "learning_rate": 2.194887877697802e-05, + "loss": 61.7047, + "step": 130510 + }, + { + "epoch": 0.527317315578324, + "grad_norm": 1421.6781005859375, + "learning_rate": 2.194609952077864e-05, + "loss": 128.0777, + "step": 130520 + }, + { + "epoch": 0.5273577168436916, + "grad_norm": 1185.4580078125, + "learning_rate": 2.1943320226639052e-05, + "loss": 74.3987, + "step": 130530 + }, + { + "epoch": 0.5273981181090591, + "grad_norm": 1069.8759765625, + "learning_rate": 2.1940540894613448e-05, + "loss": 56.2004, + "step": 130540 + }, + { + "epoch": 0.5274385193744268, + "grad_norm": 601.6181640625, + "learning_rate": 2.1937761524755988e-05, + "loss": 67.8172, + "step": 
130550 + }, + { + "epoch": 0.5274789206397944, + "grad_norm": 1025.5567626953125, + "learning_rate": 2.1934982117120875e-05, + "loss": 62.3046, + "step": 130560 + }, + { + "epoch": 0.527519321905162, + "grad_norm": 740.1962280273438, + "learning_rate": 2.1932202671762283e-05, + "loss": 46.7883, + "step": 130570 + }, + { + "epoch": 0.5275597231705297, + "grad_norm": 1970.0885009765625, + "learning_rate": 2.1929423188734408e-05, + "loss": 121.5662, + "step": 130580 + }, + { + "epoch": 0.5276001244358973, + "grad_norm": 668.3657836914062, + "learning_rate": 2.192664366809144e-05, + "loss": 79.2449, + "step": 130590 + }, + { + "epoch": 0.527640525701265, + "grad_norm": 1023.1004028320312, + "learning_rate": 2.1923864109887556e-05, + "loss": 86.2165, + "step": 130600 + }, + { + "epoch": 0.5276809269666326, + "grad_norm": 482.510009765625, + "learning_rate": 2.192108451417695e-05, + "loss": 89.5386, + "step": 130610 + }, + { + "epoch": 0.5277213282320002, + "grad_norm": 784.1842651367188, + "learning_rate": 2.1918304881013813e-05, + "loss": 83.9889, + "step": 130620 + }, + { + "epoch": 0.5277617294973679, + "grad_norm": 974.80517578125, + "learning_rate": 2.191552521045234e-05, + "loss": 72.0334, + "step": 130630 + }, + { + "epoch": 0.5278021307627355, + "grad_norm": 902.7506103515625, + "learning_rate": 2.1912745502546712e-05, + "loss": 84.492, + "step": 130640 + }, + { + "epoch": 0.5278425320281032, + "grad_norm": 630.8988037109375, + "learning_rate": 2.1909965757351128e-05, + "loss": 67.6872, + "step": 130650 + }, + { + "epoch": 0.5278829332934708, + "grad_norm": 891.9068603515625, + "learning_rate": 2.1907185974919772e-05, + "loss": 69.3973, + "step": 130660 + }, + { + "epoch": 0.5279233345588383, + "grad_norm": 786.7998657226562, + "learning_rate": 2.1904406155306855e-05, + "loss": 72.8176, + "step": 130670 + }, + { + "epoch": 0.527963735824206, + "grad_norm": 533.5980834960938, + "learning_rate": 2.190162629856655e-05, + "loss": 85.8427, + "step": 130680 + }, + { + "epoch": 0.5280041370895736, + "grad_norm": 879.7304077148438, + "learning_rate": 2.1898846404753064e-05, + "loss": 81.9489, + "step": 130690 + }, + { + "epoch": 0.5280445383549413, + "grad_norm": 862.5269775390625, + "learning_rate": 2.1896066473920588e-05, + "loss": 79.0677, + "step": 130700 + }, + { + "epoch": 0.5280849396203089, + "grad_norm": 682.307373046875, + "learning_rate": 2.1893286506123322e-05, + "loss": 68.0811, + "step": 130710 + }, + { + "epoch": 0.5281253408856765, + "grad_norm": 264.0597229003906, + "learning_rate": 2.189050650141546e-05, + "loss": 83.5249, + "step": 130720 + }, + { + "epoch": 0.5281657421510442, + "grad_norm": 850.618896484375, + "learning_rate": 2.1887726459851207e-05, + "loss": 63.459, + "step": 130730 + }, + { + "epoch": 0.5282061434164118, + "grad_norm": 529.5652465820312, + "learning_rate": 2.1884946381484748e-05, + "loss": 45.5113, + "step": 130740 + }, + { + "epoch": 0.5282465446817795, + "grad_norm": 481.17095947265625, + "learning_rate": 2.1882166266370292e-05, + "loss": 73.4088, + "step": 130750 + }, + { + "epoch": 0.5282869459471471, + "grad_norm": 459.7140197753906, + "learning_rate": 2.187938611456203e-05, + "loss": 39.6897, + "step": 130760 + }, + { + "epoch": 0.5283273472125147, + "grad_norm": 607.7068481445312, + "learning_rate": 2.1876605926114173e-05, + "loss": 70.8226, + "step": 130770 + }, + { + "epoch": 0.5283677484778824, + "grad_norm": 1945.8135986328125, + "learning_rate": 2.1873825701080916e-05, + "loss": 138.945, + "step": 130780 + }, + { + "epoch": 0.52840814974325, 
+ "grad_norm": 1338.008056640625, + "learning_rate": 2.1871045439516463e-05, + "loss": 71.2926, + "step": 130790 + }, + { + "epoch": 0.5284485510086175, + "grad_norm": 345.3675231933594, + "learning_rate": 2.1868265141475017e-05, + "loss": 78.6305, + "step": 130800 + }, + { + "epoch": 0.5284889522739852, + "grad_norm": 806.418212890625, + "learning_rate": 2.1865484807010778e-05, + "loss": 66.9043, + "step": 130810 + }, + { + "epoch": 0.5285293535393528, + "grad_norm": 661.2883911132812, + "learning_rate": 2.186270443617795e-05, + "loss": 90.1794, + "step": 130820 + }, + { + "epoch": 0.5285697548047205, + "grad_norm": 487.6462707519531, + "learning_rate": 2.1859924029030743e-05, + "loss": 74.0889, + "step": 130830 + }, + { + "epoch": 0.5286101560700881, + "grad_norm": 539.4984130859375, + "learning_rate": 2.185714358562336e-05, + "loss": 117.8266, + "step": 130840 + }, + { + "epoch": 0.5286505573354557, + "grad_norm": 861.3887939453125, + "learning_rate": 2.1854363106010003e-05, + "loss": 70.8996, + "step": 130850 + }, + { + "epoch": 0.5286909586008234, + "grad_norm": 674.577880859375, + "learning_rate": 2.1851582590244887e-05, + "loss": 74.7064, + "step": 130860 + }, + { + "epoch": 0.528731359866191, + "grad_norm": 623.7495727539062, + "learning_rate": 2.1848802038382213e-05, + "loss": 82.2479, + "step": 130870 + }, + { + "epoch": 0.5287717611315587, + "grad_norm": 898.375, + "learning_rate": 2.1846021450476192e-05, + "loss": 60.6791, + "step": 130880 + }, + { + "epoch": 0.5288121623969263, + "grad_norm": 472.4190673828125, + "learning_rate": 2.1843240826581032e-05, + "loss": 69.5391, + "step": 130890 + }, + { + "epoch": 0.5288525636622939, + "grad_norm": 1578.5916748046875, + "learning_rate": 2.1840460166750947e-05, + "loss": 89.115, + "step": 130900 + }, + { + "epoch": 0.5288929649276616, + "grad_norm": 1358.586181640625, + "learning_rate": 2.1837679471040137e-05, + "loss": 86.0973, + "step": 130910 + }, + { + "epoch": 0.5289333661930292, + "grad_norm": 582.5421142578125, + "learning_rate": 2.1834898739502827e-05, + "loss": 76.9907, + "step": 130920 + }, + { + "epoch": 0.5289737674583967, + "grad_norm": 3223.254150390625, + "learning_rate": 2.1832117972193213e-05, + "loss": 76.7468, + "step": 130930 + }, + { + "epoch": 0.5290141687237644, + "grad_norm": 335.3118591308594, + "learning_rate": 2.182933716916552e-05, + "loss": 49.6441, + "step": 130940 + }, + { + "epoch": 0.529054569989132, + "grad_norm": 823.189453125, + "learning_rate": 2.1826556330473955e-05, + "loss": 77.5172, + "step": 130950 + }, + { + "epoch": 0.5290949712544997, + "grad_norm": 570.0392456054688, + "learning_rate": 2.182377545617274e-05, + "loss": 86.5257, + "step": 130960 + }, + { + "epoch": 0.5291353725198673, + "grad_norm": 303.16558837890625, + "learning_rate": 2.1820994546316078e-05, + "loss": 85.5057, + "step": 130970 + }, + { + "epoch": 0.529175773785235, + "grad_norm": 462.9030456542969, + "learning_rate": 2.1818213600958193e-05, + "loss": 46.878, + "step": 130980 + }, + { + "epoch": 0.5292161750506026, + "grad_norm": 466.94384765625, + "learning_rate": 2.1815432620153297e-05, + "loss": 51.338, + "step": 130990 + }, + { + "epoch": 0.5292565763159702, + "grad_norm": 757.3911743164062, + "learning_rate": 2.1812651603955608e-05, + "loss": 108.3641, + "step": 131000 + }, + { + "epoch": 0.5292969775813379, + "grad_norm": 424.4309387207031, + "learning_rate": 2.1809870552419337e-05, + "loss": 95.4393, + "step": 131010 + }, + { + "epoch": 0.5293373788467055, + "grad_norm": 683.0469360351562, + "learning_rate": 
2.1807089465598714e-05, + "loss": 71.4691, + "step": 131020 + }, + { + "epoch": 0.5293777801120731, + "grad_norm": 1651.76953125, + "learning_rate": 2.1804308343547947e-05, + "loss": 104.6401, + "step": 131030 + }, + { + "epoch": 0.5294181813774408, + "grad_norm": 403.01861572265625, + "learning_rate": 2.1801527186321267e-05, + "loss": 99.9258, + "step": 131040 + }, + { + "epoch": 0.5294585826428083, + "grad_norm": 899.4434814453125, + "learning_rate": 2.1798745993972877e-05, + "loss": 81.2632, + "step": 131050 + }, + { + "epoch": 0.529498983908176, + "grad_norm": 443.64569091796875, + "learning_rate": 2.1795964766557015e-05, + "loss": 48.2979, + "step": 131060 + }, + { + "epoch": 0.5295393851735436, + "grad_norm": 820.0629272460938, + "learning_rate": 2.1793183504127895e-05, + "loss": 93.4104, + "step": 131070 + }, + { + "epoch": 0.5295797864389112, + "grad_norm": 925.0215454101562, + "learning_rate": 2.1790402206739746e-05, + "loss": 67.6241, + "step": 131080 + }, + { + "epoch": 0.5296201877042789, + "grad_norm": 594.7744140625, + "learning_rate": 2.1787620874446775e-05, + "loss": 107.6541, + "step": 131090 + }, + { + "epoch": 0.5296605889696465, + "grad_norm": 372.6765441894531, + "learning_rate": 2.1784839507303213e-05, + "loss": 65.606, + "step": 131100 + }, + { + "epoch": 0.5297009902350142, + "grad_norm": 1051.6839599609375, + "learning_rate": 2.1782058105363287e-05, + "loss": 79.9032, + "step": 131110 + }, + { + "epoch": 0.5297413915003818, + "grad_norm": 867.8452758789062, + "learning_rate": 2.1779276668681223e-05, + "loss": 115.1968, + "step": 131120 + }, + { + "epoch": 0.5297817927657494, + "grad_norm": 762.9842529296875, + "learning_rate": 2.1776495197311243e-05, + "loss": 85.6247, + "step": 131130 + }, + { + "epoch": 0.5298221940311171, + "grad_norm": 184.7125701904297, + "learning_rate": 2.1773713691307573e-05, + "loss": 92.783, + "step": 131140 + }, + { + "epoch": 0.5298625952964847, + "grad_norm": 1359.2021484375, + "learning_rate": 2.1770932150724447e-05, + "loss": 80.2446, + "step": 131150 + }, + { + "epoch": 0.5299029965618524, + "grad_norm": 412.2273254394531, + "learning_rate": 2.176815057561609e-05, + "loss": 60.3154, + "step": 131160 + }, + { + "epoch": 0.52994339782722, + "grad_norm": 3259.436279296875, + "learning_rate": 2.1765368966036715e-05, + "loss": 92.2644, + "step": 131170 + }, + { + "epoch": 0.5299837990925875, + "grad_norm": 761.640625, + "learning_rate": 2.176258732204057e-05, + "loss": 72.1992, + "step": 131180 + }, + { + "epoch": 0.5300242003579552, + "grad_norm": 574.616943359375, + "learning_rate": 2.1759805643681877e-05, + "loss": 63.2733, + "step": 131190 + }, + { + "epoch": 0.5300646016233228, + "grad_norm": 490.14617919921875, + "learning_rate": 2.175702393101487e-05, + "loss": 73.932, + "step": 131200 + }, + { + "epoch": 0.5301050028886904, + "grad_norm": 629.990478515625, + "learning_rate": 2.175424218409377e-05, + "loss": 86.3369, + "step": 131210 + }, + { + "epoch": 0.5301454041540581, + "grad_norm": 769.4268188476562, + "learning_rate": 2.175146040297282e-05, + "loss": 106.5184, + "step": 131220 + }, + { + "epoch": 0.5301858054194257, + "grad_norm": 870.2138061523438, + "learning_rate": 2.1748678587706245e-05, + "loss": 78.7124, + "step": 131230 + }, + { + "epoch": 0.5302262066847934, + "grad_norm": 824.6702270507812, + "learning_rate": 2.1745896738348283e-05, + "loss": 75.6737, + "step": 131240 + }, + { + "epoch": 0.530266607950161, + "grad_norm": 1629.01611328125, + "learning_rate": 2.174311485495317e-05, + "loss": 64.6747, + "step": 131250 
+ }, + { + "epoch": 0.5303070092155286, + "grad_norm": 669.0882568359375, + "learning_rate": 2.1740332937575128e-05, + "loss": 43.602, + "step": 131260 + }, + { + "epoch": 0.5303474104808963, + "grad_norm": 880.02734375, + "learning_rate": 2.17375509862684e-05, + "loss": 99.4352, + "step": 131270 + }, + { + "epoch": 0.5303878117462639, + "grad_norm": 541.5421752929688, + "learning_rate": 2.1734769001087224e-05, + "loss": 88.0695, + "step": 131280 + }, + { + "epoch": 0.5304282130116316, + "grad_norm": 1086.0965576171875, + "learning_rate": 2.173198698208583e-05, + "loss": 63.9128, + "step": 131290 + }, + { + "epoch": 0.5304686142769992, + "grad_norm": 488.5394287109375, + "learning_rate": 2.172920492931846e-05, + "loss": 78.0184, + "step": 131300 + }, + { + "epoch": 0.5305090155423667, + "grad_norm": 541.5884399414062, + "learning_rate": 2.172642284283935e-05, + "loss": 76.7773, + "step": 131310 + }, + { + "epoch": 0.5305494168077344, + "grad_norm": 848.9282836914062, + "learning_rate": 2.172364072270274e-05, + "loss": 57.1273, + "step": 131320 + }, + { + "epoch": 0.530589818073102, + "grad_norm": 355.4403076171875, + "learning_rate": 2.1720858568962865e-05, + "loss": 73.2419, + "step": 131330 + }, + { + "epoch": 0.5306302193384697, + "grad_norm": 568.5783081054688, + "learning_rate": 2.1718076381673967e-05, + "loss": 63.66, + "step": 131340 + }, + { + "epoch": 0.5306706206038373, + "grad_norm": 1096.16552734375, + "learning_rate": 2.171529416089028e-05, + "loss": 86.2019, + "step": 131350 + }, + { + "epoch": 0.5307110218692049, + "grad_norm": 814.0066528320312, + "learning_rate": 2.171251190666605e-05, + "loss": 92.9418, + "step": 131360 + }, + { + "epoch": 0.5307514231345726, + "grad_norm": 557.1846313476562, + "learning_rate": 2.170972961905553e-05, + "loss": 83.5845, + "step": 131370 + }, + { + "epoch": 0.5307918243999402, + "grad_norm": 436.90435791015625, + "learning_rate": 2.1706947298112943e-05, + "loss": 85.5495, + "step": 131380 + }, + { + "epoch": 0.5308322256653079, + "grad_norm": 563.3603515625, + "learning_rate": 2.1704164943892536e-05, + "loss": 66.9731, + "step": 131390 + }, + { + "epoch": 0.5308726269306755, + "grad_norm": 786.006591796875, + "learning_rate": 2.1701382556448563e-05, + "loss": 69.8326, + "step": 131400 + }, + { + "epoch": 0.5309130281960431, + "grad_norm": 912.2850341796875, + "learning_rate": 2.1698600135835258e-05, + "loss": 71.4801, + "step": 131410 + }, + { + "epoch": 0.5309534294614108, + "grad_norm": 741.9838256835938, + "learning_rate": 2.1695817682106874e-05, + "loss": 75.9357, + "step": 131420 + }, + { + "epoch": 0.5309938307267784, + "grad_norm": 405.2071533203125, + "learning_rate": 2.1693035195317642e-05, + "loss": 79.3297, + "step": 131430 + }, + { + "epoch": 0.5310342319921459, + "grad_norm": 554.6468505859375, + "learning_rate": 2.1690252675521823e-05, + "loss": 85.0767, + "step": 131440 + }, + { + "epoch": 0.5310746332575136, + "grad_norm": 826.110107421875, + "learning_rate": 2.1687470122773658e-05, + "loss": 61.4633, + "step": 131450 + }, + { + "epoch": 0.5311150345228812, + "grad_norm": 474.8224182128906, + "learning_rate": 2.1684687537127394e-05, + "loss": 75.1593, + "step": 131460 + }, + { + "epoch": 0.5311554357882489, + "grad_norm": 547.2439575195312, + "learning_rate": 2.1681904918637275e-05, + "loss": 61.7188, + "step": 131470 + }, + { + "epoch": 0.5311958370536165, + "grad_norm": 651.0337524414062, + "learning_rate": 2.1679122267357555e-05, + "loss": 83.3349, + "step": 131480 + }, + { + "epoch": 0.5312362383189841, + "grad_norm": 
663.5857543945312, + "learning_rate": 2.1676339583342485e-05, + "loss": 55.097, + "step": 131490 + }, + { + "epoch": 0.5312766395843518, + "grad_norm": 1205.1168212890625, + "learning_rate": 2.167355686664632e-05, + "loss": 94.9986, + "step": 131500 + }, + { + "epoch": 0.5313170408497194, + "grad_norm": 854.0094604492188, + "learning_rate": 2.1670774117323287e-05, + "loss": 91.0396, + "step": 131510 + }, + { + "epoch": 0.5313574421150871, + "grad_norm": 975.82861328125, + "learning_rate": 2.1667991335427655e-05, + "loss": 93.6129, + "step": 131520 + }, + { + "epoch": 0.5313978433804547, + "grad_norm": 701.6337890625, + "learning_rate": 2.1665208521013682e-05, + "loss": 68.7761, + "step": 131530 + }, + { + "epoch": 0.5314382446458223, + "grad_norm": 231.34490966796875, + "learning_rate": 2.1662425674135602e-05, + "loss": 81.8495, + "step": 131540 + }, + { + "epoch": 0.53147864591119, + "grad_norm": 652.6868896484375, + "learning_rate": 2.165964279484768e-05, + "loss": 69.513, + "step": 131550 + }, + { + "epoch": 0.5315190471765576, + "grad_norm": 423.39544677734375, + "learning_rate": 2.1656859883204165e-05, + "loss": 98.701, + "step": 131560 + }, + { + "epoch": 0.5315594484419252, + "grad_norm": 1123.4146728515625, + "learning_rate": 2.1654076939259325e-05, + "loss": 84.2372, + "step": 131570 + }, + { + "epoch": 0.5315998497072928, + "grad_norm": 1566.5975341796875, + "learning_rate": 2.1651293963067396e-05, + "loss": 67.8266, + "step": 131580 + }, + { + "epoch": 0.5316402509726604, + "grad_norm": 167.6468963623047, + "learning_rate": 2.164851095468264e-05, + "loss": 80.8677, + "step": 131590 + }, + { + "epoch": 0.5316806522380281, + "grad_norm": 573.9483032226562, + "learning_rate": 2.1645727914159315e-05, + "loss": 93.065, + "step": 131600 + }, + { + "epoch": 0.5317210535033957, + "grad_norm": 765.2149658203125, + "learning_rate": 2.164294484155168e-05, + "loss": 76.4018, + "step": 131610 + }, + { + "epoch": 0.5317614547687634, + "grad_norm": 322.4963073730469, + "learning_rate": 2.1640161736913985e-05, + "loss": 67.6671, + "step": 131620 + }, + { + "epoch": 0.531801856034131, + "grad_norm": 682.9099731445312, + "learning_rate": 2.1637378600300493e-05, + "loss": 68.4641, + "step": 131630 + }, + { + "epoch": 0.5318422572994986, + "grad_norm": 530.6994018554688, + "learning_rate": 2.163459543176546e-05, + "loss": 66.0492, + "step": 131640 + }, + { + "epoch": 0.5318826585648663, + "grad_norm": 967.1116943359375, + "learning_rate": 2.1631812231363158e-05, + "loss": 77.7043, + "step": 131650 + }, + { + "epoch": 0.5319230598302339, + "grad_norm": 686.3056640625, + "learning_rate": 2.1629028999147822e-05, + "loss": 53.7673, + "step": 131660 + }, + { + "epoch": 0.5319634610956016, + "grad_norm": 521.983642578125, + "learning_rate": 2.162624573517374e-05, + "loss": 97.6784, + "step": 131670 + }, + { + "epoch": 0.5320038623609692, + "grad_norm": 470.3642578125, + "learning_rate": 2.1623462439495154e-05, + "loss": 88.1898, + "step": 131680 + }, + { + "epoch": 0.5320442636263367, + "grad_norm": 643.4794311523438, + "learning_rate": 2.1620679112166333e-05, + "loss": 67.3134, + "step": 131690 + }, + { + "epoch": 0.5320846648917044, + "grad_norm": 882.0040893554688, + "learning_rate": 2.1617895753241536e-05, + "loss": 89.7396, + "step": 131700 + }, + { + "epoch": 0.532125066157072, + "grad_norm": 914.55517578125, + "learning_rate": 2.161511236277503e-05, + "loss": 79.4077, + "step": 131710 + }, + { + "epoch": 0.5321654674224396, + "grad_norm": 485.9687194824219, + "learning_rate": 
2.1612328940821077e-05, + "loss": 71.0271, + "step": 131720 + }, + { + "epoch": 0.5322058686878073, + "grad_norm": 495.38482666015625, + "learning_rate": 2.1609545487433942e-05, + "loss": 75.686, + "step": 131730 + }, + { + "epoch": 0.5322462699531749, + "grad_norm": 705.0997314453125, + "learning_rate": 2.1606762002667887e-05, + "loss": 71.733, + "step": 131740 + }, + { + "epoch": 0.5322866712185426, + "grad_norm": 309.4384765625, + "learning_rate": 2.1603978486577177e-05, + "loss": 55.6571, + "step": 131750 + }, + { + "epoch": 0.5323270724839102, + "grad_norm": 730.5018920898438, + "learning_rate": 2.160119493921609e-05, + "loss": 74.7741, + "step": 131760 + }, + { + "epoch": 0.5323674737492778, + "grad_norm": 862.374755859375, + "learning_rate": 2.159841136063888e-05, + "loss": 82.4054, + "step": 131770 + }, + { + "epoch": 0.5324078750146455, + "grad_norm": 507.3111572265625, + "learning_rate": 2.1595627750899815e-05, + "loss": 71.38, + "step": 131780 + }, + { + "epoch": 0.5324482762800131, + "grad_norm": 506.0101013183594, + "learning_rate": 2.1592844110053163e-05, + "loss": 77.1397, + "step": 131790 + }, + { + "epoch": 0.5324886775453808, + "grad_norm": 736.158203125, + "learning_rate": 2.15900604381532e-05, + "loss": 101.1001, + "step": 131800 + }, + { + "epoch": 0.5325290788107484, + "grad_norm": 1539.8658447265625, + "learning_rate": 2.1587276735254187e-05, + "loss": 75.9614, + "step": 131810 + }, + { + "epoch": 0.5325694800761159, + "grad_norm": 384.14111328125, + "learning_rate": 2.15844930014104e-05, + "loss": 50.2662, + "step": 131820 + }, + { + "epoch": 0.5326098813414836, + "grad_norm": 747.5145874023438, + "learning_rate": 2.1581709236676098e-05, + "loss": 71.0599, + "step": 131830 + }, + { + "epoch": 0.5326502826068512, + "grad_norm": 944.6781616210938, + "learning_rate": 2.1578925441105573e-05, + "loss": 77.2872, + "step": 131840 + }, + { + "epoch": 0.5326906838722189, + "grad_norm": 711.0193481445312, + "learning_rate": 2.157614161475307e-05, + "loss": 59.8465, + "step": 131850 + }, + { + "epoch": 0.5327310851375865, + "grad_norm": 508.24237060546875, + "learning_rate": 2.1573357757672887e-05, + "loss": 69.6337, + "step": 131860 + }, + { + "epoch": 0.5327714864029541, + "grad_norm": 1259.34619140625, + "learning_rate": 2.157057386991928e-05, + "loss": 124.27, + "step": 131870 + }, + { + "epoch": 0.5328118876683218, + "grad_norm": 601.3455810546875, + "learning_rate": 2.1567789951546524e-05, + "loss": 68.4694, + "step": 131880 + }, + { + "epoch": 0.5328522889336894, + "grad_norm": 696.8367309570312, + "learning_rate": 2.1565006002608896e-05, + "loss": 55.615, + "step": 131890 + }, + { + "epoch": 0.532892690199057, + "grad_norm": 1485.8975830078125, + "learning_rate": 2.156222202316067e-05, + "loss": 70.7275, + "step": 131900 + }, + { + "epoch": 0.5329330914644247, + "grad_norm": 779.203857421875, + "learning_rate": 2.1559438013256123e-05, + "loss": 62.4011, + "step": 131910 + }, + { + "epoch": 0.5329734927297923, + "grad_norm": 1028.074462890625, + "learning_rate": 2.155665397294953e-05, + "loss": 78.4772, + "step": 131920 + }, + { + "epoch": 0.53301389399516, + "grad_norm": 852.2239990234375, + "learning_rate": 2.1553869902295168e-05, + "loss": 101.0818, + "step": 131930 + }, + { + "epoch": 0.5330542952605276, + "grad_norm": 969.8702392578125, + "learning_rate": 2.1551085801347314e-05, + "loss": 67.8391, + "step": 131940 + }, + { + "epoch": 0.5330946965258951, + "grad_norm": 1079.14599609375, + "learning_rate": 2.1548301670160238e-05, + "loss": 91.2421, + "step": 131950 + 
}, + { + "epoch": 0.5331350977912628, + "grad_norm": 1148.4508056640625, + "learning_rate": 2.1545517508788225e-05, + "loss": 82.0861, + "step": 131960 + }, + { + "epoch": 0.5331754990566304, + "grad_norm": 366.4356994628906, + "learning_rate": 2.154273331728555e-05, + "loss": 79.3362, + "step": 131970 + }, + { + "epoch": 0.5332159003219981, + "grad_norm": 1020.5046997070312, + "learning_rate": 2.15399490957065e-05, + "loss": 64.181, + "step": 131980 + }, + { + "epoch": 0.5332563015873657, + "grad_norm": 1305.626220703125, + "learning_rate": 2.1537164844105347e-05, + "loss": 99.4774, + "step": 131990 + }, + { + "epoch": 0.5332967028527333, + "grad_norm": 2004.4862060546875, + "learning_rate": 2.1534380562536373e-05, + "loss": 84.8186, + "step": 132000 + }, + { + "epoch": 0.533337104118101, + "grad_norm": 759.3684692382812, + "learning_rate": 2.1531596251053862e-05, + "loss": 93.5765, + "step": 132010 + }, + { + "epoch": 0.5333775053834686, + "grad_norm": 1245.24169921875, + "learning_rate": 2.15288119097121e-05, + "loss": 81.0024, + "step": 132020 + }, + { + "epoch": 0.5334179066488363, + "grad_norm": 835.1526489257812, + "learning_rate": 2.1526027538565355e-05, + "loss": 73.9725, + "step": 132030 + }, + { + "epoch": 0.5334583079142039, + "grad_norm": 810.0197143554688, + "learning_rate": 2.1523243137667914e-05, + "loss": 67.5436, + "step": 132040 + }, + { + "epoch": 0.5334987091795715, + "grad_norm": 618.1740112304688, + "learning_rate": 2.1520458707074065e-05, + "loss": 51.5902, + "step": 132050 + }, + { + "epoch": 0.5335391104449392, + "grad_norm": 1072.64599609375, + "learning_rate": 2.1517674246838097e-05, + "loss": 79.249, + "step": 132060 + }, + { + "epoch": 0.5335795117103068, + "grad_norm": 547.8218994140625, + "learning_rate": 2.1514889757014283e-05, + "loss": 59.7692, + "step": 132070 + }, + { + "epoch": 0.5336199129756743, + "grad_norm": 689.697998046875, + "learning_rate": 2.1512105237656917e-05, + "loss": 86.2271, + "step": 132080 + }, + { + "epoch": 0.533660314241042, + "grad_norm": 1306.5926513671875, + "learning_rate": 2.1509320688820273e-05, + "loss": 118.471, + "step": 132090 + }, + { + "epoch": 0.5337007155064096, + "grad_norm": 2212.953125, + "learning_rate": 2.1506536110558657e-05, + "loss": 87.6218, + "step": 132100 + }, + { + "epoch": 0.5337411167717773, + "grad_norm": 3474.05322265625, + "learning_rate": 2.150375150292634e-05, + "loss": 118.7717, + "step": 132110 + }, + { + "epoch": 0.5337815180371449, + "grad_norm": 906.9917602539062, + "learning_rate": 2.150096686597761e-05, + "loss": 57.8979, + "step": 132120 + }, + { + "epoch": 0.5338219193025125, + "grad_norm": 677.981201171875, + "learning_rate": 2.149818219976676e-05, + "loss": 103.0298, + "step": 132130 + }, + { + "epoch": 0.5338623205678802, + "grad_norm": 547.3273315429688, + "learning_rate": 2.149539750434808e-05, + "loss": 78.3932, + "step": 132140 + }, + { + "epoch": 0.5339027218332478, + "grad_norm": 1958.6021728515625, + "learning_rate": 2.149261277977585e-05, + "loss": 92.8857, + "step": 132150 + }, + { + "epoch": 0.5339431230986155, + "grad_norm": 770.9187622070312, + "learning_rate": 2.148982802610436e-05, + "loss": 79.1352, + "step": 132160 + }, + { + "epoch": 0.5339835243639831, + "grad_norm": 569.2794189453125, + "learning_rate": 2.1487043243387915e-05, + "loss": 71.0996, + "step": 132170 + }, + { + "epoch": 0.5340239256293507, + "grad_norm": 380.9273376464844, + "learning_rate": 2.14842584316808e-05, + "loss": 75.5646, + "step": 132180 + }, + { + "epoch": 0.5340643268947184, + "grad_norm": 
1255.79736328125, + "learning_rate": 2.1481473591037297e-05, + "loss": 123.2141, + "step": 132190 + }, + { + "epoch": 0.534104728160086, + "grad_norm": 489.69244384765625, + "learning_rate": 2.1478688721511706e-05, + "loss": 127.2762, + "step": 132200 + }, + { + "epoch": 0.5341451294254536, + "grad_norm": 496.0174255371094, + "learning_rate": 2.1475903823158308e-05, + "loss": 71.4422, + "step": 132210 + }, + { + "epoch": 0.5341855306908212, + "grad_norm": 723.1309204101562, + "learning_rate": 2.1473118896031417e-05, + "loss": 96.7086, + "step": 132220 + }, + { + "epoch": 0.5342259319561888, + "grad_norm": 208.33712768554688, + "learning_rate": 2.1470333940185308e-05, + "loss": 117.7949, + "step": 132230 + }, + { + "epoch": 0.5342663332215565, + "grad_norm": 363.8735046386719, + "learning_rate": 2.1467548955674283e-05, + "loss": 43.7172, + "step": 132240 + }, + { + "epoch": 0.5343067344869241, + "grad_norm": 814.595703125, + "learning_rate": 2.1464763942552636e-05, + "loss": 96.1385, + "step": 132250 + }, + { + "epoch": 0.5343471357522918, + "grad_norm": 385.4045104980469, + "learning_rate": 2.1461978900874666e-05, + "loss": 48.5761, + "step": 132260 + }, + { + "epoch": 0.5343875370176594, + "grad_norm": 753.64404296875, + "learning_rate": 2.1459193830694658e-05, + "loss": 86.3119, + "step": 132270 + }, + { + "epoch": 0.534427938283027, + "grad_norm": 1018.7460327148438, + "learning_rate": 2.145640873206692e-05, + "loss": 65.695, + "step": 132280 + }, + { + "epoch": 0.5344683395483947, + "grad_norm": 415.44110107421875, + "learning_rate": 2.1453623605045738e-05, + "loss": 51.381, + "step": 132290 + }, + { + "epoch": 0.5345087408137623, + "grad_norm": 365.41021728515625, + "learning_rate": 2.145083844968542e-05, + "loss": 90.9319, + "step": 132300 + }, + { + "epoch": 0.53454914207913, + "grad_norm": 1081.14111328125, + "learning_rate": 2.1448053266040262e-05, + "loss": 92.2154, + "step": 132310 + }, + { + "epoch": 0.5345895433444976, + "grad_norm": 483.8822021484375, + "learning_rate": 2.1445268054164554e-05, + "loss": 90.3007, + "step": 132320 + }, + { + "epoch": 0.5346299446098651, + "grad_norm": 775.784912109375, + "learning_rate": 2.14424828141126e-05, + "loss": 87.4827, + "step": 132330 + }, + { + "epoch": 0.5346703458752328, + "grad_norm": 736.3746948242188, + "learning_rate": 2.1439697545938704e-05, + "loss": 98.9762, + "step": 132340 + }, + { + "epoch": 0.5347107471406004, + "grad_norm": 1554.9989013671875, + "learning_rate": 2.143691224969717e-05, + "loss": 72.1206, + "step": 132350 + }, + { + "epoch": 0.534751148405968, + "grad_norm": 983.4346313476562, + "learning_rate": 2.1434126925442286e-05, + "loss": 64.2431, + "step": 132360 + }, + { + "epoch": 0.5347915496713357, + "grad_norm": 624.306396484375, + "learning_rate": 2.1431341573228356e-05, + "loss": 56.091, + "step": 132370 + }, + { + "epoch": 0.5348319509367033, + "grad_norm": 780.2047729492188, + "learning_rate": 2.1428556193109688e-05, + "loss": 82.323, + "step": 132380 + }, + { + "epoch": 0.534872352202071, + "grad_norm": 533.654052734375, + "learning_rate": 2.1425770785140584e-05, + "loss": 52.886, + "step": 132390 + }, + { + "epoch": 0.5349127534674386, + "grad_norm": 599.1363525390625, + "learning_rate": 2.142298534937534e-05, + "loss": 76.2064, + "step": 132400 + }, + { + "epoch": 0.5349531547328062, + "grad_norm": 479.0877685546875, + "learning_rate": 2.1420199885868265e-05, + "loss": 40.2998, + "step": 132410 + }, + { + "epoch": 0.5349935559981739, + "grad_norm": 963.1239013671875, + "learning_rate": 
2.1417414394673664e-05, + "loss": 78.2794, + "step": 132420 + }, + { + "epoch": 0.5350339572635415, + "grad_norm": 527.8165283203125, + "learning_rate": 2.141462887584584e-05, + "loss": 81.4025, + "step": 132430 + }, + { + "epoch": 0.5350743585289092, + "grad_norm": 1076.4185791015625, + "learning_rate": 2.1411843329439094e-05, + "loss": 109.4635, + "step": 132440 + }, + { + "epoch": 0.5351147597942768, + "grad_norm": 835.4868774414062, + "learning_rate": 2.140905775550774e-05, + "loss": 102.1664, + "step": 132450 + }, + { + "epoch": 0.5351551610596443, + "grad_norm": 1301.7496337890625, + "learning_rate": 2.140627215410608e-05, + "loss": 77.9637, + "step": 132460 + }, + { + "epoch": 0.535195562325012, + "grad_norm": 356.7514953613281, + "learning_rate": 2.1403486525288422e-05, + "loss": 59.5739, + "step": 132470 + }, + { + "epoch": 0.5352359635903796, + "grad_norm": 514.1602783203125, + "learning_rate": 2.1400700869109062e-05, + "loss": 66.8787, + "step": 132480 + }, + { + "epoch": 0.5352763648557473, + "grad_norm": 1367.6866455078125, + "learning_rate": 2.1397915185622322e-05, + "loss": 58.8028, + "step": 132490 + }, + { + "epoch": 0.5353167661211149, + "grad_norm": 904.6476440429688, + "learning_rate": 2.1395129474882507e-05, + "loss": 47.811, + "step": 132500 + }, + { + "epoch": 0.5353571673864825, + "grad_norm": 1184.765625, + "learning_rate": 2.1392343736943925e-05, + "loss": 105.3035, + "step": 132510 + }, + { + "epoch": 0.5353975686518502, + "grad_norm": 348.420166015625, + "learning_rate": 2.1389557971860886e-05, + "loss": 63.2613, + "step": 132520 + }, + { + "epoch": 0.5354379699172178, + "grad_norm": 919.851806640625, + "learning_rate": 2.13867721796877e-05, + "loss": 76.0928, + "step": 132530 + }, + { + "epoch": 0.5354783711825855, + "grad_norm": 459.1181640625, + "learning_rate": 2.138398636047867e-05, + "loss": 68.4711, + "step": 132540 + }, + { + "epoch": 0.5355187724479531, + "grad_norm": 617.1149291992188, + "learning_rate": 2.1381200514288124e-05, + "loss": 68.8242, + "step": 132550 + }, + { + "epoch": 0.5355591737133207, + "grad_norm": 694.1890258789062, + "learning_rate": 2.137841464117035e-05, + "loss": 58.5727, + "step": 132560 + }, + { + "epoch": 0.5355995749786884, + "grad_norm": 1042.8485107421875, + "learning_rate": 2.137562874117968e-05, + "loss": 88.5464, + "step": 132570 + }, + { + "epoch": 0.535639976244056, + "grad_norm": 828.8763427734375, + "learning_rate": 2.1372842814370416e-05, + "loss": 60.8816, + "step": 132580 + }, + { + "epoch": 0.5356803775094235, + "grad_norm": 920.8046875, + "learning_rate": 2.137005686079688e-05, + "loss": 106.6258, + "step": 132590 + }, + { + "epoch": 0.5357207787747912, + "grad_norm": 625.4419555664062, + "learning_rate": 2.1367270880513377e-05, + "loss": 59.1052, + "step": 132600 + }, + { + "epoch": 0.5357611800401588, + "grad_norm": 674.377197265625, + "learning_rate": 2.136448487357422e-05, + "loss": 66.5575, + "step": 132610 + }, + { + "epoch": 0.5358015813055265, + "grad_norm": 845.07470703125, + "learning_rate": 2.1361698840033735e-05, + "loss": 64.6944, + "step": 132620 + }, + { + "epoch": 0.5358419825708941, + "grad_norm": 859.8458251953125, + "learning_rate": 2.1358912779946236e-05, + "loss": 72.721, + "step": 132630 + }, + { + "epoch": 0.5358823838362617, + "grad_norm": 923.4352416992188, + "learning_rate": 2.135612669336602e-05, + "loss": 80.2607, + "step": 132640 + }, + { + "epoch": 0.5359227851016294, + "grad_norm": 969.9942016601562, + "learning_rate": 2.135334058034742e-05, + "loss": 67.5742, + "step": 132650 + 
}, + { + "epoch": 0.535963186366997, + "grad_norm": 710.0928344726562, + "learning_rate": 2.1350554440944745e-05, + "loss": 72.0729, + "step": 132660 + }, + { + "epoch": 0.5360035876323647, + "grad_norm": 546.4231567382812, + "learning_rate": 2.1347768275212323e-05, + "loss": 47.9104, + "step": 132670 + }, + { + "epoch": 0.5360439888977323, + "grad_norm": 712.3460083007812, + "learning_rate": 2.134498208320446e-05, + "loss": 58.663, + "step": 132680 + }, + { + "epoch": 0.5360843901630999, + "grad_norm": 1002.625244140625, + "learning_rate": 2.134219586497548e-05, + "loss": 90.4401, + "step": 132690 + }, + { + "epoch": 0.5361247914284676, + "grad_norm": 583.6229248046875, + "learning_rate": 2.1339409620579704e-05, + "loss": 43.8357, + "step": 132700 + }, + { + "epoch": 0.5361651926938352, + "grad_norm": 769.8447265625, + "learning_rate": 2.133662335007144e-05, + "loss": 77.2382, + "step": 132710 + }, + { + "epoch": 0.5362055939592028, + "grad_norm": 560.1478881835938, + "learning_rate": 2.1333837053505025e-05, + "loss": 76.0368, + "step": 132720 + }, + { + "epoch": 0.5362459952245704, + "grad_norm": 584.6332397460938, + "learning_rate": 2.1331050730934762e-05, + "loss": 69.6549, + "step": 132730 + }, + { + "epoch": 0.536286396489938, + "grad_norm": 1060.5848388671875, + "learning_rate": 2.132826438241498e-05, + "loss": 76.4073, + "step": 132740 + }, + { + "epoch": 0.5363267977553057, + "grad_norm": 574.5213012695312, + "learning_rate": 2.132547800800001e-05, + "loss": 84.6, + "step": 132750 + }, + { + "epoch": 0.5363671990206733, + "grad_norm": 747.6767578125, + "learning_rate": 2.132269160774415e-05, + "loss": 68.7951, + "step": 132760 + }, + { + "epoch": 0.536407600286041, + "grad_norm": 695.412353515625, + "learning_rate": 2.1319905181701746e-05, + "loss": 47.6472, + "step": 132770 + }, + { + "epoch": 0.5364480015514086, + "grad_norm": 741.8221435546875, + "learning_rate": 2.1317118729927104e-05, + "loss": 44.2284, + "step": 132780 + }, + { + "epoch": 0.5364884028167762, + "grad_norm": 3119.658203125, + "learning_rate": 2.1314332252474563e-05, + "loss": 82.1212, + "step": 132790 + }, + { + "epoch": 0.5365288040821439, + "grad_norm": 706.6655883789062, + "learning_rate": 2.1311545749398435e-05, + "loss": 69.6131, + "step": 132800 + }, + { + "epoch": 0.5365692053475115, + "grad_norm": 581.317626953125, + "learning_rate": 2.1308759220753045e-05, + "loss": 106.6761, + "step": 132810 + }, + { + "epoch": 0.5366096066128792, + "grad_norm": 1085.3177490234375, + "learning_rate": 2.1305972666592716e-05, + "loss": 83.9079, + "step": 132820 + }, + { + "epoch": 0.5366500078782468, + "grad_norm": 1035.4493408203125, + "learning_rate": 2.1303186086971786e-05, + "loss": 79.7324, + "step": 132830 + }, + { + "epoch": 0.5366904091436144, + "grad_norm": 294.62298583984375, + "learning_rate": 2.130039948194457e-05, + "loss": 70.9114, + "step": 132840 + }, + { + "epoch": 0.536730810408982, + "grad_norm": 1081.477783203125, + "learning_rate": 2.1297612851565394e-05, + "loss": 100.8989, + "step": 132850 + }, + { + "epoch": 0.5367712116743496, + "grad_norm": 477.44287109375, + "learning_rate": 2.1294826195888586e-05, + "loss": 81.5394, + "step": 132860 + }, + { + "epoch": 0.5368116129397172, + "grad_norm": 901.669921875, + "learning_rate": 2.1292039514968475e-05, + "loss": 102.8499, + "step": 132870 + }, + { + "epoch": 0.5368520142050849, + "grad_norm": 896.7144165039062, + "learning_rate": 2.1289252808859393e-05, + "loss": 94.437, + "step": 132880 + }, + { + "epoch": 0.5368924154704525, + "grad_norm": 
330.1830749511719, + "learning_rate": 2.1286466077615662e-05, + "loss": 62.8172, + "step": 132890 + }, + { + "epoch": 0.5369328167358202, + "grad_norm": 607.5623779296875, + "learning_rate": 2.128367932129161e-05, + "loss": 60.5205, + "step": 132900 + }, + { + "epoch": 0.5369732180011878, + "grad_norm": 713.755859375, + "learning_rate": 2.1280892539941572e-05, + "loss": 96.1822, + "step": 132910 + }, + { + "epoch": 0.5370136192665554, + "grad_norm": 713.6353149414062, + "learning_rate": 2.1278105733619876e-05, + "loss": 63.5193, + "step": 132920 + }, + { + "epoch": 0.5370540205319231, + "grad_norm": 488.12286376953125, + "learning_rate": 2.1275318902380847e-05, + "loss": 42.4369, + "step": 132930 + }, + { + "epoch": 0.5370944217972907, + "grad_norm": 743.5506591796875, + "learning_rate": 2.1272532046278815e-05, + "loss": 71.5208, + "step": 132940 + }, + { + "epoch": 0.5371348230626584, + "grad_norm": 622.0604248046875, + "learning_rate": 2.1269745165368123e-05, + "loss": 91.9462, + "step": 132950 + }, + { + "epoch": 0.537175224328026, + "grad_norm": 716.4788208007812, + "learning_rate": 2.1266958259703093e-05, + "loss": 72.7967, + "step": 132960 + }, + { + "epoch": 0.5372156255933935, + "grad_norm": 599.436279296875, + "learning_rate": 2.1264171329338065e-05, + "loss": 100.1251, + "step": 132970 + }, + { + "epoch": 0.5372560268587612, + "grad_norm": 1607.9947509765625, + "learning_rate": 2.1261384374327362e-05, + "loss": 89.2174, + "step": 132980 + }, + { + "epoch": 0.5372964281241288, + "grad_norm": 755.1240234375, + "learning_rate": 2.1258597394725314e-05, + "loss": 111.1329, + "step": 132990 + }, + { + "epoch": 0.5373368293894965, + "grad_norm": 575.7804565429688, + "learning_rate": 2.125581039058627e-05, + "loss": 79.3358, + "step": 133000 + }, + { + "epoch": 0.5373772306548641, + "grad_norm": 802.4450073242188, + "learning_rate": 2.1253023361964552e-05, + "loss": 81.254, + "step": 133010 + }, + { + "epoch": 0.5374176319202317, + "grad_norm": 447.39532470703125, + "learning_rate": 2.1250236308914502e-05, + "loss": 104.0173, + "step": 133020 + }, + { + "epoch": 0.5374580331855994, + "grad_norm": 634.5095825195312, + "learning_rate": 2.1247449231490445e-05, + "loss": 106.6998, + "step": 133030 + }, + { + "epoch": 0.537498434450967, + "grad_norm": 548.2379760742188, + "learning_rate": 2.124466212974673e-05, + "loss": 88.3591, + "step": 133040 + }, + { + "epoch": 0.5375388357163347, + "grad_norm": 575.937255859375, + "learning_rate": 2.1241875003737684e-05, + "loss": 37.7224, + "step": 133050 + }, + { + "epoch": 0.5375792369817023, + "grad_norm": 537.3082885742188, + "learning_rate": 2.1239087853517648e-05, + "loss": 67.8022, + "step": 133060 + }, + { + "epoch": 0.5376196382470699, + "grad_norm": 2111.785888671875, + "learning_rate": 2.1236300679140953e-05, + "loss": 121.285, + "step": 133070 + }, + { + "epoch": 0.5376600395124376, + "grad_norm": 821.30615234375, + "learning_rate": 2.1233513480661943e-05, + "loss": 80.936, + "step": 133080 + }, + { + "epoch": 0.5377004407778052, + "grad_norm": 652.485107421875, + "learning_rate": 2.1230726258134945e-05, + "loss": 87.8591, + "step": 133090 + }, + { + "epoch": 0.5377408420431727, + "grad_norm": 643.4332275390625, + "learning_rate": 2.1227939011614313e-05, + "loss": 68.8878, + "step": 133100 + }, + { + "epoch": 0.5377812433085404, + "grad_norm": 476.5476379394531, + "learning_rate": 2.122515174115437e-05, + "loss": 87.2, + "step": 133110 + }, + { + "epoch": 0.537821644573908, + "grad_norm": 837.8448486328125, + "learning_rate": 
2.1222364446809473e-05, + "loss": 109.229, + "step": 133120 + }, + { + "epoch": 0.5378620458392757, + "grad_norm": 392.1357116699219, + "learning_rate": 2.1219577128633943e-05, + "loss": 60.4203, + "step": 133130 + }, + { + "epoch": 0.5379024471046433, + "grad_norm": 572.6224365234375, + "learning_rate": 2.1216789786682134e-05, + "loss": 83.3304, + "step": 133140 + }, + { + "epoch": 0.5379428483700109, + "grad_norm": 1218.8992919921875, + "learning_rate": 2.121400242100838e-05, + "loss": 64.4339, + "step": 133150 + }, + { + "epoch": 0.5379832496353786, + "grad_norm": 1469.4859619140625, + "learning_rate": 2.1211215031667027e-05, + "loss": 73.6719, + "step": 133160 + }, + { + "epoch": 0.5380236509007462, + "grad_norm": 400.685546875, + "learning_rate": 2.120842761871241e-05, + "loss": 60.2101, + "step": 133170 + }, + { + "epoch": 0.5380640521661139, + "grad_norm": 880.283447265625, + "learning_rate": 2.1205640182198876e-05, + "loss": 66.1812, + "step": 133180 + }, + { + "epoch": 0.5381044534314815, + "grad_norm": 1685.418212890625, + "learning_rate": 2.1202852722180757e-05, + "loss": 59.0287, + "step": 133190 + }, + { + "epoch": 0.5381448546968491, + "grad_norm": 974.5607299804688, + "learning_rate": 2.1200065238712416e-05, + "loss": 87.0326, + "step": 133200 + }, + { + "epoch": 0.5381852559622168, + "grad_norm": 1013.3322143554688, + "learning_rate": 2.1197277731848184e-05, + "loss": 84.3924, + "step": 133210 + }, + { + "epoch": 0.5382256572275844, + "grad_norm": 544.249755859375, + "learning_rate": 2.11944902016424e-05, + "loss": 56.9755, + "step": 133220 + }, + { + "epoch": 0.538266058492952, + "grad_norm": 484.43524169921875, + "learning_rate": 2.1191702648149423e-05, + "loss": 40.5162, + "step": 133230 + }, + { + "epoch": 0.5383064597583196, + "grad_norm": 675.5714111328125, + "learning_rate": 2.118891507142359e-05, + "loss": 73.6148, + "step": 133240 + }, + { + "epoch": 0.5383468610236872, + "grad_norm": 601.24755859375, + "learning_rate": 2.1186127471519238e-05, + "loss": 84.3455, + "step": 133250 + }, + { + "epoch": 0.5383872622890549, + "grad_norm": 1747.7109375, + "learning_rate": 2.118333984849072e-05, + "loss": 71.7988, + "step": 133260 + }, + { + "epoch": 0.5384276635544225, + "grad_norm": 811.1027221679688, + "learning_rate": 2.1180552202392387e-05, + "loss": 60.8827, + "step": 133270 + }, + { + "epoch": 0.5384680648197901, + "grad_norm": 665.8331909179688, + "learning_rate": 2.1177764533278578e-05, + "loss": 54.8076, + "step": 133280 + }, + { + "epoch": 0.5385084660851578, + "grad_norm": 1030.3944091796875, + "learning_rate": 2.1174976841203644e-05, + "loss": 103.0432, + "step": 133290 + }, + { + "epoch": 0.5385488673505254, + "grad_norm": 816.7426147460938, + "learning_rate": 2.117218912622193e-05, + "loss": 94.6841, + "step": 133300 + }, + { + "epoch": 0.5385892686158931, + "grad_norm": 817.4564819335938, + "learning_rate": 2.1169401388387795e-05, + "loss": 76.9917, + "step": 133310 + }, + { + "epoch": 0.5386296698812607, + "grad_norm": 800.2697143554688, + "learning_rate": 2.116661362775557e-05, + "loss": 65.4954, + "step": 133320 + }, + { + "epoch": 0.5386700711466283, + "grad_norm": 996.630859375, + "learning_rate": 2.116382584437961e-05, + "loss": 82.4859, + "step": 133330 + }, + { + "epoch": 0.538710472411996, + "grad_norm": 1115.2091064453125, + "learning_rate": 2.116103803831427e-05, + "loss": 106.9963, + "step": 133340 + }, + { + "epoch": 0.5387508736773636, + "grad_norm": 244.52052307128906, + "learning_rate": 2.115825020961389e-05, + "loss": 51.4016, + "step": 
133350 + }, + { + "epoch": 0.5387912749427312, + "grad_norm": 748.3160400390625, + "learning_rate": 2.115546235833283e-05, + "loss": 65.6281, + "step": 133360 + }, + { + "epoch": 0.5388316762080988, + "grad_norm": 767.2844848632812, + "learning_rate": 2.1152674484525435e-05, + "loss": 109.3059, + "step": 133370 + }, + { + "epoch": 0.5388720774734664, + "grad_norm": 690.7229614257812, + "learning_rate": 2.114988658824606e-05, + "loss": 62.196, + "step": 133380 + }, + { + "epoch": 0.5389124787388341, + "grad_norm": 505.8988952636719, + "learning_rate": 2.114709866954905e-05, + "loss": 62.7189, + "step": 133390 + }, + { + "epoch": 0.5389528800042017, + "grad_norm": 220.22398376464844, + "learning_rate": 2.1144310728488766e-05, + "loss": 40.4032, + "step": 133400 + }, + { + "epoch": 0.5389932812695694, + "grad_norm": 4520.24951171875, + "learning_rate": 2.1141522765119556e-05, + "loss": 92.0016, + "step": 133410 + }, + { + "epoch": 0.539033682534937, + "grad_norm": 119.63796997070312, + "learning_rate": 2.1138734779495766e-05, + "loss": 44.4233, + "step": 133420 + }, + { + "epoch": 0.5390740838003046, + "grad_norm": 2672.920654296875, + "learning_rate": 2.113594677167176e-05, + "loss": 131.3466, + "step": 133430 + }, + { + "epoch": 0.5391144850656723, + "grad_norm": 348.34503173828125, + "learning_rate": 2.113315874170188e-05, + "loss": 78.8663, + "step": 133440 + }, + { + "epoch": 0.5391548863310399, + "grad_norm": 368.7242431640625, + "learning_rate": 2.1130370689640495e-05, + "loss": 59.9358, + "step": 133450 + }, + { + "epoch": 0.5391952875964076, + "grad_norm": 363.8022155761719, + "learning_rate": 2.1127582615541944e-05, + "loss": 108.3846, + "step": 133460 + }, + { + "epoch": 0.5392356888617752, + "grad_norm": 526.435791015625, + "learning_rate": 2.1124794519460595e-05, + "loss": 81.3659, + "step": 133470 + }, + { + "epoch": 0.5392760901271428, + "grad_norm": 1277.24853515625, + "learning_rate": 2.11220064014508e-05, + "loss": 83.4911, + "step": 133480 + }, + { + "epoch": 0.5393164913925104, + "grad_norm": 401.4178466796875, + "learning_rate": 2.111921826156691e-05, + "loss": 61.9173, + "step": 133490 + }, + { + "epoch": 0.539356892657878, + "grad_norm": 803.8014526367188, + "learning_rate": 2.111643009986328e-05, + "loss": 68.7154, + "step": 133500 + }, + { + "epoch": 0.5393972939232456, + "grad_norm": 766.3026733398438, + "learning_rate": 2.1113641916394272e-05, + "loss": 137.1897, + "step": 133510 + }, + { + "epoch": 0.5394376951886133, + "grad_norm": 665.9508056640625, + "learning_rate": 2.111085371121424e-05, + "loss": 86.9268, + "step": 133520 + }, + { + "epoch": 0.5394780964539809, + "grad_norm": 740.249267578125, + "learning_rate": 2.1108065484377552e-05, + "loss": 67.5212, + "step": 133530 + }, + { + "epoch": 0.5395184977193486, + "grad_norm": 425.18408203125, + "learning_rate": 2.1105277235938547e-05, + "loss": 117.4206, + "step": 133540 + }, + { + "epoch": 0.5395588989847162, + "grad_norm": 588.6842041015625, + "learning_rate": 2.1102488965951596e-05, + "loss": 74.7824, + "step": 133550 + }, + { + "epoch": 0.5395993002500838, + "grad_norm": 2359.981689453125, + "learning_rate": 2.1099700674471052e-05, + "loss": 92.4056, + "step": 133560 + }, + { + "epoch": 0.5396397015154515, + "grad_norm": 768.6509399414062, + "learning_rate": 2.1096912361551284e-05, + "loss": 90.0393, + "step": 133570 + }, + { + "epoch": 0.5396801027808191, + "grad_norm": 1320.6839599609375, + "learning_rate": 2.1094124027246642e-05, + "loss": 105.6083, + "step": 133580 + }, + { + "epoch": 
0.5397205040461868, + "grad_norm": 702.5809936523438, + "learning_rate": 2.1091335671611487e-05, + "loss": 81.7022, + "step": 133590 + }, + { + "epoch": 0.5397609053115544, + "grad_norm": 1370.920654296875, + "learning_rate": 2.1088547294700182e-05, + "loss": 85.3496, + "step": 133600 + }, + { + "epoch": 0.5398013065769219, + "grad_norm": 771.2918090820312, + "learning_rate": 2.1085758896567094e-05, + "loss": 55.8644, + "step": 133610 + }, + { + "epoch": 0.5398417078422896, + "grad_norm": 767.2468872070312, + "learning_rate": 2.1082970477266568e-05, + "loss": 85.8624, + "step": 133620 + }, + { + "epoch": 0.5398821091076572, + "grad_norm": 497.5660705566406, + "learning_rate": 2.1080182036852976e-05, + "loss": 86.4075, + "step": 133630 + }, + { + "epoch": 0.5399225103730249, + "grad_norm": 633.65087890625, + "learning_rate": 2.1077393575380682e-05, + "loss": 60.8948, + "step": 133640 + }, + { + "epoch": 0.5399629116383925, + "grad_norm": 546.7970581054688, + "learning_rate": 2.1074605092904044e-05, + "loss": 65.6224, + "step": 133650 + }, + { + "epoch": 0.5400033129037601, + "grad_norm": 374.5375061035156, + "learning_rate": 2.1071816589477436e-05, + "loss": 93.0352, + "step": 133660 + }, + { + "epoch": 0.5400437141691278, + "grad_norm": 853.1981201171875, + "learning_rate": 2.1069028065155198e-05, + "loss": 118.1653, + "step": 133670 + }, + { + "epoch": 0.5400841154344954, + "grad_norm": 1009.9605102539062, + "learning_rate": 2.1066239519991712e-05, + "loss": 82.5146, + "step": 133680 + }, + { + "epoch": 0.540124516699863, + "grad_norm": 496.1924743652344, + "learning_rate": 2.106345095404134e-05, + "loss": 75.004, + "step": 133690 + }, + { + "epoch": 0.5401649179652307, + "grad_norm": 812.1956176757812, + "learning_rate": 2.106066236735844e-05, + "loss": 73.1044, + "step": 133700 + }, + { + "epoch": 0.5402053192305983, + "grad_norm": 645.0123291015625, + "learning_rate": 2.105787375999738e-05, + "loss": 63.0161, + "step": 133710 + }, + { + "epoch": 0.540245720495966, + "grad_norm": 1160.9556884765625, + "learning_rate": 2.105508513201253e-05, + "loss": 90.8676, + "step": 133720 + }, + { + "epoch": 0.5402861217613336, + "grad_norm": 607.78125, + "learning_rate": 2.1052296483458255e-05, + "loss": 64.3178, + "step": 133730 + }, + { + "epoch": 0.5403265230267011, + "grad_norm": 779.45556640625, + "learning_rate": 2.1049507814388912e-05, + "loss": 55.7248, + "step": 133740 + }, + { + "epoch": 0.5403669242920688, + "grad_norm": 1157.5513916015625, + "learning_rate": 2.1046719124858882e-05, + "loss": 66.7, + "step": 133750 + }, + { + "epoch": 0.5404073255574364, + "grad_norm": 691.0505981445312, + "learning_rate": 2.1043930414922517e-05, + "loss": 81.5556, + "step": 133760 + }, + { + "epoch": 0.5404477268228041, + "grad_norm": 983.38134765625, + "learning_rate": 2.1041141684634193e-05, + "loss": 76.8668, + "step": 133770 + }, + { + "epoch": 0.5404881280881717, + "grad_norm": 705.6129150390625, + "learning_rate": 2.103835293404828e-05, + "loss": 57.1581, + "step": 133780 + }, + { + "epoch": 0.5405285293535393, + "grad_norm": 648.5277709960938, + "learning_rate": 2.1035564163219133e-05, + "loss": 80.1478, + "step": 133790 + }, + { + "epoch": 0.540568930618907, + "grad_norm": 348.1455078125, + "learning_rate": 2.1032775372201132e-05, + "loss": 117.6879, + "step": 133800 + }, + { + "epoch": 0.5406093318842746, + "grad_norm": 1351.91162109375, + "learning_rate": 2.1029986561048646e-05, + "loss": 98.9355, + "step": 133810 + }, + { + "epoch": 0.5406497331496423, + "grad_norm": 1495.1817626953125, + 
"learning_rate": 2.102719772981604e-05, + "loss": 62.9616, + "step": 133820 + }, + { + "epoch": 0.5406901344150099, + "grad_norm": 1146.8870849609375, + "learning_rate": 2.102440887855769e-05, + "loss": 79.62, + "step": 133830 + }, + { + "epoch": 0.5407305356803775, + "grad_norm": 1013.6943969726562, + "learning_rate": 2.102162000732796e-05, + "loss": 74.7107, + "step": 133840 + }, + { + "epoch": 0.5407709369457452, + "grad_norm": 746.4028930664062, + "learning_rate": 2.101883111618122e-05, + "loss": 57.3402, + "step": 133850 + }, + { + "epoch": 0.5408113382111128, + "grad_norm": 800.1146240234375, + "learning_rate": 2.1016042205171842e-05, + "loss": 74.1301, + "step": 133860 + }, + { + "epoch": 0.5408517394764804, + "grad_norm": 763.4237060546875, + "learning_rate": 2.10132532743542e-05, + "loss": 49.5388, + "step": 133870 + }, + { + "epoch": 0.540892140741848, + "grad_norm": 662.3217163085938, + "learning_rate": 2.101046432378266e-05, + "loss": 54.4715, + "step": 133880 + }, + { + "epoch": 0.5409325420072156, + "grad_norm": 531.9530639648438, + "learning_rate": 2.10076753535116e-05, + "loss": 83.7345, + "step": 133890 + }, + { + "epoch": 0.5409729432725833, + "grad_norm": 426.0985107421875, + "learning_rate": 2.1004886363595392e-05, + "loss": 71.8224, + "step": 133900 + }, + { + "epoch": 0.5410133445379509, + "grad_norm": 416.53387451171875, + "learning_rate": 2.1002097354088407e-05, + "loss": 62.0419, + "step": 133910 + }, + { + "epoch": 0.5410537458033186, + "grad_norm": 562.1279907226562, + "learning_rate": 2.099930832504502e-05, + "loss": 46.3857, + "step": 133920 + }, + { + "epoch": 0.5410941470686862, + "grad_norm": 694.3231201171875, + "learning_rate": 2.0996519276519594e-05, + "loss": 68.7931, + "step": 133930 + }, + { + "epoch": 0.5411345483340538, + "grad_norm": 692.349853515625, + "learning_rate": 2.0993730208566526e-05, + "loss": 49.6247, + "step": 133940 + }, + { + "epoch": 0.5411749495994215, + "grad_norm": 1105.6856689453125, + "learning_rate": 2.0990941121240164e-05, + "loss": 75.3943, + "step": 133950 + }, + { + "epoch": 0.5412153508647891, + "grad_norm": 775.1527709960938, + "learning_rate": 2.0988152014594898e-05, + "loss": 64.1878, + "step": 133960 + }, + { + "epoch": 0.5412557521301568, + "grad_norm": 615.8643798828125, + "learning_rate": 2.0985362888685098e-05, + "loss": 67.3897, + "step": 133970 + }, + { + "epoch": 0.5412961533955244, + "grad_norm": 651.5286865234375, + "learning_rate": 2.0982573743565146e-05, + "loss": 75.001, + "step": 133980 + }, + { + "epoch": 0.541336554660892, + "grad_norm": 326.02459716796875, + "learning_rate": 2.0979784579289405e-05, + "loss": 48.4562, + "step": 133990 + }, + { + "epoch": 0.5413769559262596, + "grad_norm": 772.4134521484375, + "learning_rate": 2.097699539591227e-05, + "loss": 58.2929, + "step": 134000 + }, + { + "epoch": 0.5414173571916272, + "grad_norm": 1020.9420166015625, + "learning_rate": 2.0974206193488098e-05, + "loss": 71.4519, + "step": 134010 + }, + { + "epoch": 0.5414577584569948, + "grad_norm": 419.01300048828125, + "learning_rate": 2.0971416972071284e-05, + "loss": 69.4528, + "step": 134020 + }, + { + "epoch": 0.5414981597223625, + "grad_norm": 313.4629821777344, + "learning_rate": 2.0968627731716186e-05, + "loss": 29.8431, + "step": 134030 + }, + { + "epoch": 0.5415385609877301, + "grad_norm": 1089.7841796875, + "learning_rate": 2.0965838472477196e-05, + "loss": 78.026, + "step": 134040 + }, + { + "epoch": 0.5415789622530978, + "grad_norm": 1091.8165283203125, + "learning_rate": 2.0963049194408684e-05, + 
"loss": 64.4741, + "step": 134050 + }, + { + "epoch": 0.5416193635184654, + "grad_norm": 468.828369140625, + "learning_rate": 2.0960259897565037e-05, + "loss": 93.1563, + "step": 134060 + }, + { + "epoch": 0.541659764783833, + "grad_norm": 1129.590087890625, + "learning_rate": 2.0957470582000626e-05, + "loss": 85.9222, + "step": 134070 + }, + { + "epoch": 0.5417001660492007, + "grad_norm": 644.4100341796875, + "learning_rate": 2.0954681247769835e-05, + "loss": 88.1089, + "step": 134080 + }, + { + "epoch": 0.5417405673145683, + "grad_norm": 389.9478759765625, + "learning_rate": 2.0951891894927043e-05, + "loss": 85.7799, + "step": 134090 + }, + { + "epoch": 0.541780968579936, + "grad_norm": 304.0990295410156, + "learning_rate": 2.094910252352663e-05, + "loss": 130.4222, + "step": 134100 + }, + { + "epoch": 0.5418213698453036, + "grad_norm": 692.5805053710938, + "learning_rate": 2.094631313362297e-05, + "loss": 84.9188, + "step": 134110 + }, + { + "epoch": 0.5418617711106711, + "grad_norm": 796.6018676757812, + "learning_rate": 2.0943523725270446e-05, + "loss": 91.7787, + "step": 134120 + }, + { + "epoch": 0.5419021723760388, + "grad_norm": 1004.4500122070312, + "learning_rate": 2.0940734298523444e-05, + "loss": 90.3217, + "step": 134130 + }, + { + "epoch": 0.5419425736414064, + "grad_norm": 456.35235595703125, + "learning_rate": 2.0937944853436342e-05, + "loss": 74.3718, + "step": 134140 + }, + { + "epoch": 0.541982974906774, + "grad_norm": 545.5726928710938, + "learning_rate": 2.0935155390063527e-05, + "loss": 67.0441, + "step": 134150 + }, + { + "epoch": 0.5420233761721417, + "grad_norm": 831.8565063476562, + "learning_rate": 2.0932365908459367e-05, + "loss": 100.1182, + "step": 134160 + }, + { + "epoch": 0.5420637774375093, + "grad_norm": 547.4397583007812, + "learning_rate": 2.0929576408678264e-05, + "loss": 98.2462, + "step": 134170 + }, + { + "epoch": 0.542104178702877, + "grad_norm": 707.8832397460938, + "learning_rate": 2.0926786890774582e-05, + "loss": 59.9948, + "step": 134180 + }, + { + "epoch": 0.5421445799682446, + "grad_norm": 430.6379699707031, + "learning_rate": 2.0923997354802718e-05, + "loss": 61.7761, + "step": 134190 + }, + { + "epoch": 0.5421849812336123, + "grad_norm": 742.9754638671875, + "learning_rate": 2.0921207800817045e-05, + "loss": 66.1969, + "step": 134200 + }, + { + "epoch": 0.5422253824989799, + "grad_norm": 372.631591796875, + "learning_rate": 2.0918418228871953e-05, + "loss": 63.406, + "step": 134210 + }, + { + "epoch": 0.5422657837643475, + "grad_norm": 904.8087768554688, + "learning_rate": 2.091562863902183e-05, + "loss": 83.6552, + "step": 134220 + }, + { + "epoch": 0.5423061850297152, + "grad_norm": 741.4019165039062, + "learning_rate": 2.091283903132105e-05, + "loss": 65.4828, + "step": 134230 + }, + { + "epoch": 0.5423465862950828, + "grad_norm": 1009.4669799804688, + "learning_rate": 2.0910049405824004e-05, + "loss": 57.45, + "step": 134240 + }, + { + "epoch": 0.5423869875604503, + "grad_norm": 821.2303466796875, + "learning_rate": 2.0907259762585074e-05, + "loss": 81.0688, + "step": 134250 + }, + { + "epoch": 0.542427388825818, + "grad_norm": 843.075927734375, + "learning_rate": 2.0904470101658652e-05, + "loss": 84.8291, + "step": 134260 + }, + { + "epoch": 0.5424677900911856, + "grad_norm": 626.9430541992188, + "learning_rate": 2.0901680423099126e-05, + "loss": 55.9671, + "step": 134270 + }, + { + "epoch": 0.5425081913565533, + "grad_norm": 769.5240478515625, + "learning_rate": 2.0898890726960867e-05, + "loss": 85.0358, + "step": 134280 + }, + { + 
"epoch": 0.5425485926219209, + "grad_norm": 710.1192626953125, + "learning_rate": 2.089610101329827e-05, + "loss": 59.0625, + "step": 134290 + }, + { + "epoch": 0.5425889938872885, + "grad_norm": 1006.579345703125, + "learning_rate": 2.089331128216573e-05, + "loss": 48.3036, + "step": 134300 + }, + { + "epoch": 0.5426293951526562, + "grad_norm": 248.5617218017578, + "learning_rate": 2.0890521533617614e-05, + "loss": 48.5117, + "step": 134310 + }, + { + "epoch": 0.5426697964180238, + "grad_norm": 881.7984008789062, + "learning_rate": 2.088773176770833e-05, + "loss": 76.0495, + "step": 134320 + }, + { + "epoch": 0.5427101976833915, + "grad_norm": 649.823486328125, + "learning_rate": 2.0884941984492255e-05, + "loss": 86.2261, + "step": 134330 + }, + { + "epoch": 0.5427505989487591, + "grad_norm": 571.3045043945312, + "learning_rate": 2.0882152184023786e-05, + "loss": 49.2459, + "step": 134340 + }, + { + "epoch": 0.5427910002141267, + "grad_norm": 620.4608764648438, + "learning_rate": 2.08793623663573e-05, + "loss": 53.8049, + "step": 134350 + }, + { + "epoch": 0.5428314014794944, + "grad_norm": 1195.08251953125, + "learning_rate": 2.0876572531547197e-05, + "loss": 82.6899, + "step": 134360 + }, + { + "epoch": 0.542871802744862, + "grad_norm": 543.7266845703125, + "learning_rate": 2.0873782679647857e-05, + "loss": 57.6255, + "step": 134370 + }, + { + "epoch": 0.5429122040102295, + "grad_norm": 658.723388671875, + "learning_rate": 2.087099281071368e-05, + "loss": 82.4216, + "step": 134380 + }, + { + "epoch": 0.5429526052755972, + "grad_norm": 664.275146484375, + "learning_rate": 2.0868202924799042e-05, + "loss": 63.9077, + "step": 134390 + }, + { + "epoch": 0.5429930065409648, + "grad_norm": 278.2515869140625, + "learning_rate": 2.0865413021958344e-05, + "loss": 69.19, + "step": 134400 + }, + { + "epoch": 0.5430334078063325, + "grad_norm": 756.2687377929688, + "learning_rate": 2.0862623102245973e-05, + "loss": 62.3357, + "step": 134410 + }, + { + "epoch": 0.5430738090717001, + "grad_norm": 769.6613159179688, + "learning_rate": 2.0859833165716318e-05, + "loss": 69.3291, + "step": 134420 + }, + { + "epoch": 0.5431142103370677, + "grad_norm": 318.6669006347656, + "learning_rate": 2.085704321242378e-05, + "loss": 93.5344, + "step": 134430 + }, + { + "epoch": 0.5431546116024354, + "grad_norm": 704.4751586914062, + "learning_rate": 2.0854253242422743e-05, + "loss": 56.1492, + "step": 134440 + }, + { + "epoch": 0.543195012867803, + "grad_norm": 1058.3157958984375, + "learning_rate": 2.0851463255767596e-05, + "loss": 81.6392, + "step": 134450 + }, + { + "epoch": 0.5432354141331707, + "grad_norm": 385.5838317871094, + "learning_rate": 2.084867325251273e-05, + "loss": 54.2564, + "step": 134460 + }, + { + "epoch": 0.5432758153985383, + "grad_norm": 1015.0359497070312, + "learning_rate": 2.0845883232712548e-05, + "loss": 73.1264, + "step": 134470 + }, + { + "epoch": 0.543316216663906, + "grad_norm": 548.817626953125, + "learning_rate": 2.0843093196421433e-05, + "loss": 48.9213, + "step": 134480 + }, + { + "epoch": 0.5433566179292736, + "grad_norm": 310.2113342285156, + "learning_rate": 2.0840303143693785e-05, + "loss": 64.3279, + "step": 134490 + }, + { + "epoch": 0.5433970191946412, + "grad_norm": 521.5067749023438, + "learning_rate": 2.0837513074583993e-05, + "loss": 112.1796, + "step": 134500 + }, + { + "epoch": 0.5434374204600088, + "grad_norm": 762.0655517578125, + "learning_rate": 2.0834722989146457e-05, + "loss": 70.7463, + "step": 134510 + }, + { + "epoch": 0.5434778217253764, + "grad_norm": 
1150.6900634765625, + "learning_rate": 2.083193288743556e-05, + "loss": 78.6468, + "step": 134520 + }, + { + "epoch": 0.543518222990744, + "grad_norm": 479.1996154785156, + "learning_rate": 2.0829142769505707e-05, + "loss": 67.8264, + "step": 134530 + }, + { + "epoch": 0.5435586242561117, + "grad_norm": 370.4224853515625, + "learning_rate": 2.082635263541129e-05, + "loss": 71.1443, + "step": 134540 + }, + { + "epoch": 0.5435990255214793, + "grad_norm": 508.53558349609375, + "learning_rate": 2.0823562485206704e-05, + "loss": 42.115, + "step": 134550 + }, + { + "epoch": 0.543639426786847, + "grad_norm": 660.82080078125, + "learning_rate": 2.0820772318946335e-05, + "loss": 51.4152, + "step": 134560 + }, + { + "epoch": 0.5436798280522146, + "grad_norm": 854.4629516601562, + "learning_rate": 2.0817982136684592e-05, + "loss": 83.4693, + "step": 134570 + }, + { + "epoch": 0.5437202293175822, + "grad_norm": 764.4788818359375, + "learning_rate": 2.0815191938475862e-05, + "loss": 72.191, + "step": 134580 + }, + { + "epoch": 0.5437606305829499, + "grad_norm": 1158.4273681640625, + "learning_rate": 2.0812401724374554e-05, + "loss": 76.3404, + "step": 134590 + }, + { + "epoch": 0.5438010318483175, + "grad_norm": 607.3169555664062, + "learning_rate": 2.080961149443505e-05, + "loss": 111.8637, + "step": 134600 + }, + { + "epoch": 0.5438414331136852, + "grad_norm": 1117.007080078125, + "learning_rate": 2.0806821248711756e-05, + "loss": 86.4088, + "step": 134610 + }, + { + "epoch": 0.5438818343790528, + "grad_norm": 697.7039794921875, + "learning_rate": 2.0804030987259067e-05, + "loss": 69.141, + "step": 134620 + }, + { + "epoch": 0.5439222356444204, + "grad_norm": 558.0200805664062, + "learning_rate": 2.0801240710131382e-05, + "loss": 56.7939, + "step": 134630 + }, + { + "epoch": 0.543962636909788, + "grad_norm": 667.4652099609375, + "learning_rate": 2.0798450417383092e-05, + "loss": 63.4903, + "step": 134640 + }, + { + "epoch": 0.5440030381751556, + "grad_norm": 1750.8870849609375, + "learning_rate": 2.07956601090686e-05, + "loss": 84.324, + "step": 134650 + }, + { + "epoch": 0.5440434394405232, + "grad_norm": 493.9021911621094, + "learning_rate": 2.0792869785242304e-05, + "loss": 46.7915, + "step": 134660 + }, + { + "epoch": 0.5440838407058909, + "grad_norm": 576.186279296875, + "learning_rate": 2.0790079445958607e-05, + "loss": 80.8611, + "step": 134670 + }, + { + "epoch": 0.5441242419712585, + "grad_norm": 1491.5946044921875, + "learning_rate": 2.07872890912719e-05, + "loss": 120.4975, + "step": 134680 + }, + { + "epoch": 0.5441646432366262, + "grad_norm": 587.732177734375, + "learning_rate": 2.0784498721236593e-05, + "loss": 88.975, + "step": 134690 + }, + { + "epoch": 0.5442050445019938, + "grad_norm": 492.6392517089844, + "learning_rate": 2.0781708335907073e-05, + "loss": 84.0454, + "step": 134700 + }, + { + "epoch": 0.5442454457673614, + "grad_norm": 906.8009643554688, + "learning_rate": 2.0778917935337753e-05, + "loss": 99.8707, + "step": 134710 + }, + { + "epoch": 0.5442858470327291, + "grad_norm": 1117.7572021484375, + "learning_rate": 2.0776127519583023e-05, + "loss": 91.7259, + "step": 134720 + }, + { + "epoch": 0.5443262482980967, + "grad_norm": 886.3023071289062, + "learning_rate": 2.0773337088697283e-05, + "loss": 82.6375, + "step": 134730 + }, + { + "epoch": 0.5443666495634644, + "grad_norm": 307.8722839355469, + "learning_rate": 2.0770546642734946e-05, + "loss": 74.1856, + "step": 134740 + }, + { + "epoch": 0.544407050828832, + "grad_norm": 479.5414733886719, + "learning_rate": 
2.07677561817504e-05, + "loss": 82.0898, + "step": 134750 + }, + { + "epoch": 0.5444474520941995, + "grad_norm": 577.2052612304688, + "learning_rate": 2.0764965705798053e-05, + "loss": 78.6139, + "step": 134760 + }, + { + "epoch": 0.5444878533595672, + "grad_norm": 602.6316528320312, + "learning_rate": 2.0762175214932307e-05, + "loss": 129.516, + "step": 134770 + }, + { + "epoch": 0.5445282546249348, + "grad_norm": 803.7373657226562, + "learning_rate": 2.0759384709207567e-05, + "loss": 84.9438, + "step": 134780 + }, + { + "epoch": 0.5445686558903025, + "grad_norm": 1348.444580078125, + "learning_rate": 2.0756594188678233e-05, + "loss": 77.8075, + "step": 134790 + }, + { + "epoch": 0.5446090571556701, + "grad_norm": 981.3920288085938, + "learning_rate": 2.0753803653398697e-05, + "loss": 102.9448, + "step": 134800 + }, + { + "epoch": 0.5446494584210377, + "grad_norm": 715.5809326171875, + "learning_rate": 2.075101310342337e-05, + "loss": 86.7292, + "step": 134810 + }, + { + "epoch": 0.5446898596864054, + "grad_norm": 979.0699462890625, + "learning_rate": 2.0748222538806656e-05, + "loss": 96.1142, + "step": 134820 + }, + { + "epoch": 0.544730260951773, + "grad_norm": 485.0465393066406, + "learning_rate": 2.074543195960297e-05, + "loss": 56.3571, + "step": 134830 + }, + { + "epoch": 0.5447706622171407, + "grad_norm": 625.8384399414062, + "learning_rate": 2.0742641365866692e-05, + "loss": 54.1729, + "step": 134840 + }, + { + "epoch": 0.5448110634825083, + "grad_norm": 638.9879760742188, + "learning_rate": 2.073985075765224e-05, + "loss": 39.9463, + "step": 134850 + }, + { + "epoch": 0.5448514647478759, + "grad_norm": 575.7022094726562, + "learning_rate": 2.0737060135014025e-05, + "loss": 89.072, + "step": 134860 + }, + { + "epoch": 0.5448918660132436, + "grad_norm": 578.9482421875, + "learning_rate": 2.0734269498006432e-05, + "loss": 84.6766, + "step": 134870 + }, + { + "epoch": 0.5449322672786112, + "grad_norm": 538.7471313476562, + "learning_rate": 2.0731478846683884e-05, + "loss": 52.3695, + "step": 134880 + }, + { + "epoch": 0.5449726685439787, + "grad_norm": 837.3687744140625, + "learning_rate": 2.0728688181100778e-05, + "loss": 98.174, + "step": 134890 + }, + { + "epoch": 0.5450130698093464, + "grad_norm": 758.5521850585938, + "learning_rate": 2.072589750131152e-05, + "loss": 75.0622, + "step": 134900 + }, + { + "epoch": 0.545053471074714, + "grad_norm": 686.6367797851562, + "learning_rate": 2.0723106807370516e-05, + "loss": 35.3552, + "step": 134910 + }, + { + "epoch": 0.5450938723400817, + "grad_norm": 1005.4103393554688, + "learning_rate": 2.0720316099332173e-05, + "loss": 65.8011, + "step": 134920 + }, + { + "epoch": 0.5451342736054493, + "grad_norm": 1383.9107666015625, + "learning_rate": 2.0717525377250895e-05, + "loss": 78.6861, + "step": 134930 + }, + { + "epoch": 0.5451746748708169, + "grad_norm": 496.865478515625, + "learning_rate": 2.071473464118109e-05, + "loss": 96.3676, + "step": 134940 + }, + { + "epoch": 0.5452150761361846, + "grad_norm": 632.6151123046875, + "learning_rate": 2.0711943891177172e-05, + "loss": 63.7085, + "step": 134950 + }, + { + "epoch": 0.5452554774015522, + "grad_norm": 1596.8426513671875, + "learning_rate": 2.070915312729354e-05, + "loss": 101.9239, + "step": 134960 + }, + { + "epoch": 0.5452958786669199, + "grad_norm": 805.793701171875, + "learning_rate": 2.07063623495846e-05, + "loss": 116.755, + "step": 134970 + }, + { + "epoch": 0.5453362799322875, + "grad_norm": 562.1574096679688, + "learning_rate": 2.0703571558104757e-05, + "loss": 52.804, + 
"step": 134980 + }, + { + "epoch": 0.5453766811976551, + "grad_norm": 642.0929565429688, + "learning_rate": 2.0700780752908432e-05, + "loss": 79.4218, + "step": 134990 + }, + { + "epoch": 0.5454170824630228, + "grad_norm": 550.6673583984375, + "learning_rate": 2.0697989934050025e-05, + "loss": 62.6429, + "step": 135000 + }, + { + "epoch": 0.5454574837283904, + "grad_norm": 786.7946166992188, + "learning_rate": 2.0695199101583938e-05, + "loss": 103.928, + "step": 135010 + }, + { + "epoch": 0.545497884993758, + "grad_norm": 1141.880126953125, + "learning_rate": 2.0692408255564593e-05, + "loss": 103.6542, + "step": 135020 + }, + { + "epoch": 0.5455382862591256, + "grad_norm": 632.7206420898438, + "learning_rate": 2.0689617396046395e-05, + "loss": 84.6242, + "step": 135030 + }, + { + "epoch": 0.5455786875244932, + "grad_norm": 594.935546875, + "learning_rate": 2.068682652308374e-05, + "loss": 71.4828, + "step": 135040 + }, + { + "epoch": 0.5456190887898609, + "grad_norm": 735.9957275390625, + "learning_rate": 2.068403563673106e-05, + "loss": 72.6457, + "step": 135050 + }, + { + "epoch": 0.5456594900552285, + "grad_norm": 2172.44384765625, + "learning_rate": 2.0681244737042746e-05, + "loss": 77.7959, + "step": 135060 + }, + { + "epoch": 0.5456998913205962, + "grad_norm": 1474.780029296875, + "learning_rate": 2.0678453824073218e-05, + "loss": 66.5353, + "step": 135070 + }, + { + "epoch": 0.5457402925859638, + "grad_norm": 1291.4022216796875, + "learning_rate": 2.067566289787688e-05, + "loss": 93.3319, + "step": 135080 + }, + { + "epoch": 0.5457806938513314, + "grad_norm": 559.5963745117188, + "learning_rate": 2.067287195850815e-05, + "loss": 55.4858, + "step": 135090 + }, + { + "epoch": 0.5458210951166991, + "grad_norm": 249.5072784423828, + "learning_rate": 2.067008100602143e-05, + "loss": 48.3866, + "step": 135100 + }, + { + "epoch": 0.5458614963820667, + "grad_norm": 771.659912109375, + "learning_rate": 2.066729004047114e-05, + "loss": 88.0867, + "step": 135110 + }, + { + "epoch": 0.5459018976474344, + "grad_norm": 1236.453125, + "learning_rate": 2.066449906191169e-05, + "loss": 88.682, + "step": 135120 + }, + { + "epoch": 0.545942298912802, + "grad_norm": 733.9824829101562, + "learning_rate": 2.0661708070397485e-05, + "loss": 80.8835, + "step": 135130 + }, + { + "epoch": 0.5459827001781696, + "grad_norm": 904.1165161132812, + "learning_rate": 2.065891706598294e-05, + "loss": 148.4646, + "step": 135140 + }, + { + "epoch": 0.5460231014435372, + "grad_norm": 1660.830810546875, + "learning_rate": 2.0656126048722465e-05, + "loss": 145.4095, + "step": 135150 + }, + { + "epoch": 0.5460635027089048, + "grad_norm": 822.3232421875, + "learning_rate": 2.065333501867048e-05, + "loss": 69.1111, + "step": 135160 + }, + { + "epoch": 0.5461039039742724, + "grad_norm": 917.6844482421875, + "learning_rate": 2.065054397588139e-05, + "loss": 113.4229, + "step": 135170 + }, + { + "epoch": 0.5461443052396401, + "grad_norm": 725.8449096679688, + "learning_rate": 2.064775292040961e-05, + "loss": 63.3272, + "step": 135180 + }, + { + "epoch": 0.5461847065050077, + "grad_norm": 1212.4375, + "learning_rate": 2.064496185230955e-05, + "loss": 91.0021, + "step": 135190 + }, + { + "epoch": 0.5462251077703754, + "grad_norm": 740.646484375, + "learning_rate": 2.0642170771635636e-05, + "loss": 62.9773, + "step": 135200 + }, + { + "epoch": 0.546265509035743, + "grad_norm": 567.89306640625, + "learning_rate": 2.0639379678442264e-05, + "loss": 63.2857, + "step": 135210 + }, + { + "epoch": 0.5463059103011106, + "grad_norm": 
681.7307739257812, + "learning_rate": 2.063658857278386e-05, + "loss": 91.6868, + "step": 135220 + }, + { + "epoch": 0.5463463115664783, + "grad_norm": 1040.7576904296875, + "learning_rate": 2.0633797454714832e-05, + "loss": 82.5468, + "step": 135230 + }, + { + "epoch": 0.5463867128318459, + "grad_norm": 765.203857421875, + "learning_rate": 2.06310063242896e-05, + "loss": 55.6379, + "step": 135240 + }, + { + "epoch": 0.5464271140972136, + "grad_norm": 522.9273681640625, + "learning_rate": 2.0628215181562567e-05, + "loss": 97.3638, + "step": 135250 + }, + { + "epoch": 0.5464675153625812, + "grad_norm": 314.6795654296875, + "learning_rate": 2.0625424026588164e-05, + "loss": 63.6087, + "step": 135260 + }, + { + "epoch": 0.5465079166279488, + "grad_norm": 579.9944458007812, + "learning_rate": 2.062263285942079e-05, + "loss": 55.428, + "step": 135270 + }, + { + "epoch": 0.5465483178933164, + "grad_norm": 863.68359375, + "learning_rate": 2.0619841680114874e-05, + "loss": 75.9747, + "step": 135280 + }, + { + "epoch": 0.546588719158684, + "grad_norm": 801.5532836914062, + "learning_rate": 2.061705048872482e-05, + "loss": 74.5285, + "step": 135290 + }, + { + "epoch": 0.5466291204240517, + "grad_norm": 501.6993713378906, + "learning_rate": 2.061425928530506e-05, + "loss": 89.9877, + "step": 135300 + }, + { + "epoch": 0.5466695216894193, + "grad_norm": 599.3051147460938, + "learning_rate": 2.0611468069909986e-05, + "loss": 58.2, + "step": 135310 + }, + { + "epoch": 0.5467099229547869, + "grad_norm": 638.8753051757812, + "learning_rate": 2.0608676842594036e-05, + "loss": 75.5444, + "step": 135320 + }, + { + "epoch": 0.5467503242201546, + "grad_norm": 775.8065795898438, + "learning_rate": 2.0605885603411612e-05, + "loss": 101.4624, + "step": 135330 + }, + { + "epoch": 0.5467907254855222, + "grad_norm": 792.2493896484375, + "learning_rate": 2.0603094352417137e-05, + "loss": 73.5163, + "step": 135340 + }, + { + "epoch": 0.5468311267508899, + "grad_norm": 1126.951171875, + "learning_rate": 2.0600303089665025e-05, + "loss": 70.9816, + "step": 135350 + }, + { + "epoch": 0.5468715280162575, + "grad_norm": 402.4384765625, + "learning_rate": 2.0597511815209703e-05, + "loss": 61.0648, + "step": 135360 + }, + { + "epoch": 0.5469119292816251, + "grad_norm": 525.196533203125, + "learning_rate": 2.0594720529105574e-05, + "loss": 40.3472, + "step": 135370 + }, + { + "epoch": 0.5469523305469928, + "grad_norm": 995.4124145507812, + "learning_rate": 2.0591929231407065e-05, + "loss": 94.4273, + "step": 135380 + }, + { + "epoch": 0.5469927318123604, + "grad_norm": 943.0812377929688, + "learning_rate": 2.0589137922168595e-05, + "loss": 95.573, + "step": 135390 + }, + { + "epoch": 0.5470331330777279, + "grad_norm": 365.83251953125, + "learning_rate": 2.0586346601444573e-05, + "loss": 98.0826, + "step": 135400 + }, + { + "epoch": 0.5470735343430956, + "grad_norm": 446.82818603515625, + "learning_rate": 2.058355526928942e-05, + "loss": 73.0327, + "step": 135410 + }, + { + "epoch": 0.5471139356084632, + "grad_norm": 388.7535705566406, + "learning_rate": 2.0580763925757558e-05, + "loss": 44.5363, + "step": 135420 + }, + { + "epoch": 0.5471543368738309, + "grad_norm": 1455.14501953125, + "learning_rate": 2.0577972570903403e-05, + "loss": 77.2512, + "step": 135430 + }, + { + "epoch": 0.5471947381391985, + "grad_norm": 1053.9644775390625, + "learning_rate": 2.0575181204781373e-05, + "loss": 61.1446, + "step": 135440 + }, + { + "epoch": 0.5472351394045661, + "grad_norm": 3395.21630859375, + "learning_rate": 2.0572389827445896e-05, 
+ "loss": 125.1507, + "step": 135450 + }, + { + "epoch": 0.5472755406699338, + "grad_norm": 585.2877807617188, + "learning_rate": 2.0569598438951384e-05, + "loss": 82.0461, + "step": 135460 + }, + { + "epoch": 0.5473159419353014, + "grad_norm": 728.3803100585938, + "learning_rate": 2.056680703935226e-05, + "loss": 80.8527, + "step": 135470 + }, + { + "epoch": 0.5473563432006691, + "grad_norm": 326.429443359375, + "learning_rate": 2.0564015628702933e-05, + "loss": 74.7714, + "step": 135480 + }, + { + "epoch": 0.5473967444660367, + "grad_norm": 743.4617309570312, + "learning_rate": 2.0561224207057835e-05, + "loss": 91.7, + "step": 135490 + }, + { + "epoch": 0.5474371457314043, + "grad_norm": 582.888427734375, + "learning_rate": 2.0558432774471382e-05, + "loss": 80.3049, + "step": 135500 + }, + { + "epoch": 0.547477546996772, + "grad_norm": 988.9017333984375, + "learning_rate": 2.0555641330997995e-05, + "loss": 64.2773, + "step": 135510 + }, + { + "epoch": 0.5475179482621396, + "grad_norm": 504.8742370605469, + "learning_rate": 2.055284987669209e-05, + "loss": 87.8501, + "step": 135520 + }, + { + "epoch": 0.5475583495275071, + "grad_norm": 351.64373779296875, + "learning_rate": 2.0550058411608095e-05, + "loss": 45.8285, + "step": 135530 + }, + { + "epoch": 0.5475987507928748, + "grad_norm": 459.09423828125, + "learning_rate": 2.054726693580043e-05, + "loss": 79.246, + "step": 135540 + }, + { + "epoch": 0.5476391520582424, + "grad_norm": 1099.9534912109375, + "learning_rate": 2.0544475449323505e-05, + "loss": 99.9396, + "step": 135550 + }, + { + "epoch": 0.5476795533236101, + "grad_norm": 697.3394775390625, + "learning_rate": 2.0541683952231763e-05, + "loss": 57.5686, + "step": 135560 + }, + { + "epoch": 0.5477199545889777, + "grad_norm": 743.0536499023438, + "learning_rate": 2.0538892444579614e-05, + "loss": 65.4765, + "step": 135570 + }, + { + "epoch": 0.5477603558543453, + "grad_norm": 769.9522094726562, + "learning_rate": 2.0536100926421474e-05, + "loss": 63.6503, + "step": 135580 + }, + { + "epoch": 0.547800757119713, + "grad_norm": 459.262939453125, + "learning_rate": 2.0533309397811765e-05, + "loss": 69.5442, + "step": 135590 + }, + { + "epoch": 0.5478411583850806, + "grad_norm": 779.0827026367188, + "learning_rate": 2.053051785880492e-05, + "loss": 68.069, + "step": 135600 + }, + { + "epoch": 0.5478815596504483, + "grad_norm": 412.2923278808594, + "learning_rate": 2.052772630945536e-05, + "loss": 82.6345, + "step": 135610 + }, + { + "epoch": 0.5479219609158159, + "grad_norm": 1096.228271484375, + "learning_rate": 2.05249347498175e-05, + "loss": 75.1005, + "step": 135620 + }, + { + "epoch": 0.5479623621811835, + "grad_norm": 727.6666259765625, + "learning_rate": 2.0522143179945765e-05, + "loss": 85.3141, + "step": 135630 + }, + { + "epoch": 0.5480027634465512, + "grad_norm": 322.0718688964844, + "learning_rate": 2.0519351599894586e-05, + "loss": 72.7567, + "step": 135640 + }, + { + "epoch": 0.5480431647119188, + "grad_norm": 1061.6793212890625, + "learning_rate": 2.0516560009718378e-05, + "loss": 75.753, + "step": 135650 + }, + { + "epoch": 0.5480835659772864, + "grad_norm": 687.3995361328125, + "learning_rate": 2.0513768409471565e-05, + "loss": 101.9975, + "step": 135660 + }, + { + "epoch": 0.548123967242654, + "grad_norm": 1199.5616455078125, + "learning_rate": 2.051097679920857e-05, + "loss": 66.9585, + "step": 135670 + }, + { + "epoch": 0.5481643685080216, + "grad_norm": 591.5929565429688, + "learning_rate": 2.050818517898382e-05, + "loss": 70.8764, + "step": 135680 + }, + { + 
"epoch": 0.5482047697733893, + "grad_norm": 664.4053955078125, + "learning_rate": 2.050539354885174e-05, + "loss": 75.6904, + "step": 135690 + }, + { + "epoch": 0.5482451710387569, + "grad_norm": 842.0626831054688, + "learning_rate": 2.0502601908866754e-05, + "loss": 58.726, + "step": 135700 + }, + { + "epoch": 0.5482855723041246, + "grad_norm": 1287.5699462890625, + "learning_rate": 2.049981025908328e-05, + "loss": 82.7304, + "step": 135710 + }, + { + "epoch": 0.5483259735694922, + "grad_norm": 947.1760864257812, + "learning_rate": 2.049701859955575e-05, + "loss": 85.2842, + "step": 135720 + }, + { + "epoch": 0.5483663748348598, + "grad_norm": 411.21832275390625, + "learning_rate": 2.0494226930338592e-05, + "loss": 49.9027, + "step": 135730 + }, + { + "epoch": 0.5484067761002275, + "grad_norm": 905.7086181640625, + "learning_rate": 2.049143525148622e-05, + "loss": 119.6573, + "step": 135740 + }, + { + "epoch": 0.5484471773655951, + "grad_norm": 341.1328125, + "learning_rate": 2.0488643563053066e-05, + "loss": 59.4859, + "step": 135750 + }, + { + "epoch": 0.5484875786309628, + "grad_norm": 512.0372924804688, + "learning_rate": 2.0485851865093552e-05, + "loss": 53.1349, + "step": 135760 + }, + { + "epoch": 0.5485279798963304, + "grad_norm": 809.0970458984375, + "learning_rate": 2.0483060157662113e-05, + "loss": 120.1056, + "step": 135770 + }, + { + "epoch": 0.548568381161698, + "grad_norm": 639.502197265625, + "learning_rate": 2.0480268440813158e-05, + "loss": 74.8697, + "step": 135780 + }, + { + "epoch": 0.5486087824270656, + "grad_norm": 276.24322509765625, + "learning_rate": 2.0477476714601127e-05, + "loss": 81.1373, + "step": 135790 + }, + { + "epoch": 0.5486491836924332, + "grad_norm": 1120.3238525390625, + "learning_rate": 2.0474684979080442e-05, + "loss": 99.8282, + "step": 135800 + }, + { + "epoch": 0.5486895849578008, + "grad_norm": 668.6294555664062, + "learning_rate": 2.0471893234305533e-05, + "loss": 82.4476, + "step": 135810 + }, + { + "epoch": 0.5487299862231685, + "grad_norm": 541.7166137695312, + "learning_rate": 2.0469101480330815e-05, + "loss": 68.5365, + "step": 135820 + }, + { + "epoch": 0.5487703874885361, + "grad_norm": 618.491455078125, + "learning_rate": 2.0466309717210727e-05, + "loss": 50.6164, + "step": 135830 + }, + { + "epoch": 0.5488107887539038, + "grad_norm": 719.8905029296875, + "learning_rate": 2.046351794499969e-05, + "loss": 72.1805, + "step": 135840 + }, + { + "epoch": 0.5488511900192714, + "grad_norm": 761.3929443359375, + "learning_rate": 2.046072616375213e-05, + "loss": 60.4001, + "step": 135850 + }, + { + "epoch": 0.548891591284639, + "grad_norm": 1198.167236328125, + "learning_rate": 2.0457934373522477e-05, + "loss": 104.5814, + "step": 135860 + }, + { + "epoch": 0.5489319925500067, + "grad_norm": 705.7415771484375, + "learning_rate": 2.0455142574365156e-05, + "loss": 83.1622, + "step": 135870 + }, + { + "epoch": 0.5489723938153743, + "grad_norm": 432.1953430175781, + "learning_rate": 2.0452350766334598e-05, + "loss": 62.572, + "step": 135880 + }, + { + "epoch": 0.549012795080742, + "grad_norm": 1589.596923828125, + "learning_rate": 2.0449558949485228e-05, + "loss": 86.6618, + "step": 135890 + }, + { + "epoch": 0.5490531963461096, + "grad_norm": 224.01947021484375, + "learning_rate": 2.0446767123871476e-05, + "loss": 61.7152, + "step": 135900 + }, + { + "epoch": 0.5490935976114772, + "grad_norm": 1060.09228515625, + "learning_rate": 2.0443975289547772e-05, + "loss": 86.994, + "step": 135910 + }, + { + "epoch": 0.5491339988768448, + "grad_norm": 
764.6885986328125, + "learning_rate": 2.0441183446568536e-05, + "loss": 75.0984, + "step": 135920 + }, + { + "epoch": 0.5491744001422124, + "grad_norm": 527.9359741210938, + "learning_rate": 2.0438391594988207e-05, + "loss": 58.7953, + "step": 135930 + }, + { + "epoch": 0.54921480140758, + "grad_norm": 541.9168090820312, + "learning_rate": 2.0435599734861206e-05, + "loss": 80.2392, + "step": 135940 + }, + { + "epoch": 0.5492552026729477, + "grad_norm": 609.749267578125, + "learning_rate": 2.0432807866241958e-05, + "loss": 67.645, + "step": 135950 + }, + { + "epoch": 0.5492956039383153, + "grad_norm": 578.8295288085938, + "learning_rate": 2.04300159891849e-05, + "loss": 105.6286, + "step": 135960 + }, + { + "epoch": 0.549336005203683, + "grad_norm": 985.0025634765625, + "learning_rate": 2.0427224103744462e-05, + "loss": 69.5929, + "step": 135970 + }, + { + "epoch": 0.5493764064690506, + "grad_norm": 556.3909912109375, + "learning_rate": 2.042443220997507e-05, + "loss": 69.7567, + "step": 135980 + }, + { + "epoch": 0.5494168077344183, + "grad_norm": 1475.6163330078125, + "learning_rate": 2.0421640307931154e-05, + "loss": 109.0498, + "step": 135990 + }, + { + "epoch": 0.5494572089997859, + "grad_norm": 565.7216796875, + "learning_rate": 2.0418848397667142e-05, + "loss": 68.4617, + "step": 136000 + }, + { + "epoch": 0.5494976102651535, + "grad_norm": 289.0955810546875, + "learning_rate": 2.0416056479237464e-05, + "loss": 62.5588, + "step": 136010 + }, + { + "epoch": 0.5495380115305212, + "grad_norm": 718.0391845703125, + "learning_rate": 2.0413264552696555e-05, + "loss": 64.3728, + "step": 136020 + }, + { + "epoch": 0.5495784127958888, + "grad_norm": 731.8460693359375, + "learning_rate": 2.0410472618098834e-05, + "loss": 82.9925, + "step": 136030 + }, + { + "epoch": 0.5496188140612563, + "grad_norm": 1151.6058349609375, + "learning_rate": 2.0407680675498743e-05, + "loss": 112.6433, + "step": 136040 + }, + { + "epoch": 0.549659215326624, + "grad_norm": 657.1612548828125, + "learning_rate": 2.0404888724950704e-05, + "loss": 80.3766, + "step": 136050 + }, + { + "epoch": 0.5496996165919916, + "grad_norm": 695.3689575195312, + "learning_rate": 2.0402096766509155e-05, + "loss": 72.9554, + "step": 136060 + }, + { + "epoch": 0.5497400178573593, + "grad_norm": 684.4832763671875, + "learning_rate": 2.039930480022852e-05, + "loss": 62.3732, + "step": 136070 + }, + { + "epoch": 0.5497804191227269, + "grad_norm": 598.4655151367188, + "learning_rate": 2.0396512826163234e-05, + "loss": 84.5542, + "step": 136080 + }, + { + "epoch": 0.5498208203880945, + "grad_norm": 690.8124389648438, + "learning_rate": 2.0393720844367723e-05, + "loss": 81.4515, + "step": 136090 + }, + { + "epoch": 0.5498612216534622, + "grad_norm": 1138.5592041015625, + "learning_rate": 2.0390928854896427e-05, + "loss": 58.897, + "step": 136100 + }, + { + "epoch": 0.5499016229188298, + "grad_norm": 647.6051025390625, + "learning_rate": 2.0388136857803765e-05, + "loss": 69.1504, + "step": 136110 + }, + { + "epoch": 0.5499420241841975, + "grad_norm": 1371.436279296875, + "learning_rate": 2.0385344853144175e-05, + "loss": 116.1363, + "step": 136120 + }, + { + "epoch": 0.5499824254495651, + "grad_norm": 414.0524597167969, + "learning_rate": 2.0382552840972093e-05, + "loss": 50.9115, + "step": 136130 + }, + { + "epoch": 0.5500228267149327, + "grad_norm": 312.8243713378906, + "learning_rate": 2.0379760821341948e-05, + "loss": 48.395, + "step": 136140 + }, + { + "epoch": 0.5500632279803004, + "grad_norm": 613.7324829101562, + "learning_rate": 
2.0376968794308166e-05, + "loss": 71.1729, + "step": 136150 + }, + { + "epoch": 0.550103629245668, + "grad_norm": 403.1412353515625, + "learning_rate": 2.0374176759925187e-05, + "loss": 78.3058, + "step": 136160 + }, + { + "epoch": 0.5501440305110356, + "grad_norm": 379.0944519042969, + "learning_rate": 2.0371384718247435e-05, + "loss": 57.7306, + "step": 136170 + }, + { + "epoch": 0.5501844317764032, + "grad_norm": 721.72314453125, + "learning_rate": 2.0368592669329353e-05, + "loss": 79.2175, + "step": 136180 + }, + { + "epoch": 0.5502248330417708, + "grad_norm": 533.6517944335938, + "learning_rate": 2.036580061322536e-05, + "loss": 65.3355, + "step": 136190 + }, + { + "epoch": 0.5502652343071385, + "grad_norm": 699.5355224609375, + "learning_rate": 2.0363008549989897e-05, + "loss": 78.131, + "step": 136200 + }, + { + "epoch": 0.5503056355725061, + "grad_norm": 1031.4927978515625, + "learning_rate": 2.0360216479677396e-05, + "loss": 76.2408, + "step": 136210 + }, + { + "epoch": 0.5503460368378738, + "grad_norm": 782.4487915039062, + "learning_rate": 2.0357424402342295e-05, + "loss": 76.8647, + "step": 136220 + }, + { + "epoch": 0.5503864381032414, + "grad_norm": 759.9113159179688, + "learning_rate": 2.0354632318039016e-05, + "loss": 64.2082, + "step": 136230 + }, + { + "epoch": 0.550426839368609, + "grad_norm": 830.5465698242188, + "learning_rate": 2.0351840226821996e-05, + "loss": 117.2705, + "step": 136240 + }, + { + "epoch": 0.5504672406339767, + "grad_norm": 418.8070373535156, + "learning_rate": 2.0349048128745673e-05, + "loss": 71.4022, + "step": 136250 + }, + { + "epoch": 0.5505076418993443, + "grad_norm": 937.578369140625, + "learning_rate": 2.034625602386448e-05, + "loss": 58.9699, + "step": 136260 + }, + { + "epoch": 0.550548043164712, + "grad_norm": 353.7993469238281, + "learning_rate": 2.034346391223284e-05, + "loss": 68.2288, + "step": 136270 + }, + { + "epoch": 0.5505884444300796, + "grad_norm": 848.7138671875, + "learning_rate": 2.0340671793905198e-05, + "loss": 74.7446, + "step": 136280 + }, + { + "epoch": 0.5506288456954472, + "grad_norm": 503.1951904296875, + "learning_rate": 2.033787966893598e-05, + "loss": 71.6737, + "step": 136290 + }, + { + "epoch": 0.5506692469608148, + "grad_norm": 650.5276489257812, + "learning_rate": 2.0335087537379632e-05, + "loss": 100.0236, + "step": 136300 + }, + { + "epoch": 0.5507096482261824, + "grad_norm": 781.9000854492188, + "learning_rate": 2.033229539929057e-05, + "loss": 88.8462, + "step": 136310 + }, + { + "epoch": 0.55075004949155, + "grad_norm": 574.4034423828125, + "learning_rate": 2.0329503254723245e-05, + "loss": 133.3479, + "step": 136320 + }, + { + "epoch": 0.5507904507569177, + "grad_norm": 370.0722351074219, + "learning_rate": 2.0326711103732086e-05, + "loss": 48.0901, + "step": 136330 + }, + { + "epoch": 0.5508308520222853, + "grad_norm": 713.8074951171875, + "learning_rate": 2.032391894637152e-05, + "loss": 76.3756, + "step": 136340 + }, + { + "epoch": 0.550871253287653, + "grad_norm": 782.6560668945312, + "learning_rate": 2.0321126782695996e-05, + "loss": 73.8929, + "step": 136350 + }, + { + "epoch": 0.5509116545530206, + "grad_norm": 681.9956665039062, + "learning_rate": 2.031833461275993e-05, + "loss": 47.4342, + "step": 136360 + }, + { + "epoch": 0.5509520558183882, + "grad_norm": 945.3119506835938, + "learning_rate": 2.031554243661777e-05, + "loss": 59.4428, + "step": 136370 + }, + { + "epoch": 0.5509924570837559, + "grad_norm": 1086.6834716796875, + "learning_rate": 2.0312750254323953e-05, + "loss": 67.8418, + 
"step": 136380 + }, + { + "epoch": 0.5510328583491235, + "grad_norm": 538.7207641601562, + "learning_rate": 2.03099580659329e-05, + "loss": 137.3599, + "step": 136390 + }, + { + "epoch": 0.5510732596144912, + "grad_norm": 874.9195556640625, + "learning_rate": 2.0307165871499062e-05, + "loss": 79.2496, + "step": 136400 + }, + { + "epoch": 0.5511136608798588, + "grad_norm": 844.8164672851562, + "learning_rate": 2.0304373671076863e-05, + "loss": 63.9664, + "step": 136410 + }, + { + "epoch": 0.5511540621452264, + "grad_norm": 1007.9264526367188, + "learning_rate": 2.0301581464720744e-05, + "loss": 70.0988, + "step": 136420 + }, + { + "epoch": 0.551194463410594, + "grad_norm": 870.6340942382812, + "learning_rate": 2.0298789252485146e-05, + "loss": 112.6136, + "step": 136430 + }, + { + "epoch": 0.5512348646759616, + "grad_norm": 577.7737426757812, + "learning_rate": 2.0295997034424485e-05, + "loss": 88.2542, + "step": 136440 + }, + { + "epoch": 0.5512752659413293, + "grad_norm": 791.1004028320312, + "learning_rate": 2.0293204810593216e-05, + "loss": 67.1195, + "step": 136450 + }, + { + "epoch": 0.5513156672066969, + "grad_norm": 464.8357238769531, + "learning_rate": 2.0290412581045768e-05, + "loss": 75.482, + "step": 136460 + }, + { + "epoch": 0.5513560684720645, + "grad_norm": 997.6325073242188, + "learning_rate": 2.028762034583658e-05, + "loss": 64.0906, + "step": 136470 + }, + { + "epoch": 0.5513964697374322, + "grad_norm": 840.1932983398438, + "learning_rate": 2.0284828105020077e-05, + "loss": 67.7992, + "step": 136480 + }, + { + "epoch": 0.5514368710027998, + "grad_norm": 1204.8055419921875, + "learning_rate": 2.0282035858650708e-05, + "loss": 77.25, + "step": 136490 + }, + { + "epoch": 0.5514772722681675, + "grad_norm": 879.9944458007812, + "learning_rate": 2.027924360678291e-05, + "loss": 75.5329, + "step": 136500 + }, + { + "epoch": 0.5515176735335351, + "grad_norm": 633.8605346679688, + "learning_rate": 2.0276451349471107e-05, + "loss": 70.9737, + "step": 136510 + }, + { + "epoch": 0.5515580747989027, + "grad_norm": 733.7218627929688, + "learning_rate": 2.0273659086769747e-05, + "loss": 85.6726, + "step": 136520 + }, + { + "epoch": 0.5515984760642704, + "grad_norm": 185.4625244140625, + "learning_rate": 2.027086681873326e-05, + "loss": 107.7958, + "step": 136530 + }, + { + "epoch": 0.551638877329638, + "grad_norm": 513.7817993164062, + "learning_rate": 2.0268074545416084e-05, + "loss": 53.9088, + "step": 136540 + }, + { + "epoch": 0.5516792785950057, + "grad_norm": 1058.3626708984375, + "learning_rate": 2.026528226687266e-05, + "loss": 84.805, + "step": 136550 + }, + { + "epoch": 0.5517196798603732, + "grad_norm": 1120.7093505859375, + "learning_rate": 2.0262489983157418e-05, + "loss": 78.024, + "step": 136560 + }, + { + "epoch": 0.5517600811257408, + "grad_norm": 828.7412109375, + "learning_rate": 2.02596976943248e-05, + "loss": 79.532, + "step": 136570 + }, + { + "epoch": 0.5518004823911085, + "grad_norm": 1324.56982421875, + "learning_rate": 2.0256905400429238e-05, + "loss": 115.4065, + "step": 136580 + }, + { + "epoch": 0.5518408836564761, + "grad_norm": 853.2250366210938, + "learning_rate": 2.025411310152518e-05, + "loss": 75.1745, + "step": 136590 + }, + { + "epoch": 0.5518812849218437, + "grad_norm": 1236.4229736328125, + "learning_rate": 2.0251320797667056e-05, + "loss": 71.4895, + "step": 136600 + }, + { + "epoch": 0.5519216861872114, + "grad_norm": 1373.6689453125, + "learning_rate": 2.0248528488909302e-05, + "loss": 87.8768, + "step": 136610 + }, + { + "epoch": 
0.551962087452579, + "grad_norm": 1122.4835205078125, + "learning_rate": 2.0245736175306354e-05, + "loss": 59.5781, + "step": 136620 + }, + { + "epoch": 0.5520024887179467, + "grad_norm": 688.1407470703125, + "learning_rate": 2.0242943856912655e-05, + "loss": 54.0682, + "step": 136630 + }, + { + "epoch": 0.5520428899833143, + "grad_norm": 1209.1324462890625, + "learning_rate": 2.024015153378264e-05, + "loss": 89.7972, + "step": 136640 + }, + { + "epoch": 0.5520832912486819, + "grad_norm": 840.1266479492188, + "learning_rate": 2.0237359205970747e-05, + "loss": 68.9107, + "step": 136650 + }, + { + "epoch": 0.5521236925140496, + "grad_norm": 1245.0975341796875, + "learning_rate": 2.0234566873531412e-05, + "loss": 102.3461, + "step": 136660 + }, + { + "epoch": 0.5521640937794172, + "grad_norm": 648.3052978515625, + "learning_rate": 2.0231774536519082e-05, + "loss": 43.931, + "step": 136670 + }, + { + "epoch": 0.5522044950447847, + "grad_norm": 786.69775390625, + "learning_rate": 2.022898219498818e-05, + "loss": 79.8498, + "step": 136680 + }, + { + "epoch": 0.5522448963101524, + "grad_norm": 333.08343505859375, + "learning_rate": 2.022618984899316e-05, + "loss": 79.2477, + "step": 136690 + }, + { + "epoch": 0.55228529757552, + "grad_norm": 840.3515625, + "learning_rate": 2.0223397498588447e-05, + "loss": 84.538, + "step": 136700 + }, + { + "epoch": 0.5523256988408877, + "grad_norm": 641.870361328125, + "learning_rate": 2.0220605143828486e-05, + "loss": 68.0855, + "step": 136710 + }, + { + "epoch": 0.5523661001062553, + "grad_norm": 547.9876708984375, + "learning_rate": 2.0217812784767716e-05, + "loss": 99.4564, + "step": 136720 + }, + { + "epoch": 0.552406501371623, + "grad_norm": 613.6123046875, + "learning_rate": 2.021502042146057e-05, + "loss": 83.5419, + "step": 136730 + }, + { + "epoch": 0.5524469026369906, + "grad_norm": 996.94970703125, + "learning_rate": 2.021222805396149e-05, + "loss": 76.8083, + "step": 136740 + }, + { + "epoch": 0.5524873039023582, + "grad_norm": 1258.95361328125, + "learning_rate": 2.020943568232492e-05, + "loss": 60.3509, + "step": 136750 + }, + { + "epoch": 0.5525277051677259, + "grad_norm": 326.8428039550781, + "learning_rate": 2.0206643306605288e-05, + "loss": 81.2248, + "step": 136760 + }, + { + "epoch": 0.5525681064330935, + "grad_norm": 308.568359375, + "learning_rate": 2.0203850926857042e-05, + "loss": 55.3316, + "step": 136770 + }, + { + "epoch": 0.5526085076984611, + "grad_norm": 431.4501647949219, + "learning_rate": 2.0201058543134616e-05, + "loss": 61.0275, + "step": 136780 + }, + { + "epoch": 0.5526489089638288, + "grad_norm": 1213.836181640625, + "learning_rate": 2.0198266155492453e-05, + "loss": 88.6358, + "step": 136790 + }, + { + "epoch": 0.5526893102291964, + "grad_norm": 533.3643798828125, + "learning_rate": 2.0195473763984985e-05, + "loss": 81.8243, + "step": 136800 + }, + { + "epoch": 0.552729711494564, + "grad_norm": 311.5760192871094, + "learning_rate": 2.0192681368666657e-05, + "loss": 64.8101, + "step": 136810 + }, + { + "epoch": 0.5527701127599316, + "grad_norm": 584.5789794921875, + "learning_rate": 2.0189888969591905e-05, + "loss": 48.3148, + "step": 136820 + }, + { + "epoch": 0.5528105140252992, + "grad_norm": 657.4978637695312, + "learning_rate": 2.018709656681517e-05, + "loss": 57.6803, + "step": 136830 + }, + { + "epoch": 0.5528509152906669, + "grad_norm": 917.8217163085938, + "learning_rate": 2.018430416039089e-05, + "loss": 72.8576, + "step": 136840 + }, + { + "epoch": 0.5528913165560345, + "grad_norm": 994.44091796875, + 
"learning_rate": 2.0181511750373505e-05, + "loss": 83.2803, + "step": 136850 + }, + { + "epoch": 0.5529317178214022, + "grad_norm": 1251.86376953125, + "learning_rate": 2.0178719336817463e-05, + "loss": 63.0956, + "step": 136860 + }, + { + "epoch": 0.5529721190867698, + "grad_norm": 1225.458251953125, + "learning_rate": 2.0175926919777193e-05, + "loss": 81.5135, + "step": 136870 + }, + { + "epoch": 0.5530125203521374, + "grad_norm": 770.8958129882812, + "learning_rate": 2.0173134499307133e-05, + "loss": 76.9202, + "step": 136880 + }, + { + "epoch": 0.5530529216175051, + "grad_norm": 710.4417114257812, + "learning_rate": 2.0170342075461725e-05, + "loss": 81.3395, + "step": 136890 + }, + { + "epoch": 0.5530933228828727, + "grad_norm": 513.7435913085938, + "learning_rate": 2.0167549648295413e-05, + "loss": 58.2044, + "step": 136900 + }, + { + "epoch": 0.5531337241482404, + "grad_norm": 1447.06982421875, + "learning_rate": 2.016475721786264e-05, + "loss": 83.2971, + "step": 136910 + }, + { + "epoch": 0.553174125413608, + "grad_norm": 644.0418701171875, + "learning_rate": 2.016196478421783e-05, + "loss": 66.9737, + "step": 136920 + }, + { + "epoch": 0.5532145266789756, + "grad_norm": 420.6496887207031, + "learning_rate": 2.0159172347415437e-05, + "loss": 55.1374, + "step": 136930 + }, + { + "epoch": 0.5532549279443432, + "grad_norm": 775.0262451171875, + "learning_rate": 2.0156379907509902e-05, + "loss": 73.4463, + "step": 136940 + }, + { + "epoch": 0.5532953292097108, + "grad_norm": 983.1422729492188, + "learning_rate": 2.015358746455566e-05, + "loss": 83.8649, + "step": 136950 + }, + { + "epoch": 0.5533357304750784, + "grad_norm": 726.2222290039062, + "learning_rate": 2.0150795018607145e-05, + "loss": 95.118, + "step": 136960 + }, + { + "epoch": 0.5533761317404461, + "grad_norm": 676.4175415039062, + "learning_rate": 2.0148002569718805e-05, + "loss": 73.4568, + "step": 136970 + }, + { + "epoch": 0.5534165330058137, + "grad_norm": 1613.2225341796875, + "learning_rate": 2.014521011794508e-05, + "loss": 67.2453, + "step": 136980 + }, + { + "epoch": 0.5534569342711814, + "grad_norm": 986.556640625, + "learning_rate": 2.0142417663340407e-05, + "loss": 90.3444, + "step": 136990 + }, + { + "epoch": 0.553497335536549, + "grad_norm": 829.18310546875, + "learning_rate": 2.0139625205959234e-05, + "loss": 89.1069, + "step": 137000 + }, + { + "epoch": 0.5535377368019166, + "grad_norm": 395.59307861328125, + "learning_rate": 2.013683274585599e-05, + "loss": 73.3519, + "step": 137010 + }, + { + "epoch": 0.5535781380672843, + "grad_norm": 843.0636596679688, + "learning_rate": 2.013404028308512e-05, + "loss": 64.2219, + "step": 137020 + }, + { + "epoch": 0.5536185393326519, + "grad_norm": 911.5646362304688, + "learning_rate": 2.0131247817701074e-05, + "loss": 61.3521, + "step": 137030 + }, + { + "epoch": 0.5536589405980196, + "grad_norm": 745.8252563476562, + "learning_rate": 2.0128455349758284e-05, + "loss": 45.2966, + "step": 137040 + }, + { + "epoch": 0.5536993418633872, + "grad_norm": 582.2078857421875, + "learning_rate": 2.0125662879311183e-05, + "loss": 77.5672, + "step": 137050 + }, + { + "epoch": 0.5537397431287548, + "grad_norm": 608.273193359375, + "learning_rate": 2.0122870406414222e-05, + "loss": 58.4586, + "step": 137060 + }, + { + "epoch": 0.5537801443941224, + "grad_norm": 710.7454223632812, + "learning_rate": 2.0120077931121836e-05, + "loss": 74.0468, + "step": 137070 + }, + { + "epoch": 0.55382054565949, + "grad_norm": 1036.02880859375, + "learning_rate": 2.0117285453488477e-05, + "loss": 
61.6327, + "step": 137080 + }, + { + "epoch": 0.5538609469248577, + "grad_norm": 922.4265747070312, + "learning_rate": 2.0114492973568574e-05, + "loss": 55.2463, + "step": 137090 + }, + { + "epoch": 0.5539013481902253, + "grad_norm": 517.0756225585938, + "learning_rate": 2.0111700491416565e-05, + "loss": 104.2937, + "step": 137100 + }, + { + "epoch": 0.5539417494555929, + "grad_norm": 296.4446105957031, + "learning_rate": 2.010890800708691e-05, + "loss": 51.9961, + "step": 137110 + }, + { + "epoch": 0.5539821507209606, + "grad_norm": 419.5916442871094, + "learning_rate": 2.010611552063403e-05, + "loss": 107.8957, + "step": 137120 + }, + { + "epoch": 0.5540225519863282, + "grad_norm": 600.9053344726562, + "learning_rate": 2.010332303211237e-05, + "loss": 73.9911, + "step": 137130 + }, + { + "epoch": 0.5540629532516959, + "grad_norm": 844.0247802734375, + "learning_rate": 2.010053054157638e-05, + "loss": 51.8869, + "step": 137140 + }, + { + "epoch": 0.5541033545170635, + "grad_norm": 848.6297607421875, + "learning_rate": 2.0097738049080494e-05, + "loss": 80.5309, + "step": 137150 + }, + { + "epoch": 0.5541437557824311, + "grad_norm": 810.6255493164062, + "learning_rate": 2.0094945554679156e-05, + "loss": 51.6763, + "step": 137160 + }, + { + "epoch": 0.5541841570477988, + "grad_norm": 590.5045776367188, + "learning_rate": 2.00921530584268e-05, + "loss": 59.0387, + "step": 137170 + }, + { + "epoch": 0.5542245583131664, + "grad_norm": 908.5037841796875, + "learning_rate": 2.0089360560377877e-05, + "loss": 96.899, + "step": 137180 + }, + { + "epoch": 0.5542649595785339, + "grad_norm": 591.05517578125, + "learning_rate": 2.008656806058682e-05, + "loss": 60.5624, + "step": 137190 + }, + { + "epoch": 0.5543053608439016, + "grad_norm": 564.7469482421875, + "learning_rate": 2.0083775559108082e-05, + "loss": 81.3159, + "step": 137200 + }, + { + "epoch": 0.5543457621092692, + "grad_norm": 705.06982421875, + "learning_rate": 2.0080983055996094e-05, + "loss": 97.9213, + "step": 137210 + }, + { + "epoch": 0.5543861633746369, + "grad_norm": 858.2857055664062, + "learning_rate": 2.007819055130529e-05, + "loss": 86.8036, + "step": 137220 + }, + { + "epoch": 0.5544265646400045, + "grad_norm": 577.837646484375, + "learning_rate": 2.0075398045090127e-05, + "loss": 73.0283, + "step": 137230 + }, + { + "epoch": 0.5544669659053721, + "grad_norm": 699.1233520507812, + "learning_rate": 2.0072605537405046e-05, + "loss": 66.9021, + "step": 137240 + }, + { + "epoch": 0.5545073671707398, + "grad_norm": 1110.770263671875, + "learning_rate": 2.0069813028304478e-05, + "loss": 90.7218, + "step": 137250 + }, + { + "epoch": 0.5545477684361074, + "grad_norm": 885.0283203125, + "learning_rate": 2.0067020517842866e-05, + "loss": 92.9069, + "step": 137260 + }, + { + "epoch": 0.5545881697014751, + "grad_norm": 624.3333740234375, + "learning_rate": 2.0064228006074656e-05, + "loss": 90.8773, + "step": 137270 + }, + { + "epoch": 0.5546285709668427, + "grad_norm": 643.1002807617188, + "learning_rate": 2.0061435493054294e-05, + "loss": 92.598, + "step": 137280 + }, + { + "epoch": 0.5546689722322103, + "grad_norm": 739.7564086914062, + "learning_rate": 2.0058642978836213e-05, + "loss": 92.2599, + "step": 137290 + }, + { + "epoch": 0.554709373497578, + "grad_norm": 742.0172729492188, + "learning_rate": 2.0055850463474855e-05, + "loss": 86.6578, + "step": 137300 + }, + { + "epoch": 0.5547497747629456, + "grad_norm": 271.28387451171875, + "learning_rate": 2.005305794702466e-05, + "loss": 53.9066, + "step": 137310 + }, + { + "epoch": 
0.5547901760283132, + "grad_norm": 1208.0994873046875, + "learning_rate": 2.0050265429540082e-05, + "loss": 74.6682, + "step": 137320 + }, + { + "epoch": 0.5548305772936808, + "grad_norm": 1272.5548095703125, + "learning_rate": 2.0047472911075544e-05, + "loss": 80.988, + "step": 137330 + }, + { + "epoch": 0.5548709785590484, + "grad_norm": 899.0355834960938, + "learning_rate": 2.00446803916855e-05, + "loss": 62.6924, + "step": 137340 + }, + { + "epoch": 0.5549113798244161, + "grad_norm": 1053.8612060546875, + "learning_rate": 2.0041887871424386e-05, + "loss": 74.655, + "step": 137350 + }, + { + "epoch": 0.5549517810897837, + "grad_norm": 1014.7800903320312, + "learning_rate": 2.0039095350346653e-05, + "loss": 57.3688, + "step": 137360 + }, + { + "epoch": 0.5549921823551514, + "grad_norm": 475.80194091796875, + "learning_rate": 2.0036302828506736e-05, + "loss": 67.561, + "step": 137370 + }, + { + "epoch": 0.555032583620519, + "grad_norm": 365.3185729980469, + "learning_rate": 2.0033510305959076e-05, + "loss": 93.3158, + "step": 137380 + }, + { + "epoch": 0.5550729848858866, + "grad_norm": 1049.4718017578125, + "learning_rate": 2.003071778275811e-05, + "loss": 50.1866, + "step": 137390 + }, + { + "epoch": 0.5551133861512543, + "grad_norm": 1022.3670654296875, + "learning_rate": 2.002792525895829e-05, + "loss": 65.5431, + "step": 137400 + }, + { + "epoch": 0.5551537874166219, + "grad_norm": 540.8389892578125, + "learning_rate": 2.0025132734614053e-05, + "loss": 94.5445, + "step": 137410 + }, + { + "epoch": 0.5551941886819896, + "grad_norm": 803.6575927734375, + "learning_rate": 2.0022340209779835e-05, + "loss": 91.1649, + "step": 137420 + }, + { + "epoch": 0.5552345899473572, + "grad_norm": 571.5538330078125, + "learning_rate": 2.0019547684510085e-05, + "loss": 85.0952, + "step": 137430 + }, + { + "epoch": 0.5552749912127248, + "grad_norm": 843.7247314453125, + "learning_rate": 2.0016755158859245e-05, + "loss": 84.6503, + "step": 137440 + }, + { + "epoch": 0.5553153924780924, + "grad_norm": 505.02838134765625, + "learning_rate": 2.0013962632881752e-05, + "loss": 62.6207, + "step": 137450 + }, + { + "epoch": 0.55535579374346, + "grad_norm": 934.4701538085938, + "learning_rate": 2.001117010663206e-05, + "loss": 54.7067, + "step": 137460 + }, + { + "epoch": 0.5553961950088276, + "grad_norm": 591.134765625, + "learning_rate": 2.000837758016459e-05, + "loss": 85.6493, + "step": 137470 + }, + { + "epoch": 0.5554365962741953, + "grad_norm": 2955.295166015625, + "learning_rate": 2.00055850535338e-05, + "loss": 95.5627, + "step": 137480 + }, + { + "epoch": 0.5554769975395629, + "grad_norm": 591.887451171875, + "learning_rate": 2.0002792526794123e-05, + "loss": 94.456, + "step": 137490 + }, + { + "epoch": 0.5555173988049306, + "grad_norm": 1180.4378662109375, + "learning_rate": 2e-05, + "loss": 109.3895, + "step": 137500 + }, + { + "epoch": 0.5555578000702982, + "grad_norm": 985.9782104492188, + "learning_rate": 1.9997207473205888e-05, + "loss": 49.461, + "step": 137510 + }, + { + "epoch": 0.5555982013356658, + "grad_norm": 440.26910400390625, + "learning_rate": 1.9994414946466207e-05, + "loss": 57.6069, + "step": 137520 + }, + { + "epoch": 0.5556386026010335, + "grad_norm": 802.1146240234375, + "learning_rate": 1.9991622419835418e-05, + "loss": 57.1419, + "step": 137530 + }, + { + "epoch": 0.5556790038664011, + "grad_norm": 595.895751953125, + "learning_rate": 1.998882989336795e-05, + "loss": 68.737, + "step": 137540 + }, + { + "epoch": 0.5557194051317688, + "grad_norm": 1164.4100341796875, + 
"learning_rate": 1.998603736711825e-05, + "loss": 78.3813, + "step": 137550 + }, + { + "epoch": 0.5557598063971364, + "grad_norm": 734.2694702148438, + "learning_rate": 1.998324484114076e-05, + "loss": 139.2311, + "step": 137560 + }, + { + "epoch": 0.555800207662504, + "grad_norm": 693.5689697265625, + "learning_rate": 1.9980452315489918e-05, + "loss": 61.6289, + "step": 137570 + }, + { + "epoch": 0.5558406089278716, + "grad_norm": 593.5734252929688, + "learning_rate": 1.9977659790220172e-05, + "loss": 67.5988, + "step": 137580 + }, + { + "epoch": 0.5558810101932392, + "grad_norm": 1165.6331787109375, + "learning_rate": 1.997486726538596e-05, + "loss": 113.3505, + "step": 137590 + }, + { + "epoch": 0.5559214114586069, + "grad_norm": 399.13885498046875, + "learning_rate": 1.9972074741041712e-05, + "loss": 57.2826, + "step": 137600 + }, + { + "epoch": 0.5559618127239745, + "grad_norm": 1760.214599609375, + "learning_rate": 1.9969282217241897e-05, + "loss": 109.9102, + "step": 137610 + }, + { + "epoch": 0.5560022139893421, + "grad_norm": 467.3599853515625, + "learning_rate": 1.996648969404093e-05, + "loss": 115.3844, + "step": 137620 + }, + { + "epoch": 0.5560426152547098, + "grad_norm": 841.14794921875, + "learning_rate": 1.9963697171493274e-05, + "loss": 61.8939, + "step": 137630 + }, + { + "epoch": 0.5560830165200774, + "grad_norm": 1026.7003173828125, + "learning_rate": 1.996090464965335e-05, + "loss": 86.2182, + "step": 137640 + }, + { + "epoch": 0.556123417785445, + "grad_norm": 774.94677734375, + "learning_rate": 1.9958112128575614e-05, + "loss": 62.1354, + "step": 137650 + }, + { + "epoch": 0.5561638190508127, + "grad_norm": 361.0375061035156, + "learning_rate": 1.9955319608314506e-05, + "loss": 67.6955, + "step": 137660 + }, + { + "epoch": 0.5562042203161803, + "grad_norm": 544.73681640625, + "learning_rate": 1.9952527088924466e-05, + "loss": 61.6558, + "step": 137670 + }, + { + "epoch": 0.556244621581548, + "grad_norm": 1256.9271240234375, + "learning_rate": 1.9949734570459925e-05, + "loss": 73.2164, + "step": 137680 + }, + { + "epoch": 0.5562850228469156, + "grad_norm": 301.9201965332031, + "learning_rate": 1.9946942052975343e-05, + "loss": 44.7376, + "step": 137690 + }, + { + "epoch": 0.5563254241122833, + "grad_norm": 708.0748291015625, + "learning_rate": 1.994414953652515e-05, + "loss": 68.7991, + "step": 137700 + }, + { + "epoch": 0.5563658253776508, + "grad_norm": 596.506103515625, + "learning_rate": 1.9941357021163793e-05, + "loss": 47.6664, + "step": 137710 + }, + { + "epoch": 0.5564062266430184, + "grad_norm": 392.4789733886719, + "learning_rate": 1.9938564506945713e-05, + "loss": 53.2883, + "step": 137720 + }, + { + "epoch": 0.5564466279083861, + "grad_norm": 646.1731567382812, + "learning_rate": 1.9935771993925344e-05, + "loss": 96.4923, + "step": 137730 + }, + { + "epoch": 0.5564870291737537, + "grad_norm": 1003.5181274414062, + "learning_rate": 1.9932979482157137e-05, + "loss": 77.656, + "step": 137740 + }, + { + "epoch": 0.5565274304391213, + "grad_norm": 352.80096435546875, + "learning_rate": 1.9930186971695532e-05, + "loss": 37.7957, + "step": 137750 + }, + { + "epoch": 0.556567831704489, + "grad_norm": 250.6719207763672, + "learning_rate": 1.9927394462594957e-05, + "loss": 85.5114, + "step": 137760 + }, + { + "epoch": 0.5566082329698566, + "grad_norm": 717.8180541992188, + "learning_rate": 1.9924601954909876e-05, + "loss": 58.6161, + "step": 137770 + }, + { + "epoch": 0.5566486342352243, + "grad_norm": 305.3313903808594, + "learning_rate": 1.9921809448694705e-05, + 
"loss": 64.3708, + "step": 137780 + }, + { + "epoch": 0.5566890355005919, + "grad_norm": 4628.505859375, + "learning_rate": 1.9919016944003912e-05, + "loss": 80.1965, + "step": 137790 + }, + { + "epoch": 0.5567294367659595, + "grad_norm": 584.9466552734375, + "learning_rate": 1.9916224440891928e-05, + "loss": 67.9669, + "step": 137800 + }, + { + "epoch": 0.5567698380313272, + "grad_norm": 576.5956420898438, + "learning_rate": 1.9913431939413182e-05, + "loss": 53.9918, + "step": 137810 + }, + { + "epoch": 0.5568102392966948, + "grad_norm": 938.0853271484375, + "learning_rate": 1.991063943962213e-05, + "loss": 84.6586, + "step": 137820 + }, + { + "epoch": 0.5568506405620623, + "grad_norm": 668.665283203125, + "learning_rate": 1.990784694157321e-05, + "loss": 50.0916, + "step": 137830 + }, + { + "epoch": 0.55689104182743, + "grad_norm": 530.2503662109375, + "learning_rate": 1.990505444532085e-05, + "loss": 69.7192, + "step": 137840 + }, + { + "epoch": 0.5569314430927976, + "grad_norm": 544.7189331054688, + "learning_rate": 1.9902261950919516e-05, + "loss": 47.4911, + "step": 137850 + }, + { + "epoch": 0.5569718443581653, + "grad_norm": 379.1934509277344, + "learning_rate": 1.9899469458423623e-05, + "loss": 93.945, + "step": 137860 + }, + { + "epoch": 0.5570122456235329, + "grad_norm": 994.3744506835938, + "learning_rate": 1.989667696788763e-05, + "loss": 90.5681, + "step": 137870 + }, + { + "epoch": 0.5570526468889005, + "grad_norm": 588.68896484375, + "learning_rate": 1.989388447936598e-05, + "loss": 73.6523, + "step": 137880 + }, + { + "epoch": 0.5570930481542682, + "grad_norm": 749.584228515625, + "learning_rate": 1.9891091992913097e-05, + "loss": 63.4446, + "step": 137890 + }, + { + "epoch": 0.5571334494196358, + "grad_norm": 789.9939575195312, + "learning_rate": 1.9888299508583438e-05, + "loss": 60.7109, + "step": 137900 + }, + { + "epoch": 0.5571738506850035, + "grad_norm": 564.454833984375, + "learning_rate": 1.988550702643144e-05, + "loss": 74.3176, + "step": 137910 + }, + { + "epoch": 0.5572142519503711, + "grad_norm": 1083.2015380859375, + "learning_rate": 1.988271454651153e-05, + "loss": 104.5949, + "step": 137920 + }, + { + "epoch": 0.5572546532157387, + "grad_norm": 672.8120727539062, + "learning_rate": 1.9879922068878167e-05, + "loss": 50.1751, + "step": 137930 + }, + { + "epoch": 0.5572950544811064, + "grad_norm": 371.62921142578125, + "learning_rate": 1.987712959358578e-05, + "loss": 65.2458, + "step": 137940 + }, + { + "epoch": 0.557335455746474, + "grad_norm": 550.440673828125, + "learning_rate": 1.9874337120688824e-05, + "loss": 70.9043, + "step": 137950 + }, + { + "epoch": 0.5573758570118416, + "grad_norm": 826.49462890625, + "learning_rate": 1.9871544650241726e-05, + "loss": 97.2711, + "step": 137960 + }, + { + "epoch": 0.5574162582772092, + "grad_norm": 958.9838256835938, + "learning_rate": 1.9868752182298933e-05, + "loss": 91.7799, + "step": 137970 + }, + { + "epoch": 0.5574566595425768, + "grad_norm": 746.602783203125, + "learning_rate": 1.9865959716914884e-05, + "loss": 77.6851, + "step": 137980 + }, + { + "epoch": 0.5574970608079445, + "grad_norm": 734.5572509765625, + "learning_rate": 1.9863167254144016e-05, + "loss": 76.1136, + "step": 137990 + }, + { + "epoch": 0.5575374620733121, + "grad_norm": 861.4277954101562, + "learning_rate": 1.986037479404077e-05, + "loss": 53.0477, + "step": 138000 + }, + { + "epoch": 0.5575778633386798, + "grad_norm": 718.0148315429688, + "learning_rate": 1.9857582336659596e-05, + "loss": 60.639, + "step": 138010 + }, + { + "epoch": 
0.5576182646040474, + "grad_norm": 969.1907958984375, + "learning_rate": 1.9854789882054922e-05, + "loss": 95.4613, + "step": 138020 + }, + { + "epoch": 0.557658665869415, + "grad_norm": 1313.0958251953125, + "learning_rate": 1.9851997430281198e-05, + "loss": 79.0844, + "step": 138030 + }, + { + "epoch": 0.5576990671347827, + "grad_norm": 789.0957641601562, + "learning_rate": 1.984920498139286e-05, + "loss": 62.5049, + "step": 138040 + }, + { + "epoch": 0.5577394684001503, + "grad_norm": 936.0442504882812, + "learning_rate": 1.9846412535444346e-05, + "loss": 69.6763, + "step": 138050 + }, + { + "epoch": 0.557779869665518, + "grad_norm": 579.9495849609375, + "learning_rate": 1.9843620092490105e-05, + "loss": 68.7881, + "step": 138060 + }, + { + "epoch": 0.5578202709308856, + "grad_norm": 846.2564086914062, + "learning_rate": 1.9840827652584563e-05, + "loss": 70.8273, + "step": 138070 + }, + { + "epoch": 0.5578606721962532, + "grad_norm": 453.9554748535156, + "learning_rate": 1.9838035215782173e-05, + "loss": 38.5299, + "step": 138080 + }, + { + "epoch": 0.5579010734616208, + "grad_norm": 726.3963012695312, + "learning_rate": 1.9835242782137373e-05, + "loss": 58.7237, + "step": 138090 + }, + { + "epoch": 0.5579414747269884, + "grad_norm": 330.2505798339844, + "learning_rate": 1.983245035170459e-05, + "loss": 49.6117, + "step": 138100 + }, + { + "epoch": 0.557981875992356, + "grad_norm": 552.5769653320312, + "learning_rate": 1.9829657924538278e-05, + "loss": 62.8492, + "step": 138110 + }, + { + "epoch": 0.5580222772577237, + "grad_norm": 875.038330078125, + "learning_rate": 1.9826865500692877e-05, + "loss": 79.782, + "step": 138120 + }, + { + "epoch": 0.5580626785230913, + "grad_norm": 593.3241577148438, + "learning_rate": 1.982407308022281e-05, + "loss": 90.7508, + "step": 138130 + }, + { + "epoch": 0.558103079788459, + "grad_norm": 1867.5921630859375, + "learning_rate": 1.9821280663182543e-05, + "loss": 95.5716, + "step": 138140 + }, + { + "epoch": 0.5581434810538266, + "grad_norm": 470.7640075683594, + "learning_rate": 1.9818488249626492e-05, + "loss": 58.9143, + "step": 138150 + }, + { + "epoch": 0.5581838823191942, + "grad_norm": 595.0718994140625, + "learning_rate": 1.9815695839609114e-05, + "loss": 69.8825, + "step": 138160 + }, + { + "epoch": 0.5582242835845619, + "grad_norm": 680.0055541992188, + "learning_rate": 1.981290343318484e-05, + "loss": 101.462, + "step": 138170 + }, + { + "epoch": 0.5582646848499295, + "grad_norm": 499.6974182128906, + "learning_rate": 1.98101110304081e-05, + "loss": 85.3225, + "step": 138180 + }, + { + "epoch": 0.5583050861152972, + "grad_norm": 903.34033203125, + "learning_rate": 1.980731863133335e-05, + "loss": 108.8303, + "step": 138190 + }, + { + "epoch": 0.5583454873806648, + "grad_norm": 803.7764282226562, + "learning_rate": 1.9804526236015025e-05, + "loss": 58.8341, + "step": 138200 + }, + { + "epoch": 0.5583858886460324, + "grad_norm": 735.4111938476562, + "learning_rate": 1.9801733844507553e-05, + "loss": 68.9388, + "step": 138210 + }, + { + "epoch": 0.5584262899114, + "grad_norm": 493.4570617675781, + "learning_rate": 1.979894145686539e-05, + "loss": 62.5186, + "step": 138220 + }, + { + "epoch": 0.5584666911767676, + "grad_norm": 656.532958984375, + "learning_rate": 1.9796149073142964e-05, + "loss": 71.7654, + "step": 138230 + }, + { + "epoch": 0.5585070924421353, + "grad_norm": 445.9362487792969, + "learning_rate": 1.979335669339472e-05, + "loss": 60.543, + "step": 138240 + }, + { + "epoch": 0.5585474937075029, + "grad_norm": 383.1541442871094, 
+ "learning_rate": 1.979056431767509e-05, + "loss": 96.5799, + "step": 138250 + }, + { + "epoch": 0.5585878949728705, + "grad_norm": 648.4757080078125, + "learning_rate": 1.9787771946038513e-05, + "loss": 84.2316, + "step": 138260 + }, + { + "epoch": 0.5586282962382382, + "grad_norm": 366.2901306152344, + "learning_rate": 1.9784979578539438e-05, + "loss": 75.6429, + "step": 138270 + }, + { + "epoch": 0.5586686975036058, + "grad_norm": 659.4552612304688, + "learning_rate": 1.9782187215232295e-05, + "loss": 71.5755, + "step": 138280 + }, + { + "epoch": 0.5587090987689735, + "grad_norm": 562.4083251953125, + "learning_rate": 1.9779394856171517e-05, + "loss": 72.0512, + "step": 138290 + }, + { + "epoch": 0.5587495000343411, + "grad_norm": 732.6774291992188, + "learning_rate": 1.977660250141156e-05, + "loss": 59.2943, + "step": 138300 + }, + { + "epoch": 0.5587899012997087, + "grad_norm": 520.9224853515625, + "learning_rate": 1.9773810151006846e-05, + "loss": 91.6547, + "step": 138310 + }, + { + "epoch": 0.5588303025650764, + "grad_norm": 356.8598327636719, + "learning_rate": 1.9771017805011826e-05, + "loss": 86.9803, + "step": 138320 + }, + { + "epoch": 0.558870703830444, + "grad_norm": 709.25, + "learning_rate": 1.9768225463480925e-05, + "loss": 33.1886, + "step": 138330 + }, + { + "epoch": 0.5589111050958117, + "grad_norm": 868.8605346679688, + "learning_rate": 1.976543312646859e-05, + "loss": 85.1374, + "step": 138340 + }, + { + "epoch": 0.5589515063611792, + "grad_norm": 855.1586303710938, + "learning_rate": 1.976264079402926e-05, + "loss": 80.6468, + "step": 138350 + }, + { + "epoch": 0.5589919076265468, + "grad_norm": 681.206787109375, + "learning_rate": 1.975984846621737e-05, + "loss": 69.7913, + "step": 138360 + }, + { + "epoch": 0.5590323088919145, + "grad_norm": 438.8380126953125, + "learning_rate": 1.975705614308735e-05, + "loss": 67.3316, + "step": 138370 + }, + { + "epoch": 0.5590727101572821, + "grad_norm": 819.5712890625, + "learning_rate": 1.9754263824693653e-05, + "loss": 75.0976, + "step": 138380 + }, + { + "epoch": 0.5591131114226497, + "grad_norm": 296.8226318359375, + "learning_rate": 1.97514715110907e-05, + "loss": 62.6272, + "step": 138390 + }, + { + "epoch": 0.5591535126880174, + "grad_norm": 492.2091979980469, + "learning_rate": 1.9748679202332948e-05, + "loss": 43.5994, + "step": 138400 + }, + { + "epoch": 0.559193913953385, + "grad_norm": 572.0256958007812, + "learning_rate": 1.9745886898474825e-05, + "loss": 65.3023, + "step": 138410 + }, + { + "epoch": 0.5592343152187527, + "grad_norm": 797.8814697265625, + "learning_rate": 1.974309459957076e-05, + "loss": 81.9767, + "step": 138420 + }, + { + "epoch": 0.5592747164841203, + "grad_norm": 726.57421875, + "learning_rate": 1.9740302305675207e-05, + "loss": 52.8083, + "step": 138430 + }, + { + "epoch": 0.5593151177494879, + "grad_norm": 796.0791625976562, + "learning_rate": 1.9737510016842592e-05, + "loss": 91.5996, + "step": 138440 + }, + { + "epoch": 0.5593555190148556, + "grad_norm": 431.6235046386719, + "learning_rate": 1.9734717733127347e-05, + "loss": 55.2597, + "step": 138450 + }, + { + "epoch": 0.5593959202802232, + "grad_norm": 1075.969482421875, + "learning_rate": 1.9731925454583923e-05, + "loss": 101.1105, + "step": 138460 + }, + { + "epoch": 0.5594363215455908, + "grad_norm": 871.5814819335938, + "learning_rate": 1.9729133181266742e-05, + "loss": 64.5682, + "step": 138470 + }, + { + "epoch": 0.5594767228109584, + "grad_norm": 246.2523956298828, + "learning_rate": 1.9726340913230257e-05, + "loss": 100.7472, + 
"step": 138480 + }, + { + "epoch": 0.559517124076326, + "grad_norm": 1541.6746826171875, + "learning_rate": 1.97235486505289e-05, + "loss": 83.4667, + "step": 138490 + }, + { + "epoch": 0.5595575253416937, + "grad_norm": 808.3538208007812, + "learning_rate": 1.9720756393217098e-05, + "loss": 99.6665, + "step": 138500 + }, + { + "epoch": 0.5595979266070613, + "grad_norm": 305.4700012207031, + "learning_rate": 1.97179641413493e-05, + "loss": 76.1929, + "step": 138510 + }, + { + "epoch": 0.559638327872429, + "grad_norm": 573.3967895507812, + "learning_rate": 1.9715171894979933e-05, + "loss": 102.5564, + "step": 138520 + }, + { + "epoch": 0.5596787291377966, + "grad_norm": 553.982177734375, + "learning_rate": 1.9712379654163427e-05, + "loss": 56.0331, + "step": 138530 + }, + { + "epoch": 0.5597191304031642, + "grad_norm": 569.3568725585938, + "learning_rate": 1.970958741895424e-05, + "loss": 72.3033, + "step": 138540 + }, + { + "epoch": 0.5597595316685319, + "grad_norm": 740.70263671875, + "learning_rate": 1.9706795189406788e-05, + "loss": 66.0863, + "step": 138550 + }, + { + "epoch": 0.5597999329338995, + "grad_norm": 923.6962890625, + "learning_rate": 1.970400296557552e-05, + "loss": 68.1295, + "step": 138560 + }, + { + "epoch": 0.5598403341992672, + "grad_norm": 744.362548828125, + "learning_rate": 1.9701210747514868e-05, + "loss": 94.1921, + "step": 138570 + }, + { + "epoch": 0.5598807354646348, + "grad_norm": 738.4619140625, + "learning_rate": 1.969841853527926e-05, + "loss": 67.1854, + "step": 138580 + }, + { + "epoch": 0.5599211367300024, + "grad_norm": 438.287841796875, + "learning_rate": 1.9695626328923144e-05, + "loss": 69.7606, + "step": 138590 + }, + { + "epoch": 0.55996153799537, + "grad_norm": 961.183837890625, + "learning_rate": 1.969283412850094e-05, + "loss": 65.5499, + "step": 138600 + }, + { + "epoch": 0.5600019392607376, + "grad_norm": 1849.86572265625, + "learning_rate": 1.9690041934067102e-05, + "loss": 54.9724, + "step": 138610 + }, + { + "epoch": 0.5600423405261052, + "grad_norm": 880.5283813476562, + "learning_rate": 1.9687249745676057e-05, + "loss": 75.6915, + "step": 138620 + }, + { + "epoch": 0.5600827417914729, + "grad_norm": 272.4528503417969, + "learning_rate": 1.968445756338223e-05, + "loss": 88.8507, + "step": 138630 + }, + { + "epoch": 0.5601231430568405, + "grad_norm": 890.7822265625, + "learning_rate": 1.9681665387240074e-05, + "loss": 44.6233, + "step": 138640 + }, + { + "epoch": 0.5601635443222082, + "grad_norm": 309.0090026855469, + "learning_rate": 1.9678873217304014e-05, + "loss": 118.1789, + "step": 138650 + }, + { + "epoch": 0.5602039455875758, + "grad_norm": 972.3203125, + "learning_rate": 1.9676081053628483e-05, + "loss": 79.2914, + "step": 138660 + }, + { + "epoch": 0.5602443468529434, + "grad_norm": 943.167724609375, + "learning_rate": 1.967328889626792e-05, + "loss": 76.4567, + "step": 138670 + }, + { + "epoch": 0.5602847481183111, + "grad_norm": 289.7102355957031, + "learning_rate": 1.9670496745276758e-05, + "loss": 73.726, + "step": 138680 + }, + { + "epoch": 0.5603251493836787, + "grad_norm": 424.80255126953125, + "learning_rate": 1.9667704600709433e-05, + "loss": 76.5593, + "step": 138690 + }, + { + "epoch": 0.5603655506490464, + "grad_norm": 843.5333862304688, + "learning_rate": 1.9664912462620378e-05, + "loss": 88.0906, + "step": 138700 + }, + { + "epoch": 0.560405951914414, + "grad_norm": 782.3602905273438, + "learning_rate": 1.966212033106402e-05, + "loss": 59.4667, + "step": 138710 + }, + { + "epoch": 0.5604463531797816, + "grad_norm": 
673.7774658203125, + "learning_rate": 1.965932820609481e-05, + "loss": 54.4682, + "step": 138720 + }, + { + "epoch": 0.5604867544451492, + "grad_norm": 1156.109375, + "learning_rate": 1.9656536087767168e-05, + "loss": 72.1234, + "step": 138730 + }, + { + "epoch": 0.5605271557105168, + "grad_norm": 450.18927001953125, + "learning_rate": 1.9653743976135526e-05, + "loss": 61.591, + "step": 138740 + }, + { + "epoch": 0.5605675569758845, + "grad_norm": 369.8658447265625, + "learning_rate": 1.965095187125433e-05, + "loss": 96.1716, + "step": 138750 + }, + { + "epoch": 0.5606079582412521, + "grad_norm": 411.26171875, + "learning_rate": 1.9648159773178007e-05, + "loss": 51.8823, + "step": 138760 + }, + { + "epoch": 0.5606483595066197, + "grad_norm": 369.2925109863281, + "learning_rate": 1.964536768196099e-05, + "loss": 61.8382, + "step": 138770 + }, + { + "epoch": 0.5606887607719874, + "grad_norm": 685.6666259765625, + "learning_rate": 1.9642575597657715e-05, + "loss": 74.6941, + "step": 138780 + }, + { + "epoch": 0.560729162037355, + "grad_norm": 636.4925537109375, + "learning_rate": 1.9639783520322603e-05, + "loss": 52.5641, + "step": 138790 + }, + { + "epoch": 0.5607695633027227, + "grad_norm": 414.0549621582031, + "learning_rate": 1.9636991450010106e-05, + "loss": 41.9788, + "step": 138800 + }, + { + "epoch": 0.5608099645680903, + "grad_norm": 656.4385986328125, + "learning_rate": 1.9634199386774647e-05, + "loss": 44.7072, + "step": 138810 + }, + { + "epoch": 0.5608503658334579, + "grad_norm": 1296.465087890625, + "learning_rate": 1.9631407330670653e-05, + "loss": 50.9918, + "step": 138820 + }, + { + "epoch": 0.5608907670988256, + "grad_norm": 721.7696533203125, + "learning_rate": 1.962861528175257e-05, + "loss": 56.8514, + "step": 138830 + }, + { + "epoch": 0.5609311683641932, + "grad_norm": 678.5653686523438, + "learning_rate": 1.962582324007482e-05, + "loss": 81.7997, + "step": 138840 + }, + { + "epoch": 0.5609715696295609, + "grad_norm": 996.1943359375, + "learning_rate": 1.962303120569184e-05, + "loss": 75.5431, + "step": 138850 + }, + { + "epoch": 0.5610119708949284, + "grad_norm": 387.04449462890625, + "learning_rate": 1.9620239178658062e-05, + "loss": 96.5256, + "step": 138860 + }, + { + "epoch": 0.561052372160296, + "grad_norm": 1047.2957763671875, + "learning_rate": 1.961744715902791e-05, + "loss": 68.4555, + "step": 138870 + }, + { + "epoch": 0.5610927734256637, + "grad_norm": 1183.5830078125, + "learning_rate": 1.961465514685583e-05, + "loss": 82.5965, + "step": 138880 + }, + { + "epoch": 0.5611331746910313, + "grad_norm": 1018.3452758789062, + "learning_rate": 1.9611863142196245e-05, + "loss": 94.7845, + "step": 138890 + }, + { + "epoch": 0.5611735759563989, + "grad_norm": 1208.625, + "learning_rate": 1.960907114510358e-05, + "loss": 99.2028, + "step": 138900 + }, + { + "epoch": 0.5612139772217666, + "grad_norm": 604.837646484375, + "learning_rate": 1.9606279155632284e-05, + "loss": 63.9613, + "step": 138910 + }, + { + "epoch": 0.5612543784871342, + "grad_norm": 903.6295166015625, + "learning_rate": 1.9603487173836772e-05, + "loss": 99.01, + "step": 138920 + }, + { + "epoch": 0.5612947797525019, + "grad_norm": 863.2391967773438, + "learning_rate": 1.960069519977149e-05, + "loss": 65.1577, + "step": 138930 + }, + { + "epoch": 0.5613351810178695, + "grad_norm": 628.4549560546875, + "learning_rate": 1.9597903233490855e-05, + "loss": 59.0426, + "step": 138940 + }, + { + "epoch": 0.5613755822832371, + "grad_norm": 1324.755615234375, + "learning_rate": 1.95951112750493e-05, + "loss": 
82.7338, + "step": 138950 + }, + { + "epoch": 0.5614159835486048, + "grad_norm": 814.8721313476562, + "learning_rate": 1.9592319324501264e-05, + "loss": 91.5139, + "step": 138960 + }, + { + "epoch": 0.5614563848139724, + "grad_norm": 886.6915283203125, + "learning_rate": 1.9589527381901176e-05, + "loss": 80.334, + "step": 138970 + }, + { + "epoch": 0.5614967860793401, + "grad_norm": 914.3416748046875, + "learning_rate": 1.958673544730345e-05, + "loss": 69.0419, + "step": 138980 + }, + { + "epoch": 0.5615371873447076, + "grad_norm": 1759.2318115234375, + "learning_rate": 1.9583943520762542e-05, + "loss": 86.9081, + "step": 138990 + }, + { + "epoch": 0.5615775886100752, + "grad_norm": 2400.169677734375, + "learning_rate": 1.9581151602332865e-05, + "loss": 99.2934, + "step": 139000 + }, + { + "epoch": 0.5616179898754429, + "grad_norm": 483.8739318847656, + "learning_rate": 1.957835969206885e-05, + "loss": 66.7319, + "step": 139010 + }, + { + "epoch": 0.5616583911408105, + "grad_norm": 877.3455200195312, + "learning_rate": 1.9575567790024935e-05, + "loss": 55.6987, + "step": 139020 + }, + { + "epoch": 0.5616987924061781, + "grad_norm": 283.93255615234375, + "learning_rate": 1.957277589625554e-05, + "loss": 96.261, + "step": 139030 + }, + { + "epoch": 0.5617391936715458, + "grad_norm": 771.5919189453125, + "learning_rate": 1.9569984010815107e-05, + "loss": 102.1399, + "step": 139040 + }, + { + "epoch": 0.5617795949369134, + "grad_norm": 422.96270751953125, + "learning_rate": 1.9567192133758045e-05, + "loss": 93.1836, + "step": 139050 + }, + { + "epoch": 0.5618199962022811, + "grad_norm": 473.31243896484375, + "learning_rate": 1.9564400265138804e-05, + "loss": 58.8977, + "step": 139060 + }, + { + "epoch": 0.5618603974676487, + "grad_norm": 572.6597290039062, + "learning_rate": 1.9561608405011803e-05, + "loss": 57.0641, + "step": 139070 + }, + { + "epoch": 0.5619007987330163, + "grad_norm": 715.2557373046875, + "learning_rate": 1.9558816553431467e-05, + "loss": 71.2273, + "step": 139080 + }, + { + "epoch": 0.561941199998384, + "grad_norm": 598.4008178710938, + "learning_rate": 1.9556024710452235e-05, + "loss": 101.2396, + "step": 139090 + }, + { + "epoch": 0.5619816012637516, + "grad_norm": 653.1583251953125, + "learning_rate": 1.955323287612853e-05, + "loss": 75.3337, + "step": 139100 + }, + { + "epoch": 0.5620220025291192, + "grad_norm": 424.2027587890625, + "learning_rate": 1.9550441050514775e-05, + "loss": 80.4858, + "step": 139110 + }, + { + "epoch": 0.5620624037944868, + "grad_norm": 822.8784790039062, + "learning_rate": 1.9547649233665412e-05, + "loss": 96.0229, + "step": 139120 + }, + { + "epoch": 0.5621028050598544, + "grad_norm": 703.2672119140625, + "learning_rate": 1.9544857425634848e-05, + "loss": 59.7413, + "step": 139130 + }, + { + "epoch": 0.5621432063252221, + "grad_norm": 474.5668640136719, + "learning_rate": 1.9542065626477533e-05, + "loss": 84.3597, + "step": 139140 + }, + { + "epoch": 0.5621836075905897, + "grad_norm": 652.31494140625, + "learning_rate": 1.953927383624788e-05, + "loss": 85.1592, + "step": 139150 + }, + { + "epoch": 0.5622240088559574, + "grad_norm": 770.6931762695312, + "learning_rate": 1.9536482055000315e-05, + "loss": 102.5341, + "step": 139160 + }, + { + "epoch": 0.562264410121325, + "grad_norm": 358.19140625, + "learning_rate": 1.953369028278928e-05, + "loss": 109.9257, + "step": 139170 + }, + { + "epoch": 0.5623048113866926, + "grad_norm": 681.1283569335938, + "learning_rate": 1.953089851966919e-05, + "loss": 76.3596, + "step": 139180 + }, + { + "epoch": 
0.5623452126520603, + "grad_norm": 763.1312255859375, + "learning_rate": 1.9528106765694477e-05, + "loss": 93.1702, + "step": 139190 + }, + { + "epoch": 0.5623856139174279, + "grad_norm": 603.0381469726562, + "learning_rate": 1.9525315020919568e-05, + "loss": 113.3273, + "step": 139200 + }, + { + "epoch": 0.5624260151827956, + "grad_norm": 739.3974609375, + "learning_rate": 1.9522523285398872e-05, + "loss": 82.8822, + "step": 139210 + }, + { + "epoch": 0.5624664164481632, + "grad_norm": 1459.6053466796875, + "learning_rate": 1.9519731559186846e-05, + "loss": 62.3671, + "step": 139220 + }, + { + "epoch": 0.5625068177135308, + "grad_norm": 1146.0098876953125, + "learning_rate": 1.9516939842337897e-05, + "loss": 81.1082, + "step": 139230 + }, + { + "epoch": 0.5625472189788984, + "grad_norm": 918.2079467773438, + "learning_rate": 1.9514148134906448e-05, + "loss": 61.855, + "step": 139240 + }, + { + "epoch": 0.562587620244266, + "grad_norm": 519.98974609375, + "learning_rate": 1.951135643694694e-05, + "loss": 77.4491, + "step": 139250 + }, + { + "epoch": 0.5626280215096336, + "grad_norm": 740.7540893554688, + "learning_rate": 1.9508564748513786e-05, + "loss": 84.18, + "step": 139260 + }, + { + "epoch": 0.5626684227750013, + "grad_norm": 694.7831420898438, + "learning_rate": 1.9505773069661414e-05, + "loss": 72.1186, + "step": 139270 + }, + { + "epoch": 0.5627088240403689, + "grad_norm": 605.42724609375, + "learning_rate": 1.9502981400444256e-05, + "loss": 47.0672, + "step": 139280 + }, + { + "epoch": 0.5627492253057366, + "grad_norm": 532.3679809570312, + "learning_rate": 1.950018974091672e-05, + "loss": 88.1235, + "step": 139290 + }, + { + "epoch": 0.5627896265711042, + "grad_norm": 413.73760986328125, + "learning_rate": 1.9497398091133253e-05, + "loss": 62.0585, + "step": 139300 + }, + { + "epoch": 0.5628300278364718, + "grad_norm": 657.9105834960938, + "learning_rate": 1.949460645114827e-05, + "loss": 70.2252, + "step": 139310 + }, + { + "epoch": 0.5628704291018395, + "grad_norm": 647.60546875, + "learning_rate": 1.949181482101618e-05, + "loss": 65.5372, + "step": 139320 + }, + { + "epoch": 0.5629108303672071, + "grad_norm": 436.1578674316406, + "learning_rate": 1.9489023200791436e-05, + "loss": 77.5113, + "step": 139330 + }, + { + "epoch": 0.5629512316325748, + "grad_norm": 1117.3719482421875, + "learning_rate": 1.9486231590528445e-05, + "loss": 90.0521, + "step": 139340 + }, + { + "epoch": 0.5629916328979424, + "grad_norm": 715.5693969726562, + "learning_rate": 1.948343999028163e-05, + "loss": 110.142, + "step": 139350 + }, + { + "epoch": 0.56303203416331, + "grad_norm": 283.5367431640625, + "learning_rate": 1.948064840010542e-05, + "loss": 102.7243, + "step": 139360 + }, + { + "epoch": 0.5630724354286776, + "grad_norm": 567.209716796875, + "learning_rate": 1.9477856820054238e-05, + "loss": 76.5241, + "step": 139370 + }, + { + "epoch": 0.5631128366940452, + "grad_norm": 749.0012817382812, + "learning_rate": 1.9475065250182508e-05, + "loss": 69.7652, + "step": 139380 + }, + { + "epoch": 0.5631532379594129, + "grad_norm": 591.6264038085938, + "learning_rate": 1.947227369054465e-05, + "loss": 96.9027, + "step": 139390 + }, + { + "epoch": 0.5631936392247805, + "grad_norm": 374.5245666503906, + "learning_rate": 1.946948214119508e-05, + "loss": 68.8678, + "step": 139400 + }, + { + "epoch": 0.5632340404901481, + "grad_norm": 661.9298706054688, + "learning_rate": 1.946669060218824e-05, + "loss": 62.4733, + "step": 139410 + }, + { + "epoch": 0.5632744417555158, + "grad_norm": 632.8101806640625, + 
"learning_rate": 1.946389907357854e-05, + "loss": 103.8394, + "step": 139420 + }, + { + "epoch": 0.5633148430208834, + "grad_norm": 225.4717254638672, + "learning_rate": 1.9461107555420393e-05, + "loss": 61.9016, + "step": 139430 + }, + { + "epoch": 0.5633552442862511, + "grad_norm": 567.6002807617188, + "learning_rate": 1.9458316047768244e-05, + "loss": 81.1335, + "step": 139440 + }, + { + "epoch": 0.5633956455516187, + "grad_norm": 859.3453369140625, + "learning_rate": 1.945552455067649e-05, + "loss": 68.1027, + "step": 139450 + }, + { + "epoch": 0.5634360468169863, + "grad_norm": 548.083251953125, + "learning_rate": 1.945273306419958e-05, + "loss": 71.4762, + "step": 139460 + }, + { + "epoch": 0.563476448082354, + "grad_norm": 747.2737426757812, + "learning_rate": 1.9449941588391915e-05, + "loss": 70.2563, + "step": 139470 + }, + { + "epoch": 0.5635168493477216, + "grad_norm": 768.03076171875, + "learning_rate": 1.9447150123307915e-05, + "loss": 52.2655, + "step": 139480 + }, + { + "epoch": 0.5635572506130893, + "grad_norm": 432.1719970703125, + "learning_rate": 1.9444358669002016e-05, + "loss": 74.5828, + "step": 139490 + }, + { + "epoch": 0.5635976518784568, + "grad_norm": 643.6837768554688, + "learning_rate": 1.9441567225528628e-05, + "loss": 73.4758, + "step": 139500 + }, + { + "epoch": 0.5636380531438244, + "grad_norm": 668.2734985351562, + "learning_rate": 1.9438775792942168e-05, + "loss": 58.3996, + "step": 139510 + }, + { + "epoch": 0.5636784544091921, + "grad_norm": 873.0882568359375, + "learning_rate": 1.9435984371297073e-05, + "loss": 53.7582, + "step": 139520 + }, + { + "epoch": 0.5637188556745597, + "grad_norm": 711.1632080078125, + "learning_rate": 1.943319296064775e-05, + "loss": 51.5281, + "step": 139530 + }, + { + "epoch": 0.5637592569399273, + "grad_norm": 378.937744140625, + "learning_rate": 1.9430401561048623e-05, + "loss": 78.5477, + "step": 139540 + }, + { + "epoch": 0.563799658205295, + "grad_norm": 672.2481689453125, + "learning_rate": 1.942761017255411e-05, + "loss": 66.3423, + "step": 139550 + }, + { + "epoch": 0.5638400594706626, + "grad_norm": 649.1395874023438, + "learning_rate": 1.9424818795218624e-05, + "loss": 63.1461, + "step": 139560 + }, + { + "epoch": 0.5638804607360303, + "grad_norm": 768.9603271484375, + "learning_rate": 1.94220274290966e-05, + "loss": 69.8017, + "step": 139570 + }, + { + "epoch": 0.5639208620013979, + "grad_norm": 993.9566040039062, + "learning_rate": 1.9419236074242445e-05, + "loss": 81.8207, + "step": 139580 + }, + { + "epoch": 0.5639612632667655, + "grad_norm": 522.7901611328125, + "learning_rate": 1.9416444730710583e-05, + "loss": 72.2255, + "step": 139590 + }, + { + "epoch": 0.5640016645321332, + "grad_norm": 609.2791748046875, + "learning_rate": 1.9413653398555437e-05, + "loss": 66.2507, + "step": 139600 + }, + { + "epoch": 0.5640420657975008, + "grad_norm": 871.163330078125, + "learning_rate": 1.9410862077831415e-05, + "loss": 79.5609, + "step": 139610 + }, + { + "epoch": 0.5640824670628685, + "grad_norm": 519.2615356445312, + "learning_rate": 1.9408070768592942e-05, + "loss": 61.4586, + "step": 139620 + }, + { + "epoch": 0.564122868328236, + "grad_norm": 222.0197296142578, + "learning_rate": 1.940527947089443e-05, + "loss": 69.4452, + "step": 139630 + }, + { + "epoch": 0.5641632695936036, + "grad_norm": 518.34619140625, + "learning_rate": 1.9402488184790303e-05, + "loss": 77.2622, + "step": 139640 + }, + { + "epoch": 0.5642036708589713, + "grad_norm": 1244.0399169921875, + "learning_rate": 1.9399696910334978e-05, + "loss": 
81.8412, + "step": 139650 + }, + { + "epoch": 0.5642440721243389, + "grad_norm": 630.2066040039062, + "learning_rate": 1.9396905647582866e-05, + "loss": 131.4054, + "step": 139660 + }, + { + "epoch": 0.5642844733897066, + "grad_norm": 1248.0445556640625, + "learning_rate": 1.939411439658839e-05, + "loss": 88.4464, + "step": 139670 + }, + { + "epoch": 0.5643248746550742, + "grad_norm": 339.4172058105469, + "learning_rate": 1.9391323157405974e-05, + "loss": 63.4698, + "step": 139680 + }, + { + "epoch": 0.5643652759204418, + "grad_norm": 761.1675415039062, + "learning_rate": 1.9388531930090014e-05, + "loss": 66.5725, + "step": 139690 + }, + { + "epoch": 0.5644056771858095, + "grad_norm": 1023.5862426757812, + "learning_rate": 1.9385740714694948e-05, + "loss": 71.9788, + "step": 139700 + }, + { + "epoch": 0.5644460784511771, + "grad_norm": 734.5411987304688, + "learning_rate": 1.9382949511275183e-05, + "loss": 70.1141, + "step": 139710 + }, + { + "epoch": 0.5644864797165448, + "grad_norm": 781.5546875, + "learning_rate": 1.9380158319885133e-05, + "loss": 147.3658, + "step": 139720 + }, + { + "epoch": 0.5645268809819124, + "grad_norm": 630.5901489257812, + "learning_rate": 1.9377367140579214e-05, + "loss": 54.7518, + "step": 139730 + }, + { + "epoch": 0.56456728224728, + "grad_norm": 1523.341552734375, + "learning_rate": 1.937457597341184e-05, + "loss": 100.7228, + "step": 139740 + }, + { + "epoch": 0.5646076835126476, + "grad_norm": 321.45458984375, + "learning_rate": 1.9371784818437436e-05, + "loss": 75.7267, + "step": 139750 + }, + { + "epoch": 0.5646480847780152, + "grad_norm": 601.003173828125, + "learning_rate": 1.936899367571041e-05, + "loss": 122.6097, + "step": 139760 + }, + { + "epoch": 0.5646884860433828, + "grad_norm": 780.8973999023438, + "learning_rate": 1.9366202545285168e-05, + "loss": 82.5952, + "step": 139770 + }, + { + "epoch": 0.5647288873087505, + "grad_norm": 693.6842651367188, + "learning_rate": 1.9363411427216146e-05, + "loss": 92.8148, + "step": 139780 + }, + { + "epoch": 0.5647692885741181, + "grad_norm": 561.1166381835938, + "learning_rate": 1.9360620321557742e-05, + "loss": 60.8503, + "step": 139790 + }, + { + "epoch": 0.5648096898394858, + "grad_norm": 322.07318115234375, + "learning_rate": 1.935782922836437e-05, + "loss": 82.6803, + "step": 139800 + }, + { + "epoch": 0.5648500911048534, + "grad_norm": 965.0286865234375, + "learning_rate": 1.9355038147690454e-05, + "loss": 98.778, + "step": 139810 + }, + { + "epoch": 0.564890492370221, + "grad_norm": 864.2620239257812, + "learning_rate": 1.9352247079590393e-05, + "loss": 119.2392, + "step": 139820 + }, + { + "epoch": 0.5649308936355887, + "grad_norm": 818.953369140625, + "learning_rate": 1.9349456024118617e-05, + "loss": 73.2691, + "step": 139830 + }, + { + "epoch": 0.5649712949009563, + "grad_norm": 670.4515991210938, + "learning_rate": 1.934666498132953e-05, + "loss": 90.3002, + "step": 139840 + }, + { + "epoch": 0.565011696166324, + "grad_norm": 1309.2353515625, + "learning_rate": 1.9343873951277535e-05, + "loss": 83.5955, + "step": 139850 + }, + { + "epoch": 0.5650520974316916, + "grad_norm": 632.1061401367188, + "learning_rate": 1.934108293401707e-05, + "loss": 70.4699, + "step": 139860 + }, + { + "epoch": 0.5650924986970592, + "grad_norm": 366.16326904296875, + "learning_rate": 1.9338291929602525e-05, + "loss": 68.3254, + "step": 139870 + }, + { + "epoch": 0.5651328999624268, + "grad_norm": 810.3923950195312, + "learning_rate": 1.933550093808832e-05, + "loss": 84.1174, + "step": 139880 + }, + { + "epoch": 
0.5651733012277944, + "grad_norm": 1013.7683715820312, + "learning_rate": 1.9332709959528867e-05, + "loss": 81.0742, + "step": 139890 + }, + { + "epoch": 0.565213702493162, + "grad_norm": 545.1220092773438, + "learning_rate": 1.9329918993978573e-05, + "loss": 81.2843, + "step": 139900 + }, + { + "epoch": 0.5652541037585297, + "grad_norm": 595.3439331054688, + "learning_rate": 1.9327128041491855e-05, + "loss": 64.5413, + "step": 139910 + }, + { + "epoch": 0.5652945050238973, + "grad_norm": 542.1369018554688, + "learning_rate": 1.932433710212313e-05, + "loss": 72.7845, + "step": 139920 + }, + { + "epoch": 0.565334906289265, + "grad_norm": 864.304443359375, + "learning_rate": 1.9321546175926785e-05, + "loss": 81.1065, + "step": 139930 + }, + { + "epoch": 0.5653753075546326, + "grad_norm": 749.0389404296875, + "learning_rate": 1.931875526295726e-05, + "loss": 47.2802, + "step": 139940 + }, + { + "epoch": 0.5654157088200003, + "grad_norm": 553.1068725585938, + "learning_rate": 1.9315964363268952e-05, + "loss": 70.4722, + "step": 139950 + }, + { + "epoch": 0.5654561100853679, + "grad_norm": 816.0656127929688, + "learning_rate": 1.9313173476916266e-05, + "loss": 77.3222, + "step": 139960 + }, + { + "epoch": 0.5654965113507355, + "grad_norm": 802.2261962890625, + "learning_rate": 1.9310382603953615e-05, + "loss": 59.1459, + "step": 139970 + }, + { + "epoch": 0.5655369126161032, + "grad_norm": 494.85791015625, + "learning_rate": 1.930759174443541e-05, + "loss": 55.7241, + "step": 139980 + }, + { + "epoch": 0.5655773138814708, + "grad_norm": 463.2839660644531, + "learning_rate": 1.930480089841607e-05, + "loss": 58.3213, + "step": 139990 + }, + { + "epoch": 0.5656177151468385, + "grad_norm": 1272.323486328125, + "learning_rate": 1.930201006594999e-05, + "loss": 60.0281, + "step": 140000 + }, + { + "epoch": 0.565658116412206, + "grad_norm": 938.238525390625, + "learning_rate": 1.929921924709157e-05, + "loss": 95.0896, + "step": 140010 + }, + { + "epoch": 0.5656985176775736, + "grad_norm": 520.6534423828125, + "learning_rate": 1.9296428441895246e-05, + "loss": 82.2328, + "step": 140020 + }, + { + "epoch": 0.5657389189429413, + "grad_norm": 614.57666015625, + "learning_rate": 1.929363765041541e-05, + "loss": 55.0934, + "step": 140030 + }, + { + "epoch": 0.5657793202083089, + "grad_norm": 868.5747680664062, + "learning_rate": 1.9290846872706466e-05, + "loss": 58.8171, + "step": 140040 + }, + { + "epoch": 0.5658197214736765, + "grad_norm": 1543.462158203125, + "learning_rate": 1.928805610882283e-05, + "loss": 89.0703, + "step": 140050 + }, + { + "epoch": 0.5658601227390442, + "grad_norm": 878.2360229492188, + "learning_rate": 1.928526535881891e-05, + "loss": 78.1481, + "step": 140060 + }, + { + "epoch": 0.5659005240044118, + "grad_norm": 900.33056640625, + "learning_rate": 1.928247462274911e-05, + "loss": 76.0071, + "step": 140070 + }, + { + "epoch": 0.5659409252697795, + "grad_norm": 1182.1761474609375, + "learning_rate": 1.9279683900667837e-05, + "loss": 87.4837, + "step": 140080 + }, + { + "epoch": 0.5659813265351471, + "grad_norm": 729.5367431640625, + "learning_rate": 1.9276893192629488e-05, + "loss": 66.2515, + "step": 140090 + }, + { + "epoch": 0.5660217278005147, + "grad_norm": 769.9494018554688, + "learning_rate": 1.9274102498688487e-05, + "loss": 66.1782, + "step": 140100 + }, + { + "epoch": 0.5660621290658824, + "grad_norm": 609.9633178710938, + "learning_rate": 1.9271311818899222e-05, + "loss": 59.1239, + "step": 140110 + }, + { + "epoch": 0.56610253033125, + "grad_norm": 730.2531127929688, + 
"learning_rate": 1.926852115331612e-05, + "loss": 77.4415, + "step": 140120 + }, + { + "epoch": 0.5661429315966177, + "grad_norm": 875.6709594726562, + "learning_rate": 1.9265730501993574e-05, + "loss": 61.3772, + "step": 140130 + }, + { + "epoch": 0.5661833328619852, + "grad_norm": 1125.5684814453125, + "learning_rate": 1.9262939864985985e-05, + "loss": 143.7284, + "step": 140140 + }, + { + "epoch": 0.5662237341273528, + "grad_norm": 562.5559692382812, + "learning_rate": 1.9260149242347764e-05, + "loss": 85.7488, + "step": 140150 + }, + { + "epoch": 0.5662641353927205, + "grad_norm": 190.2505340576172, + "learning_rate": 1.9257358634133318e-05, + "loss": 56.7094, + "step": 140160 + }, + { + "epoch": 0.5663045366580881, + "grad_norm": 832.9662475585938, + "learning_rate": 1.925456804039704e-05, + "loss": 101.4851, + "step": 140170 + }, + { + "epoch": 0.5663449379234557, + "grad_norm": 825.4749755859375, + "learning_rate": 1.9251777461193347e-05, + "loss": 57.5899, + "step": 140180 + }, + { + "epoch": 0.5663853391888234, + "grad_norm": 615.4243774414062, + "learning_rate": 1.9248986896576633e-05, + "loss": 95.5183, + "step": 140190 + }, + { + "epoch": 0.566425740454191, + "grad_norm": 1180.0860595703125, + "learning_rate": 1.924619634660131e-05, + "loss": 94.7859, + "step": 140200 + }, + { + "epoch": 0.5664661417195587, + "grad_norm": 872.8829345703125, + "learning_rate": 1.924340581132178e-05, + "loss": 62.5829, + "step": 140210 + }, + { + "epoch": 0.5665065429849263, + "grad_norm": 543.8487548828125, + "learning_rate": 1.924061529079244e-05, + "loss": 60.7535, + "step": 140220 + }, + { + "epoch": 0.566546944250294, + "grad_norm": 565.9086303710938, + "learning_rate": 1.92378247850677e-05, + "loss": 76.0854, + "step": 140230 + }, + { + "epoch": 0.5665873455156616, + "grad_norm": 348.22943115234375, + "learning_rate": 1.9235034294201954e-05, + "loss": 51.2211, + "step": 140240 + }, + { + "epoch": 0.5666277467810292, + "grad_norm": 328.1255798339844, + "learning_rate": 1.9232243818249602e-05, + "loss": 59.3953, + "step": 140250 + }, + { + "epoch": 0.5666681480463968, + "grad_norm": 396.63641357421875, + "learning_rate": 1.922945335726506e-05, + "loss": 70.6812, + "step": 140260 + }, + { + "epoch": 0.5667085493117644, + "grad_norm": 464.57318115234375, + "learning_rate": 1.9226662911302717e-05, + "loss": 77.4287, + "step": 140270 + }, + { + "epoch": 0.566748950577132, + "grad_norm": 1355.751953125, + "learning_rate": 1.9223872480416983e-05, + "loss": 76.365, + "step": 140280 + }, + { + "epoch": 0.5667893518424997, + "grad_norm": 523.15625, + "learning_rate": 1.9221082064662257e-05, + "loss": 68.5946, + "step": 140290 + }, + { + "epoch": 0.5668297531078673, + "grad_norm": 4106.6201171875, + "learning_rate": 1.921829166409293e-05, + "loss": 106.9751, + "step": 140300 + }, + { + "epoch": 0.566870154373235, + "grad_norm": 300.9256896972656, + "learning_rate": 1.9215501278763414e-05, + "loss": 51.9378, + "step": 140310 + }, + { + "epoch": 0.5669105556386026, + "grad_norm": 589.6752319335938, + "learning_rate": 1.9212710908728104e-05, + "loss": 47.6711, + "step": 140320 + }, + { + "epoch": 0.5669509569039702, + "grad_norm": 2056.473388671875, + "learning_rate": 1.92099205540414e-05, + "loss": 96.5298, + "step": 140330 + }, + { + "epoch": 0.5669913581693379, + "grad_norm": 550.0347290039062, + "learning_rate": 1.9207130214757702e-05, + "loss": 83.4125, + "step": 140340 + }, + { + "epoch": 0.5670317594347055, + "grad_norm": 328.50140380859375, + "learning_rate": 1.9204339890931404e-05, + "loss": 
107.9342, + "step": 140350 + }, + { + "epoch": 0.5670721607000732, + "grad_norm": 913.03955078125, + "learning_rate": 1.9201549582616915e-05, + "loss": 76.0977, + "step": 140360 + }, + { + "epoch": 0.5671125619654408, + "grad_norm": 729.4290161132812, + "learning_rate": 1.9198759289868628e-05, + "loss": 94.8281, + "step": 140370 + }, + { + "epoch": 0.5671529632308084, + "grad_norm": 872.1318969726562, + "learning_rate": 1.9195969012740933e-05, + "loss": 100.173, + "step": 140380 + }, + { + "epoch": 0.567193364496176, + "grad_norm": 850.9976196289062, + "learning_rate": 1.9193178751288247e-05, + "loss": 63.6978, + "step": 140390 + }, + { + "epoch": 0.5672337657615436, + "grad_norm": 621.26220703125, + "learning_rate": 1.9190388505564953e-05, + "loss": 78.2141, + "step": 140400 + }, + { + "epoch": 0.5672741670269112, + "grad_norm": 532.1051635742188, + "learning_rate": 1.9187598275625453e-05, + "loss": 67.2587, + "step": 140410 + }, + { + "epoch": 0.5673145682922789, + "grad_norm": 440.6077880859375, + "learning_rate": 1.918480806152414e-05, + "loss": 70.1051, + "step": 140420 + }, + { + "epoch": 0.5673549695576465, + "grad_norm": 571.1215209960938, + "learning_rate": 1.918201786331541e-05, + "loss": 58.7764, + "step": 140430 + }, + { + "epoch": 0.5673953708230142, + "grad_norm": 717.0340576171875, + "learning_rate": 1.9179227681053668e-05, + "loss": 102.2777, + "step": 140440 + }, + { + "epoch": 0.5674357720883818, + "grad_norm": 746.2987060546875, + "learning_rate": 1.917643751479331e-05, + "loss": 76.0516, + "step": 140450 + }, + { + "epoch": 0.5674761733537494, + "grad_norm": 1066.9892578125, + "learning_rate": 1.9173647364588714e-05, + "loss": 101.5061, + "step": 140460 + }, + { + "epoch": 0.5675165746191171, + "grad_norm": 836.2106323242188, + "learning_rate": 1.9170857230494296e-05, + "loss": 105.4848, + "step": 140470 + }, + { + "epoch": 0.5675569758844847, + "grad_norm": 590.7633666992188, + "learning_rate": 1.9168067112564445e-05, + "loss": 56.7123, + "step": 140480 + }, + { + "epoch": 0.5675973771498524, + "grad_norm": 784.605224609375, + "learning_rate": 1.9165277010853553e-05, + "loss": 71.9085, + "step": 140490 + }, + { + "epoch": 0.56763777841522, + "grad_norm": 330.14581298828125, + "learning_rate": 1.9162486925416014e-05, + "loss": 43.5335, + "step": 140500 + }, + { + "epoch": 0.5676781796805876, + "grad_norm": 711.858642578125, + "learning_rate": 1.9159696856306214e-05, + "loss": 95.8422, + "step": 140510 + }, + { + "epoch": 0.5677185809459552, + "grad_norm": 987.791748046875, + "learning_rate": 1.915690680357857e-05, + "loss": 76.2727, + "step": 140520 + }, + { + "epoch": 0.5677589822113228, + "grad_norm": 551.744873046875, + "learning_rate": 1.9154116767287462e-05, + "loss": 57.829, + "step": 140530 + }, + { + "epoch": 0.5677993834766905, + "grad_norm": 2740.55859375, + "learning_rate": 1.9151326747487272e-05, + "loss": 95.6681, + "step": 140540 + }, + { + "epoch": 0.5678397847420581, + "grad_norm": 1297.212646484375, + "learning_rate": 1.914853674423241e-05, + "loss": 71.396, + "step": 140550 + }, + { + "epoch": 0.5678801860074257, + "grad_norm": 665.0545654296875, + "learning_rate": 1.9145746757577267e-05, + "loss": 62.1481, + "step": 140560 + }, + { + "epoch": 0.5679205872727934, + "grad_norm": 770.4341430664062, + "learning_rate": 1.9142956787576224e-05, + "loss": 68.2378, + "step": 140570 + }, + { + "epoch": 0.567960988538161, + "grad_norm": 509.6136474609375, + "learning_rate": 1.9140166834283686e-05, + "loss": 59.2111, + "step": 140580 + }, + { + "epoch": 
0.5680013898035287, + "grad_norm": 715.39111328125, + "learning_rate": 1.913737689775403e-05, + "loss": 74.0246, + "step": 140590 + }, + { + "epoch": 0.5680417910688963, + "grad_norm": 628.1365356445312, + "learning_rate": 1.9134586978041663e-05, + "loss": 72.7023, + "step": 140600 + }, + { + "epoch": 0.5680821923342639, + "grad_norm": 435.1589660644531, + "learning_rate": 1.9131797075200968e-05, + "loss": 71.1215, + "step": 140610 + }, + { + "epoch": 0.5681225935996316, + "grad_norm": 439.7783508300781, + "learning_rate": 1.9129007189286326e-05, + "loss": 60.5101, + "step": 140620 + }, + { + "epoch": 0.5681629948649992, + "grad_norm": 729.4354248046875, + "learning_rate": 1.912621732035215e-05, + "loss": 60.9191, + "step": 140630 + }, + { + "epoch": 0.5682033961303669, + "grad_norm": 586.9058837890625, + "learning_rate": 1.912342746845281e-05, + "loss": 93.2181, + "step": 140640 + }, + { + "epoch": 0.5682437973957344, + "grad_norm": 740.5455932617188, + "learning_rate": 1.9120637633642707e-05, + "loss": 93.6703, + "step": 140650 + }, + { + "epoch": 0.568284198661102, + "grad_norm": 187.19801330566406, + "learning_rate": 1.911784781597622e-05, + "loss": 53.4323, + "step": 140660 + }, + { + "epoch": 0.5683245999264697, + "grad_norm": 492.761474609375, + "learning_rate": 1.9115058015507748e-05, + "loss": 68.3847, + "step": 140670 + }, + { + "epoch": 0.5683650011918373, + "grad_norm": 760.5210571289062, + "learning_rate": 1.9112268232291677e-05, + "loss": 100.5512, + "step": 140680 + }, + { + "epoch": 0.568405402457205, + "grad_norm": 494.7441711425781, + "learning_rate": 1.9109478466382396e-05, + "loss": 56.7374, + "step": 140690 + }, + { + "epoch": 0.5684458037225726, + "grad_norm": 640.9754638671875, + "learning_rate": 1.9106688717834282e-05, + "loss": 54.2382, + "step": 140700 + }, + { + "epoch": 0.5684862049879402, + "grad_norm": 743.1951293945312, + "learning_rate": 1.9103898986701738e-05, + "loss": 74.263, + "step": 140710 + }, + { + "epoch": 0.5685266062533079, + "grad_norm": 794.0576171875, + "learning_rate": 1.9101109273039136e-05, + "loss": 55.4171, + "step": 140720 + }, + { + "epoch": 0.5685670075186755, + "grad_norm": 464.277099609375, + "learning_rate": 1.909831957690088e-05, + "loss": 66.6467, + "step": 140730 + }, + { + "epoch": 0.5686074087840431, + "grad_norm": 561.7435913085938, + "learning_rate": 1.909552989834135e-05, + "loss": 59.9699, + "step": 140740 + }, + { + "epoch": 0.5686478100494108, + "grad_norm": 841.3145141601562, + "learning_rate": 1.9092740237414926e-05, + "loss": 52.6747, + "step": 140750 + }, + { + "epoch": 0.5686882113147784, + "grad_norm": 940.7305908203125, + "learning_rate": 1.9089950594176e-05, + "loss": 66.3946, + "step": 140760 + }, + { + "epoch": 0.5687286125801461, + "grad_norm": 409.2223815917969, + "learning_rate": 1.908716096867896e-05, + "loss": 81.2109, + "step": 140770 + }, + { + "epoch": 0.5687690138455136, + "grad_norm": 461.3180236816406, + "learning_rate": 1.9084371360978175e-05, + "loss": 74.7036, + "step": 140780 + }, + { + "epoch": 0.5688094151108812, + "grad_norm": 2577.301025390625, + "learning_rate": 1.908158177112805e-05, + "loss": 81.6021, + "step": 140790 + }, + { + "epoch": 0.5688498163762489, + "grad_norm": 411.2657165527344, + "learning_rate": 1.9078792199182954e-05, + "loss": 78.4196, + "step": 140800 + }, + { + "epoch": 0.5688902176416165, + "grad_norm": 1559.02294921875, + "learning_rate": 1.907600264519729e-05, + "loss": 105.1656, + "step": 140810 + }, + { + "epoch": 0.5689306189069842, + "grad_norm": 1083.457763671875, + 
"learning_rate": 1.9073213109225425e-05, + "loss": 79.6497, + "step": 140820 + }, + { + "epoch": 0.5689710201723518, + "grad_norm": 430.65899658203125, + "learning_rate": 1.9070423591321743e-05, + "loss": 72.4747, + "step": 140830 + }, + { + "epoch": 0.5690114214377194, + "grad_norm": 608.1416625976562, + "learning_rate": 1.906763409154064e-05, + "loss": 68.2087, + "step": 140840 + }, + { + "epoch": 0.5690518227030871, + "grad_norm": 1325.940185546875, + "learning_rate": 1.9064844609936487e-05, + "loss": 71.1146, + "step": 140850 + }, + { + "epoch": 0.5690922239684547, + "grad_norm": 573.750244140625, + "learning_rate": 1.906205514656366e-05, + "loss": 71.8972, + "step": 140860 + }, + { + "epoch": 0.5691326252338224, + "grad_norm": 637.8819580078125, + "learning_rate": 1.9059265701476563e-05, + "loss": 76.8227, + "step": 140870 + }, + { + "epoch": 0.56917302649919, + "grad_norm": 1142.9871826171875, + "learning_rate": 1.9056476274729557e-05, + "loss": 93.6911, + "step": 140880 + }, + { + "epoch": 0.5692134277645576, + "grad_norm": 869.3905639648438, + "learning_rate": 1.905368686637704e-05, + "loss": 80.9835, + "step": 140890 + }, + { + "epoch": 0.5692538290299252, + "grad_norm": 731.1185302734375, + "learning_rate": 1.9050897476473383e-05, + "loss": 55.1841, + "step": 140900 + }, + { + "epoch": 0.5692942302952928, + "grad_norm": 509.15728759765625, + "learning_rate": 1.9048108105072963e-05, + "loss": 124.9174, + "step": 140910 + }, + { + "epoch": 0.5693346315606604, + "grad_norm": 3296.18505859375, + "learning_rate": 1.9045318752230172e-05, + "loss": 116.5406, + "step": 140920 + }, + { + "epoch": 0.5693750328260281, + "grad_norm": 799.5062255859375, + "learning_rate": 1.9042529417999384e-05, + "loss": 73.3172, + "step": 140930 + }, + { + "epoch": 0.5694154340913957, + "grad_norm": 412.4471435546875, + "learning_rate": 1.9039740102434967e-05, + "loss": 75.7084, + "step": 140940 + }, + { + "epoch": 0.5694558353567634, + "grad_norm": 876.3197021484375, + "learning_rate": 1.9036950805591322e-05, + "loss": 71.4234, + "step": 140950 + }, + { + "epoch": 0.569496236622131, + "grad_norm": 999.04638671875, + "learning_rate": 1.9034161527522807e-05, + "loss": 88.5623, + "step": 140960 + }, + { + "epoch": 0.5695366378874986, + "grad_norm": 556.821044921875, + "learning_rate": 1.903137226828382e-05, + "loss": 149.3803, + "step": 140970 + }, + { + "epoch": 0.5695770391528663, + "grad_norm": 718.3651123046875, + "learning_rate": 1.902858302792873e-05, + "loss": 97.0796, + "step": 140980 + }, + { + "epoch": 0.5696174404182339, + "grad_norm": 810.9666137695312, + "learning_rate": 1.902579380651191e-05, + "loss": 62.1747, + "step": 140990 + }, + { + "epoch": 0.5696578416836016, + "grad_norm": 363.26556396484375, + "learning_rate": 1.9023004604087735e-05, + "loss": 65.932, + "step": 141000 + }, + { + "epoch": 0.5696982429489692, + "grad_norm": 741.3404541015625, + "learning_rate": 1.9020215420710598e-05, + "loss": 71.1488, + "step": 141010 + }, + { + "epoch": 0.5697386442143368, + "grad_norm": 921.1856689453125, + "learning_rate": 1.901742625643486e-05, + "loss": 89.4201, + "step": 141020 + }, + { + "epoch": 0.5697790454797044, + "grad_norm": 1192.3790283203125, + "learning_rate": 1.901463711131491e-05, + "loss": 96.6122, + "step": 141030 + }, + { + "epoch": 0.569819446745072, + "grad_norm": 423.0237121582031, + "learning_rate": 1.9011847985405105e-05, + "loss": 49.6415, + "step": 141040 + }, + { + "epoch": 0.5698598480104397, + "grad_norm": 2430.500244140625, + "learning_rate": 1.9009058878759843e-05, + 
"loss": 86.1656, + "step": 141050 + }, + { + "epoch": 0.5699002492758073, + "grad_norm": 1206.6846923828125, + "learning_rate": 1.9006269791433488e-05, + "loss": 94.5546, + "step": 141060 + }, + { + "epoch": 0.5699406505411749, + "grad_norm": 344.2526550292969, + "learning_rate": 1.9003480723480402e-05, + "loss": 92.9017, + "step": 141070 + }, + { + "epoch": 0.5699810518065426, + "grad_norm": 555.0518798828125, + "learning_rate": 1.9000691674954987e-05, + "loss": 66.0136, + "step": 141080 + }, + { + "epoch": 0.5700214530719102, + "grad_norm": 677.0184936523438, + "learning_rate": 1.89979026459116e-05, + "loss": 108.7519, + "step": 141090 + }, + { + "epoch": 0.5700618543372779, + "grad_norm": 943.384521484375, + "learning_rate": 1.899511363640461e-05, + "loss": 72.1723, + "step": 141100 + }, + { + "epoch": 0.5701022556026455, + "grad_norm": 589.8657836914062, + "learning_rate": 1.8992324646488404e-05, + "loss": 57.4047, + "step": 141110 + }, + { + "epoch": 0.5701426568680131, + "grad_norm": 1307.1983642578125, + "learning_rate": 1.898953567621734e-05, + "loss": 75.1367, + "step": 141120 + }, + { + "epoch": 0.5701830581333808, + "grad_norm": 626.76416015625, + "learning_rate": 1.8986746725645806e-05, + "loss": 54.1932, + "step": 141130 + }, + { + "epoch": 0.5702234593987484, + "grad_norm": 843.2824096679688, + "learning_rate": 1.8983957794828168e-05, + "loss": 100.1062, + "step": 141140 + }, + { + "epoch": 0.570263860664116, + "grad_norm": 1050.9560546875, + "learning_rate": 1.8981168883818785e-05, + "loss": 74.3145, + "step": 141150 + }, + { + "epoch": 0.5703042619294836, + "grad_norm": 750.2484741210938, + "learning_rate": 1.8978379992672047e-05, + "loss": 69.5943, + "step": 141160 + }, + { + "epoch": 0.5703446631948512, + "grad_norm": 996.4127807617188, + "learning_rate": 1.8975591121442312e-05, + "loss": 51.6315, + "step": 141170 + }, + { + "epoch": 0.5703850644602189, + "grad_norm": 521.7628173828125, + "learning_rate": 1.8972802270183962e-05, + "loss": 105.2739, + "step": 141180 + }, + { + "epoch": 0.5704254657255865, + "grad_norm": 749.8202514648438, + "learning_rate": 1.8970013438951364e-05, + "loss": 70.0684, + "step": 141190 + }, + { + "epoch": 0.5704658669909541, + "grad_norm": 523.2435302734375, + "learning_rate": 1.896722462779887e-05, + "loss": 60.1772, + "step": 141200 + }, + { + "epoch": 0.5705062682563218, + "grad_norm": 807.6856079101562, + "learning_rate": 1.8964435836780874e-05, + "loss": 97.0728, + "step": 141210 + }, + { + "epoch": 0.5705466695216894, + "grad_norm": 845.9319458007812, + "learning_rate": 1.8961647065951734e-05, + "loss": 82.9636, + "step": 141220 + }, + { + "epoch": 0.5705870707870571, + "grad_norm": 815.2348022460938, + "learning_rate": 1.895885831536581e-05, + "loss": 98.3061, + "step": 141230 + }, + { + "epoch": 0.5706274720524247, + "grad_norm": 1186.4349365234375, + "learning_rate": 1.895606958507749e-05, + "loss": 85.0517, + "step": 141240 + }, + { + "epoch": 0.5706678733177923, + "grad_norm": 678.8312377929688, + "learning_rate": 1.8953280875141125e-05, + "loss": 63.8409, + "step": 141250 + }, + { + "epoch": 0.57070827458316, + "grad_norm": 773.8411254882812, + "learning_rate": 1.8950492185611094e-05, + "loss": 68.1726, + "step": 141260 + }, + { + "epoch": 0.5707486758485276, + "grad_norm": 585.3635864257812, + "learning_rate": 1.8947703516541755e-05, + "loss": 94.5351, + "step": 141270 + }, + { + "epoch": 0.5707890771138953, + "grad_norm": 541.0792236328125, + "learning_rate": 1.8944914867987472e-05, + "loss": 61.6232, + "step": 141280 + }, + { + 
"epoch": 0.5708294783792628, + "grad_norm": 899.8917236328125, + "learning_rate": 1.8942126240002626e-05, + "loss": 99.766, + "step": 141290 + }, + { + "epoch": 0.5708698796446304, + "grad_norm": 632.0695190429688, + "learning_rate": 1.893933763264157e-05, + "loss": 110.1575, + "step": 141300 + }, + { + "epoch": 0.5709102809099981, + "grad_norm": 538.776123046875, + "learning_rate": 1.8936549045958667e-05, + "loss": 47.7108, + "step": 141310 + }, + { + "epoch": 0.5709506821753657, + "grad_norm": 934.9037475585938, + "learning_rate": 1.8933760480008298e-05, + "loss": 85.4891, + "step": 141320 + }, + { + "epoch": 0.5709910834407333, + "grad_norm": 804.5138549804688, + "learning_rate": 1.893097193484481e-05, + "loss": 56.2758, + "step": 141330 + }, + { + "epoch": 0.571031484706101, + "grad_norm": 550.3965454101562, + "learning_rate": 1.8928183410522574e-05, + "loss": 83.7936, + "step": 141340 + }, + { + "epoch": 0.5710718859714686, + "grad_norm": 559.5126953125, + "learning_rate": 1.892539490709596e-05, + "loss": 63.421, + "step": 141350 + }, + { + "epoch": 0.5711122872368363, + "grad_norm": 674.39306640625, + "learning_rate": 1.892260642461932e-05, + "loss": 49.8573, + "step": 141360 + }, + { + "epoch": 0.5711526885022039, + "grad_norm": 2349.88623046875, + "learning_rate": 1.891981796314703e-05, + "loss": 99.444, + "step": 141370 + }, + { + "epoch": 0.5711930897675715, + "grad_norm": 1019.6990356445312, + "learning_rate": 1.8917029522733442e-05, + "loss": 69.9275, + "step": 141380 + }, + { + "epoch": 0.5712334910329392, + "grad_norm": 510.0473327636719, + "learning_rate": 1.8914241103432912e-05, + "loss": 77.0989, + "step": 141390 + }, + { + "epoch": 0.5712738922983068, + "grad_norm": 771.1107788085938, + "learning_rate": 1.891145270529982e-05, + "loss": 45.6757, + "step": 141400 + }, + { + "epoch": 0.5713142935636745, + "grad_norm": 709.2401733398438, + "learning_rate": 1.890866432838851e-05, + "loss": 79.5347, + "step": 141410 + }, + { + "epoch": 0.571354694829042, + "grad_norm": 614.134521484375, + "learning_rate": 1.890587597275336e-05, + "loss": 102.2105, + "step": 141420 + }, + { + "epoch": 0.5713950960944096, + "grad_norm": 782.2831420898438, + "learning_rate": 1.890308763844872e-05, + "loss": 66.5639, + "step": 141430 + }, + { + "epoch": 0.5714354973597773, + "grad_norm": 590.98388671875, + "learning_rate": 1.8900299325528948e-05, + "loss": 86.201, + "step": 141440 + }, + { + "epoch": 0.5714758986251449, + "grad_norm": 2549.42138671875, + "learning_rate": 1.8897511034048414e-05, + "loss": 125.5585, + "step": 141450 + }, + { + "epoch": 0.5715162998905126, + "grad_norm": 657.1383056640625, + "learning_rate": 1.8894722764061463e-05, + "loss": 80.3656, + "step": 141460 + }, + { + "epoch": 0.5715567011558802, + "grad_norm": 573.8141479492188, + "learning_rate": 1.8891934515622458e-05, + "loss": 58.5841, + "step": 141470 + }, + { + "epoch": 0.5715971024212478, + "grad_norm": 892.4143676757812, + "learning_rate": 1.8889146288785764e-05, + "loss": 65.7508, + "step": 141480 + }, + { + "epoch": 0.5716375036866155, + "grad_norm": 1070.6102294921875, + "learning_rate": 1.888635808360573e-05, + "loss": 80.5867, + "step": 141490 + }, + { + "epoch": 0.5716779049519831, + "grad_norm": 331.8288879394531, + "learning_rate": 1.8883569900136724e-05, + "loss": 51.1406, + "step": 141500 + }, + { + "epoch": 0.5717183062173508, + "grad_norm": 359.12701416015625, + "learning_rate": 1.88807817384331e-05, + "loss": 61.9231, + "step": 141510 + }, + { + "epoch": 0.5717587074827184, + "grad_norm": 
1121.463623046875, + "learning_rate": 1.8877993598549207e-05, + "loss": 64.7936, + "step": 141520 + }, + { + "epoch": 0.571799108748086, + "grad_norm": 359.6466369628906, + "learning_rate": 1.887520548053941e-05, + "loss": 73.3771, + "step": 141530 + }, + { + "epoch": 0.5718395100134536, + "grad_norm": 1167.7994384765625, + "learning_rate": 1.8872417384458062e-05, + "loss": 77.6793, + "step": 141540 + }, + { + "epoch": 0.5718799112788212, + "grad_norm": 430.7785339355469, + "learning_rate": 1.886962931035951e-05, + "loss": 115.6723, + "step": 141550 + }, + { + "epoch": 0.5719203125441888, + "grad_norm": 687.2074584960938, + "learning_rate": 1.8866841258298126e-05, + "loss": 68.9608, + "step": 141560 + }, + { + "epoch": 0.5719607138095565, + "grad_norm": 1416.7017822265625, + "learning_rate": 1.8864053228328244e-05, + "loss": 61.7161, + "step": 141570 + }, + { + "epoch": 0.5720011150749241, + "grad_norm": 793.7147827148438, + "learning_rate": 1.8861265220504238e-05, + "loss": 73.7493, + "step": 141580 + }, + { + "epoch": 0.5720415163402918, + "grad_norm": 674.9739990234375, + "learning_rate": 1.8858477234880454e-05, + "loss": 70.7115, + "step": 141590 + }, + { + "epoch": 0.5720819176056594, + "grad_norm": 683.7545166015625, + "learning_rate": 1.885568927151124e-05, + "loss": 69.0697, + "step": 141600 + }, + { + "epoch": 0.572122318871027, + "grad_norm": 695.3665771484375, + "learning_rate": 1.8852901330450958e-05, + "loss": 96.6801, + "step": 141610 + }, + { + "epoch": 0.5721627201363947, + "grad_norm": 875.0771484375, + "learning_rate": 1.8850113411753947e-05, + "loss": 59.786, + "step": 141620 + }, + { + "epoch": 0.5722031214017623, + "grad_norm": 1779.846435546875, + "learning_rate": 1.884732551547457e-05, + "loss": 105.6403, + "step": 141630 + }, + { + "epoch": 0.57224352266713, + "grad_norm": 286.70343017578125, + "learning_rate": 1.8844537641667177e-05, + "loss": 66.2363, + "step": 141640 + }, + { + "epoch": 0.5722839239324976, + "grad_norm": 425.2462463378906, + "learning_rate": 1.884174979038611e-05, + "loss": 88.5446, + "step": 141650 + }, + { + "epoch": 0.5723243251978652, + "grad_norm": 621.2692260742188, + "learning_rate": 1.883896196168574e-05, + "loss": 61.4139, + "step": 141660 + }, + { + "epoch": 0.5723647264632328, + "grad_norm": 1243.365234375, + "learning_rate": 1.88361741556204e-05, + "loss": 89.697, + "step": 141670 + }, + { + "epoch": 0.5724051277286004, + "grad_norm": 1119.1502685546875, + "learning_rate": 1.8833386372244434e-05, + "loss": 78.194, + "step": 141680 + }, + { + "epoch": 0.5724455289939681, + "grad_norm": 815.9837036132812, + "learning_rate": 1.8830598611612215e-05, + "loss": 66.6096, + "step": 141690 + }, + { + "epoch": 0.5724859302593357, + "grad_norm": 3106.30224609375, + "learning_rate": 1.882781087377807e-05, + "loss": 124.164, + "step": 141700 + }, + { + "epoch": 0.5725263315247033, + "grad_norm": 769.7230834960938, + "learning_rate": 1.882502315879636e-05, + "loss": 90.7544, + "step": 141710 + }, + { + "epoch": 0.572566732790071, + "grad_norm": 562.7537231445312, + "learning_rate": 1.882223546672143e-05, + "loss": 130.4754, + "step": 141720 + }, + { + "epoch": 0.5726071340554386, + "grad_norm": 1006.8292236328125, + "learning_rate": 1.8819447797607616e-05, + "loss": 97.198, + "step": 141730 + }, + { + "epoch": 0.5726475353208063, + "grad_norm": 431.6899108886719, + "learning_rate": 1.8816660151509284e-05, + "loss": 61.3201, + "step": 141740 + }, + { + "epoch": 0.5726879365861739, + "grad_norm": 626.7484741210938, + "learning_rate": 
1.8813872528480772e-05, + "loss": 74.0422, + "step": 141750 + }, + { + "epoch": 0.5727283378515415, + "grad_norm": 820.3584594726562, + "learning_rate": 1.8811084928576416e-05, + "loss": 108.2899, + "step": 141760 + }, + { + "epoch": 0.5727687391169092, + "grad_norm": 936.1619262695312, + "learning_rate": 1.880829735185058e-05, + "loss": 81.5253, + "step": 141770 + }, + { + "epoch": 0.5728091403822768, + "grad_norm": 361.2859802246094, + "learning_rate": 1.88055097983576e-05, + "loss": 86.5118, + "step": 141780 + }, + { + "epoch": 0.5728495416476445, + "grad_norm": 760.9664916992188, + "learning_rate": 1.8802722268151822e-05, + "loss": 59.9073, + "step": 141790 + }, + { + "epoch": 0.572889942913012, + "grad_norm": 1179.9278564453125, + "learning_rate": 1.879993476128759e-05, + "loss": 68.8156, + "step": 141800 + }, + { + "epoch": 0.5729303441783796, + "grad_norm": 1097.7713623046875, + "learning_rate": 1.879714727781924e-05, + "loss": 75.6188, + "step": 141810 + }, + { + "epoch": 0.5729707454437473, + "grad_norm": 690.8434448242188, + "learning_rate": 1.8794359817801134e-05, + "loss": 67.3939, + "step": 141820 + }, + { + "epoch": 0.5730111467091149, + "grad_norm": 1052.337890625, + "learning_rate": 1.87915723812876e-05, + "loss": 95.5038, + "step": 141830 + }, + { + "epoch": 0.5730515479744825, + "grad_norm": 515.8939208984375, + "learning_rate": 1.878878496833298e-05, + "loss": 95.1526, + "step": 141840 + }, + { + "epoch": 0.5730919492398502, + "grad_norm": 750.8078002929688, + "learning_rate": 1.8785997578991625e-05, + "loss": 42.7537, + "step": 141850 + }, + { + "epoch": 0.5731323505052178, + "grad_norm": 776.8842163085938, + "learning_rate": 1.878321021331787e-05, + "loss": 89.8238, + "step": 141860 + }, + { + "epoch": 0.5731727517705855, + "grad_norm": 879.0947875976562, + "learning_rate": 1.8780422871366063e-05, + "loss": 68.4682, + "step": 141870 + }, + { + "epoch": 0.5732131530359531, + "grad_norm": 587.8779296875, + "learning_rate": 1.877763555319054e-05, + "loss": 60.0314, + "step": 141880 + }, + { + "epoch": 0.5732535543013207, + "grad_norm": 262.74395751953125, + "learning_rate": 1.877484825884563e-05, + "loss": 83.2265, + "step": 141890 + }, + { + "epoch": 0.5732939555666884, + "grad_norm": 721.5126953125, + "learning_rate": 1.8772060988385694e-05, + "loss": 47.226, + "step": 141900 + }, + { + "epoch": 0.573334356832056, + "grad_norm": 1016.8367309570312, + "learning_rate": 1.876927374186506e-05, + "loss": 67.9147, + "step": 141910 + }, + { + "epoch": 0.5733747580974237, + "grad_norm": 685.9984741210938, + "learning_rate": 1.8766486519338064e-05, + "loss": 55.4851, + "step": 141920 + }, + { + "epoch": 0.5734151593627912, + "grad_norm": 678.8402099609375, + "learning_rate": 1.8763699320859054e-05, + "loss": 68.2892, + "step": 141930 + }, + { + "epoch": 0.5734555606281588, + "grad_norm": 1346.9200439453125, + "learning_rate": 1.876091214648236e-05, + "loss": 76.7028, + "step": 141940 + }, + { + "epoch": 0.5734959618935265, + "grad_norm": 693.3425903320312, + "learning_rate": 1.8758124996262322e-05, + "loss": 73.7648, + "step": 141950 + }, + { + "epoch": 0.5735363631588941, + "grad_norm": 763.3579711914062, + "learning_rate": 1.8755337870253272e-05, + "loss": 68.7933, + "step": 141960 + }, + { + "epoch": 0.5735767644242618, + "grad_norm": 291.6490173339844, + "learning_rate": 1.8752550768509555e-05, + "loss": 58.9611, + "step": 141970 + }, + { + "epoch": 0.5736171656896294, + "grad_norm": 593.1737060546875, + "learning_rate": 1.8749763691085505e-05, + "loss": 53.2585, + "step": 
141980 + }, + { + "epoch": 0.573657566954997, + "grad_norm": 370.7059326171875, + "learning_rate": 1.8746976638035455e-05, + "loss": 59.8078, + "step": 141990 + }, + { + "epoch": 0.5736979682203647, + "grad_norm": 780.2083740234375, + "learning_rate": 1.8744189609413733e-05, + "loss": 65.1724, + "step": 142000 + }, + { + "epoch": 0.5737383694857323, + "grad_norm": 804.9729614257812, + "learning_rate": 1.874140260527469e-05, + "loss": 78.7023, + "step": 142010 + }, + { + "epoch": 0.5737787707511, + "grad_norm": 795.3954467773438, + "learning_rate": 1.873861562567264e-05, + "loss": 73.4646, + "step": 142020 + }, + { + "epoch": 0.5738191720164676, + "grad_norm": 1019.785888671875, + "learning_rate": 1.873582867066194e-05, + "loss": 89.9083, + "step": 142030 + }, + { + "epoch": 0.5738595732818352, + "grad_norm": 919.9898681640625, + "learning_rate": 1.873304174029691e-05, + "loss": 84.7395, + "step": 142040 + }, + { + "epoch": 0.5738999745472029, + "grad_norm": 642.4674682617188, + "learning_rate": 1.873025483463188e-05, + "loss": 117.1326, + "step": 142050 + }, + { + "epoch": 0.5739403758125704, + "grad_norm": 978.3419189453125, + "learning_rate": 1.872746795372119e-05, + "loss": 77.751, + "step": 142060 + }, + { + "epoch": 0.573980777077938, + "grad_norm": 898.3775024414062, + "learning_rate": 1.8724681097619163e-05, + "loss": 77.3829, + "step": 142070 + }, + { + "epoch": 0.5740211783433057, + "grad_norm": 639.0759887695312, + "learning_rate": 1.872189426638013e-05, + "loss": 66.429, + "step": 142080 + }, + { + "epoch": 0.5740615796086733, + "grad_norm": 1034.8231201171875, + "learning_rate": 1.8719107460058435e-05, + "loss": 90.6254, + "step": 142090 + }, + { + "epoch": 0.574101980874041, + "grad_norm": 544.7758178710938, + "learning_rate": 1.871632067870839e-05, + "loss": 71.2211, + "step": 142100 + }, + { + "epoch": 0.5741423821394086, + "grad_norm": 714.7717895507812, + "learning_rate": 1.871353392238434e-05, + "loss": 70.8381, + "step": 142110 + }, + { + "epoch": 0.5741827834047762, + "grad_norm": 1970.2132568359375, + "learning_rate": 1.8710747191140613e-05, + "loss": 121.378, + "step": 142120 + }, + { + "epoch": 0.5742231846701439, + "grad_norm": 824.0568237304688, + "learning_rate": 1.8707960485031528e-05, + "loss": 90.6517, + "step": 142130 + }, + { + "epoch": 0.5742635859355115, + "grad_norm": 808.2223510742188, + "learning_rate": 1.870517380411142e-05, + "loss": 49.366, + "step": 142140 + }, + { + "epoch": 0.5743039872008792, + "grad_norm": 729.1856079101562, + "learning_rate": 1.870238714843461e-05, + "loss": 63.7178, + "step": 142150 + }, + { + "epoch": 0.5743443884662468, + "grad_norm": 607.4038696289062, + "learning_rate": 1.869960051805544e-05, + "loss": 81.9089, + "step": 142160 + }, + { + "epoch": 0.5743847897316144, + "grad_norm": 610.1832885742188, + "learning_rate": 1.869681391302822e-05, + "loss": 74.9509, + "step": 142170 + }, + { + "epoch": 0.574425190996982, + "grad_norm": 721.153564453125, + "learning_rate": 1.8694027333407284e-05, + "loss": 73.0223, + "step": 142180 + }, + { + "epoch": 0.5744655922623496, + "grad_norm": 751.1221313476562, + "learning_rate": 1.869124077924696e-05, + "loss": 98.0287, + "step": 142190 + }, + { + "epoch": 0.5745059935277173, + "grad_norm": 275.489013671875, + "learning_rate": 1.8688454250601575e-05, + "loss": 54.8347, + "step": 142200 + }, + { + "epoch": 0.5745463947930849, + "grad_norm": 1163.93701171875, + "learning_rate": 1.8685667747525444e-05, + "loss": 79.1063, + "step": 142210 + }, + { + "epoch": 0.5745867960584525, + 
"grad_norm": 676.1515502929688, + "learning_rate": 1.8682881270072903e-05, + "loss": 105.3958, + "step": 142220 + }, + { + "epoch": 0.5746271973238202, + "grad_norm": 985.1818237304688, + "learning_rate": 1.8680094818298257e-05, + "loss": 72.4714, + "step": 142230 + }, + { + "epoch": 0.5746675985891878, + "grad_norm": 651.6032104492188, + "learning_rate": 1.8677308392255852e-05, + "loss": 83.5585, + "step": 142240 + }, + { + "epoch": 0.5747079998545555, + "grad_norm": 921.6570434570312, + "learning_rate": 1.8674521992000003e-05, + "loss": 115.9725, + "step": 142250 + }, + { + "epoch": 0.5747484011199231, + "grad_norm": 386.1569519042969, + "learning_rate": 1.867173561758502e-05, + "loss": 75.3406, + "step": 142260 + }, + { + "epoch": 0.5747888023852907, + "grad_norm": 1255.7938232421875, + "learning_rate": 1.8668949269065244e-05, + "loss": 71.8363, + "step": 142270 + }, + { + "epoch": 0.5748292036506584, + "grad_norm": 828.3392333984375, + "learning_rate": 1.8666162946494985e-05, + "loss": 104.6959, + "step": 142280 + }, + { + "epoch": 0.574869604916026, + "grad_norm": 973.8176879882812, + "learning_rate": 1.8663376649928563e-05, + "loss": 57.6206, + "step": 142290 + }, + { + "epoch": 0.5749100061813937, + "grad_norm": 1101.08154296875, + "learning_rate": 1.8660590379420306e-05, + "loss": 110.4452, + "step": 142300 + }, + { + "epoch": 0.5749504074467612, + "grad_norm": 824.0100708007812, + "learning_rate": 1.8657804135024523e-05, + "loss": 48.7732, + "step": 142310 + }, + { + "epoch": 0.5749908087121288, + "grad_norm": 828.7144775390625, + "learning_rate": 1.8655017916795546e-05, + "loss": 80.8455, + "step": 142320 + }, + { + "epoch": 0.5750312099774965, + "grad_norm": 1066.981689453125, + "learning_rate": 1.8652231724787687e-05, + "loss": 78.6015, + "step": 142330 + }, + { + "epoch": 0.5750716112428641, + "grad_norm": 575.275146484375, + "learning_rate": 1.8649445559055258e-05, + "loss": 105.0819, + "step": 142340 + }, + { + "epoch": 0.5751120125082317, + "grad_norm": 566.4312744140625, + "learning_rate": 1.8646659419652587e-05, + "loss": 54.6703, + "step": 142350 + }, + { + "epoch": 0.5751524137735994, + "grad_norm": 761.6423950195312, + "learning_rate": 1.864387330663399e-05, + "loss": 66.369, + "step": 142360 + }, + { + "epoch": 0.575192815038967, + "grad_norm": 942.744384765625, + "learning_rate": 1.8641087220053774e-05, + "loss": 75.2686, + "step": 142370 + }, + { + "epoch": 0.5752332163043347, + "grad_norm": 651.730224609375, + "learning_rate": 1.863830115996627e-05, + "loss": 104.8366, + "step": 142380 + }, + { + "epoch": 0.5752736175697023, + "grad_norm": 444.5345764160156, + "learning_rate": 1.863551512642578e-05, + "loss": 82.2108, + "step": 142390 + }, + { + "epoch": 0.5753140188350699, + "grad_norm": 821.3854370117188, + "learning_rate": 1.863272911948663e-05, + "loss": 69.7542, + "step": 142400 + }, + { + "epoch": 0.5753544201004376, + "grad_norm": 269.88128662109375, + "learning_rate": 1.862994313920313e-05, + "loss": 66.5986, + "step": 142410 + }, + { + "epoch": 0.5753948213658052, + "grad_norm": 829.3333740234375, + "learning_rate": 1.8627157185629584e-05, + "loss": 72.3048, + "step": 142420 + }, + { + "epoch": 0.5754352226311729, + "grad_norm": 555.0773315429688, + "learning_rate": 1.8624371258820327e-05, + "loss": 66.7821, + "step": 142430 + }, + { + "epoch": 0.5754756238965404, + "grad_norm": 483.8691101074219, + "learning_rate": 1.8621585358829655e-05, + "loss": 37.4953, + "step": 142440 + }, + { + "epoch": 0.575516025161908, + "grad_norm": 815.98095703125, + 
"learning_rate": 1.8618799485711886e-05, + "loss": 69.6825, + "step": 142450 + }, + { + "epoch": 0.5755564264272757, + "grad_norm": 777.53369140625, + "learning_rate": 1.8616013639521332e-05, + "loss": 72.3883, + "step": 142460 + }, + { + "epoch": 0.5755968276926433, + "grad_norm": 926.8099975585938, + "learning_rate": 1.8613227820312307e-05, + "loss": 81.8359, + "step": 142470 + }, + { + "epoch": 0.575637228958011, + "grad_norm": 476.4912109375, + "learning_rate": 1.861044202813912e-05, + "loss": 46.7047, + "step": 142480 + }, + { + "epoch": 0.5756776302233786, + "grad_norm": 529.5849609375, + "learning_rate": 1.8607656263056085e-05, + "loss": 63.7852, + "step": 142490 + }, + { + "epoch": 0.5757180314887462, + "grad_norm": 604.9124145507812, + "learning_rate": 1.8604870525117496e-05, + "loss": 61.0133, + "step": 142500 + }, + { + "epoch": 0.5757584327541139, + "grad_norm": 658.4378051757812, + "learning_rate": 1.8602084814377685e-05, + "loss": 43.2187, + "step": 142510 + }, + { + "epoch": 0.5757988340194815, + "grad_norm": 510.539306640625, + "learning_rate": 1.8599299130890948e-05, + "loss": 70.6078, + "step": 142520 + }, + { + "epoch": 0.5758392352848491, + "grad_norm": 879.8707275390625, + "learning_rate": 1.8596513474711585e-05, + "loss": 80.9975, + "step": 142530 + }, + { + "epoch": 0.5758796365502168, + "grad_norm": 842.3397216796875, + "learning_rate": 1.8593727845893927e-05, + "loss": 87.3781, + "step": 142540 + }, + { + "epoch": 0.5759200378155844, + "grad_norm": 743.3448486328125, + "learning_rate": 1.8590942244492262e-05, + "loss": 59.6961, + "step": 142550 + }, + { + "epoch": 0.5759604390809521, + "grad_norm": 541.29248046875, + "learning_rate": 1.858815667056091e-05, + "loss": 89.6266, + "step": 142560 + }, + { + "epoch": 0.5760008403463196, + "grad_norm": 690.689208984375, + "learning_rate": 1.858537112415417e-05, + "loss": 60.6213, + "step": 142570 + }, + { + "epoch": 0.5760412416116872, + "grad_norm": 415.4001159667969, + "learning_rate": 1.858258560532634e-05, + "loss": 72.4854, + "step": 142580 + }, + { + "epoch": 0.5760816428770549, + "grad_norm": 942.4613647460938, + "learning_rate": 1.857980011413174e-05, + "loss": 77.0593, + "step": 142590 + }, + { + "epoch": 0.5761220441424225, + "grad_norm": 559.9432373046875, + "learning_rate": 1.857701465062467e-05, + "loss": 86.7117, + "step": 142600 + }, + { + "epoch": 0.5761624454077902, + "grad_norm": 505.4812927246094, + "learning_rate": 1.857422921485942e-05, + "loss": 62.6065, + "step": 142610 + }, + { + "epoch": 0.5762028466731578, + "grad_norm": 739.94921875, + "learning_rate": 1.857144380689032e-05, + "loss": 73.7185, + "step": 142620 + }, + { + "epoch": 0.5762432479385254, + "grad_norm": 588.62255859375, + "learning_rate": 1.8568658426771647e-05, + "loss": 86.3797, + "step": 142630 + }, + { + "epoch": 0.5762836492038931, + "grad_norm": 744.6373291015625, + "learning_rate": 1.856587307455772e-05, + "loss": 91.2223, + "step": 142640 + }, + { + "epoch": 0.5763240504692607, + "grad_norm": 780.9818115234375, + "learning_rate": 1.8563087750302838e-05, + "loss": 75.3883, + "step": 142650 + }, + { + "epoch": 0.5763644517346284, + "grad_norm": 409.5501403808594, + "learning_rate": 1.8560302454061296e-05, + "loss": 60.0537, + "step": 142660 + }, + { + "epoch": 0.576404852999996, + "grad_norm": 1153.296630859375, + "learning_rate": 1.8557517185887404e-05, + "loss": 76.6469, + "step": 142670 + }, + { + "epoch": 0.5764452542653636, + "grad_norm": 577.1732788085938, + "learning_rate": 1.855473194583545e-05, + "loss": 61.4833, + 
"step": 142680 + }, + { + "epoch": 0.5764856555307313, + "grad_norm": 923.1431884765625, + "learning_rate": 1.8551946733959748e-05, + "loss": 82.2054, + "step": 142690 + }, + { + "epoch": 0.5765260567960988, + "grad_norm": 691.478515625, + "learning_rate": 1.8549161550314587e-05, + "loss": 89.3962, + "step": 142700 + }, + { + "epoch": 0.5765664580614664, + "grad_norm": 713.4063110351562, + "learning_rate": 1.8546376394954262e-05, + "loss": 86.2593, + "step": 142710 + }, + { + "epoch": 0.5766068593268341, + "grad_norm": 993.2153930664062, + "learning_rate": 1.8543591267933088e-05, + "loss": 61.2955, + "step": 142720 + }, + { + "epoch": 0.5766472605922017, + "grad_norm": 384.67694091796875, + "learning_rate": 1.854080616930535e-05, + "loss": 66.1534, + "step": 142730 + }, + { + "epoch": 0.5766876618575694, + "grad_norm": 712.9164428710938, + "learning_rate": 1.8538021099125344e-05, + "loss": 71.6839, + "step": 142740 + }, + { + "epoch": 0.576728063122937, + "grad_norm": 1494.7860107421875, + "learning_rate": 1.8535236057447374e-05, + "loss": 85.6443, + "step": 142750 + }, + { + "epoch": 0.5767684643883046, + "grad_norm": 1009.9266357421875, + "learning_rate": 1.853245104432572e-05, + "loss": 58.847, + "step": 142760 + }, + { + "epoch": 0.5768088656536723, + "grad_norm": 637.132080078125, + "learning_rate": 1.85296660598147e-05, + "loss": 87.1532, + "step": 142770 + }, + { + "epoch": 0.5768492669190399, + "grad_norm": 671.129150390625, + "learning_rate": 1.8526881103968593e-05, + "loss": 78.5222, + "step": 142780 + }, + { + "epoch": 0.5768896681844076, + "grad_norm": 854.4932861328125, + "learning_rate": 1.852409617684169e-05, + "loss": 102.3753, + "step": 142790 + }, + { + "epoch": 0.5769300694497752, + "grad_norm": 1861.621337890625, + "learning_rate": 1.8521311278488304e-05, + "loss": 95.2062, + "step": 142800 + }, + { + "epoch": 0.5769704707151428, + "grad_norm": 691.3572998046875, + "learning_rate": 1.8518526408962713e-05, + "loss": 110.1118, + "step": 142810 + }, + { + "epoch": 0.5770108719805104, + "grad_norm": 492.190673828125, + "learning_rate": 1.8515741568319207e-05, + "loss": 83.5378, + "step": 142820 + }, + { + "epoch": 0.577051273245878, + "grad_norm": 678.4166870117188, + "learning_rate": 1.8512956756612092e-05, + "loss": 76.5917, + "step": 142830 + }, + { + "epoch": 0.5770916745112457, + "grad_norm": 261.902099609375, + "learning_rate": 1.8510171973895635e-05, + "loss": 76.7446, + "step": 142840 + }, + { + "epoch": 0.5771320757766133, + "grad_norm": 503.24005126953125, + "learning_rate": 1.8507387220224158e-05, + "loss": 80.9701, + "step": 142850 + }, + { + "epoch": 0.5771724770419809, + "grad_norm": 860.4577026367188, + "learning_rate": 1.8504602495651932e-05, + "loss": 73.6624, + "step": 142860 + }, + { + "epoch": 0.5772128783073486, + "grad_norm": 468.2400817871094, + "learning_rate": 1.8501817800233245e-05, + "loss": 74.9966, + "step": 142870 + }, + { + "epoch": 0.5772532795727162, + "grad_norm": 690.9278564453125, + "learning_rate": 1.8499033134022395e-05, + "loss": 92.719, + "step": 142880 + }, + { + "epoch": 0.5772936808380839, + "grad_norm": 385.7921447753906, + "learning_rate": 1.849624849707367e-05, + "loss": 68.3431, + "step": 142890 + }, + { + "epoch": 0.5773340821034515, + "grad_norm": 880.9039306640625, + "learning_rate": 1.849346388944135e-05, + "loss": 84.1611, + "step": 142900 + }, + { + "epoch": 0.5773744833688191, + "grad_norm": 993.9801025390625, + "learning_rate": 1.849067931117973e-05, + "loss": 93.7249, + "step": 142910 + }, + { + "epoch": 
0.5774148846341868, + "grad_norm": 721.9262084960938, + "learning_rate": 1.8487894762343086e-05, + "loss": 69.4166, + "step": 142920 + }, + { + "epoch": 0.5774552858995544, + "grad_norm": 1370.159423828125, + "learning_rate": 1.848511024298572e-05, + "loss": 80.6301, + "step": 142930 + }, + { + "epoch": 0.5774956871649221, + "grad_norm": 612.9443969726562, + "learning_rate": 1.8482325753161913e-05, + "loss": 56.4578, + "step": 142940 + }, + { + "epoch": 0.5775360884302896, + "grad_norm": 971.3619995117188, + "learning_rate": 1.8479541292925935e-05, + "loss": 87.6358, + "step": 142950 + }, + { + "epoch": 0.5775764896956572, + "grad_norm": 1130.362060546875, + "learning_rate": 1.8476756862332092e-05, + "loss": 73.8865, + "step": 142960 + }, + { + "epoch": 0.5776168909610249, + "grad_norm": 650.0335693359375, + "learning_rate": 1.847397246143466e-05, + "loss": 61.2788, + "step": 142970 + }, + { + "epoch": 0.5776572922263925, + "grad_norm": 689.062744140625, + "learning_rate": 1.847118809028791e-05, + "loss": 47.4355, + "step": 142980 + }, + { + "epoch": 0.5776976934917601, + "grad_norm": 1461.896240234375, + "learning_rate": 1.846840374894614e-05, + "loss": 76.3775, + "step": 142990 + }, + { + "epoch": 0.5777380947571278, + "grad_norm": 945.4819946289062, + "learning_rate": 1.846561943746363e-05, + "loss": 75.8783, + "step": 143000 + }, + { + "epoch": 0.5777784960224954, + "grad_norm": 483.4527893066406, + "learning_rate": 1.8462835155894657e-05, + "loss": 69.5701, + "step": 143010 + }, + { + "epoch": 0.5778188972878631, + "grad_norm": 581.1792602539062, + "learning_rate": 1.8460050904293506e-05, + "loss": 100.3118, + "step": 143020 + }, + { + "epoch": 0.5778592985532307, + "grad_norm": 337.6043701171875, + "learning_rate": 1.8457266682714448e-05, + "loss": 74.1299, + "step": 143030 + }, + { + "epoch": 0.5778996998185983, + "grad_norm": 757.7178955078125, + "learning_rate": 1.845448249121178e-05, + "loss": 60.3371, + "step": 143040 + }, + { + "epoch": 0.577940101083966, + "grad_norm": 1008.6424560546875, + "learning_rate": 1.8451698329839772e-05, + "loss": 66.0117, + "step": 143050 + }, + { + "epoch": 0.5779805023493336, + "grad_norm": 512.9010009765625, + "learning_rate": 1.8448914198652693e-05, + "loss": 86.8163, + "step": 143060 + }, + { + "epoch": 0.5780209036147013, + "grad_norm": 369.0415954589844, + "learning_rate": 1.844613009770484e-05, + "loss": 72.1168, + "step": 143070 + }, + { + "epoch": 0.5780613048800688, + "grad_norm": 415.4058532714844, + "learning_rate": 1.844334602705047e-05, + "loss": 62.7026, + "step": 143080 + }, + { + "epoch": 0.5781017061454364, + "grad_norm": 631.0258178710938, + "learning_rate": 1.844056198674388e-05, + "loss": 66.0469, + "step": 143090 + }, + { + "epoch": 0.5781421074108041, + "grad_norm": 684.9761352539062, + "learning_rate": 1.8437777976839336e-05, + "loss": 93.0086, + "step": 143100 + }, + { + "epoch": 0.5781825086761717, + "grad_norm": 434.4012451171875, + "learning_rate": 1.8434993997391107e-05, + "loss": 72.8754, + "step": 143110 + }, + { + "epoch": 0.5782229099415394, + "grad_norm": 456.5398254394531, + "learning_rate": 1.8432210048453483e-05, + "loss": 57.3578, + "step": 143120 + }, + { + "epoch": 0.578263311206907, + "grad_norm": 674.2822875976562, + "learning_rate": 1.842942613008073e-05, + "loss": 69.7925, + "step": 143130 + }, + { + "epoch": 0.5783037124722746, + "grad_norm": 427.42138671875, + "learning_rate": 1.8426642242327116e-05, + "loss": 67.8984, + "step": 143140 + }, + { + "epoch": 0.5783441137376423, + "grad_norm": 
1045.4957275390625, + "learning_rate": 1.8423858385246932e-05, + "loss": 77.0586, + "step": 143150 + }, + { + "epoch": 0.5783845150030099, + "grad_norm": 495.0683898925781, + "learning_rate": 1.8421074558894434e-05, + "loss": 52.1009, + "step": 143160 + }, + { + "epoch": 0.5784249162683776, + "grad_norm": 1379.06689453125, + "learning_rate": 1.8418290763323905e-05, + "loss": 62.1075, + "step": 143170 + }, + { + "epoch": 0.5784653175337452, + "grad_norm": 1056.2852783203125, + "learning_rate": 1.8415506998589615e-05, + "loss": 143.4947, + "step": 143180 + }, + { + "epoch": 0.5785057187991128, + "grad_norm": 896.133544921875, + "learning_rate": 1.8412723264745816e-05, + "loss": 62.1788, + "step": 143190 + }, + { + "epoch": 0.5785461200644805, + "grad_norm": 794.690185546875, + "learning_rate": 1.8409939561846808e-05, + "loss": 74.403, + "step": 143200 + }, + { + "epoch": 0.578586521329848, + "grad_norm": 777.2190551757812, + "learning_rate": 1.8407155889946837e-05, + "loss": 84.124, + "step": 143210 + }, + { + "epoch": 0.5786269225952156, + "grad_norm": 447.62261962890625, + "learning_rate": 1.8404372249100192e-05, + "loss": 41.5061, + "step": 143220 + }, + { + "epoch": 0.5786673238605833, + "grad_norm": 524.050537109375, + "learning_rate": 1.840158863936113e-05, + "loss": 82.55, + "step": 143230 + }, + { + "epoch": 0.5787077251259509, + "grad_norm": 1260.6102294921875, + "learning_rate": 1.8398805060783916e-05, + "loss": 85.7669, + "step": 143240 + }, + { + "epoch": 0.5787481263913186, + "grad_norm": 774.8618774414062, + "learning_rate": 1.8396021513422826e-05, + "loss": 85.6528, + "step": 143250 + }, + { + "epoch": 0.5787885276566862, + "grad_norm": 468.5274353027344, + "learning_rate": 1.839323799733212e-05, + "loss": 75.7079, + "step": 143260 + }, + { + "epoch": 0.5788289289220538, + "grad_norm": 797.4677124023438, + "learning_rate": 1.8390454512566064e-05, + "loss": 81.9463, + "step": 143270 + }, + { + "epoch": 0.5788693301874215, + "grad_norm": 671.010986328125, + "learning_rate": 1.838767105917893e-05, + "loss": 83.7733, + "step": 143280 + }, + { + "epoch": 0.5789097314527891, + "grad_norm": 630.0704345703125, + "learning_rate": 1.8384887637224972e-05, + "loss": 78.0152, + "step": 143290 + }, + { + "epoch": 0.5789501327181568, + "grad_norm": 783.2952880859375, + "learning_rate": 1.8382104246758467e-05, + "loss": 71.4273, + "step": 143300 + }, + { + "epoch": 0.5789905339835244, + "grad_norm": 641.6611328125, + "learning_rate": 1.8379320887833677e-05, + "loss": 101.8572, + "step": 143310 + }, + { + "epoch": 0.579030935248892, + "grad_norm": 972.37353515625, + "learning_rate": 1.8376537560504853e-05, + "loss": 52.7884, + "step": 143320 + }, + { + "epoch": 0.5790713365142596, + "grad_norm": 624.6841430664062, + "learning_rate": 1.8373754264826268e-05, + "loss": 59.8015, + "step": 143330 + }, + { + "epoch": 0.5791117377796272, + "grad_norm": 348.30816650390625, + "learning_rate": 1.837097100085218e-05, + "loss": 90.1917, + "step": 143340 + }, + { + "epoch": 0.5791521390449949, + "grad_norm": 353.26922607421875, + "learning_rate": 1.8368187768636852e-05, + "loss": 62.1037, + "step": 143350 + }, + { + "epoch": 0.5791925403103625, + "grad_norm": 783.35693359375, + "learning_rate": 1.8365404568234546e-05, + "loss": 90.1408, + "step": 143360 + }, + { + "epoch": 0.5792329415757301, + "grad_norm": 1107.7266845703125, + "learning_rate": 1.836262139969951e-05, + "loss": 77.9026, + "step": 143370 + }, + { + "epoch": 0.5792733428410978, + "grad_norm": 408.0915832519531, + "learning_rate": 
1.835983826308602e-05, + "loss": 52.9526, + "step": 143380 + }, + { + "epoch": 0.5793137441064654, + "grad_norm": 988.4971923828125, + "learning_rate": 1.8357055158448332e-05, + "loss": 85.0524, + "step": 143390 + }, + { + "epoch": 0.579354145371833, + "grad_norm": 565.4129638671875, + "learning_rate": 1.835427208584069e-05, + "loss": 58.5024, + "step": 143400 + }, + { + "epoch": 0.5793945466372007, + "grad_norm": 875.9247436523438, + "learning_rate": 1.8351489045317366e-05, + "loss": 85.685, + "step": 143410 + }, + { + "epoch": 0.5794349479025683, + "grad_norm": 694.6620483398438, + "learning_rate": 1.8348706036932614e-05, + "loss": 75.3954, + "step": 143420 + }, + { + "epoch": 0.579475349167936, + "grad_norm": 974.8397827148438, + "learning_rate": 1.834592306074068e-05, + "loss": 73.8769, + "step": 143430 + }, + { + "epoch": 0.5795157504333036, + "grad_norm": 582.9255981445312, + "learning_rate": 1.8343140116795838e-05, + "loss": 62.6175, + "step": 143440 + }, + { + "epoch": 0.5795561516986713, + "grad_norm": 429.4660339355469, + "learning_rate": 1.8340357205152322e-05, + "loss": 87.8255, + "step": 143450 + }, + { + "epoch": 0.5795965529640388, + "grad_norm": 588.3342895507812, + "learning_rate": 1.83375743258644e-05, + "loss": 76.5006, + "step": 143460 + }, + { + "epoch": 0.5796369542294064, + "grad_norm": 1295.525146484375, + "learning_rate": 1.833479147898633e-05, + "loss": 69.4349, + "step": 143470 + }, + { + "epoch": 0.5796773554947741, + "grad_norm": 954.9663696289062, + "learning_rate": 1.8332008664572345e-05, + "loss": 58.0516, + "step": 143480 + }, + { + "epoch": 0.5797177567601417, + "grad_norm": 758.15966796875, + "learning_rate": 1.832922588267672e-05, + "loss": 82.0252, + "step": 143490 + }, + { + "epoch": 0.5797581580255093, + "grad_norm": 433.24200439453125, + "learning_rate": 1.8326443133353695e-05, + "loss": 83.7892, + "step": 143500 + }, + { + "epoch": 0.579798559290877, + "grad_norm": 634.7221069335938, + "learning_rate": 1.832366041665752e-05, + "loss": 53.2224, + "step": 143510 + }, + { + "epoch": 0.5798389605562446, + "grad_norm": 828.9877319335938, + "learning_rate": 1.8320877732642452e-05, + "loss": 117.4559, + "step": 143520 + }, + { + "epoch": 0.5798793618216123, + "grad_norm": 1352.4969482421875, + "learning_rate": 1.8318095081362725e-05, + "loss": 76.4574, + "step": 143530 + }, + { + "epoch": 0.5799197630869799, + "grad_norm": 585.8112182617188, + "learning_rate": 1.8315312462872613e-05, + "loss": 68.183, + "step": 143540 + }, + { + "epoch": 0.5799601643523475, + "grad_norm": 495.07025146484375, + "learning_rate": 1.8312529877226352e-05, + "loss": 78.6922, + "step": 143550 + }, + { + "epoch": 0.5800005656177152, + "grad_norm": 1005.1109619140625, + "learning_rate": 1.830974732447818e-05, + "loss": 80.6573, + "step": 143560 + }, + { + "epoch": 0.5800409668830828, + "grad_norm": 956.5674438476562, + "learning_rate": 1.830696480468236e-05, + "loss": 62.6958, + "step": 143570 + }, + { + "epoch": 0.5800813681484505, + "grad_norm": 708.8758544921875, + "learning_rate": 1.8304182317893136e-05, + "loss": 61.1954, + "step": 143580 + }, + { + "epoch": 0.580121769413818, + "grad_norm": 537.5917358398438, + "learning_rate": 1.830139986416475e-05, + "loss": 37.4972, + "step": 143590 + }, + { + "epoch": 0.5801621706791856, + "grad_norm": 920.7310791015625, + "learning_rate": 1.829861744355144e-05, + "loss": 66.0846, + "step": 143600 + }, + { + "epoch": 0.5802025719445533, + "grad_norm": 696.39794921875, + "learning_rate": 1.8295835056107463e-05, + "loss": 60.8338, + "step": 
143610 + }, + { + "epoch": 0.5802429732099209, + "grad_norm": 864.6860961914062, + "learning_rate": 1.8293052701887063e-05, + "loss": 73.7724, + "step": 143620 + }, + { + "epoch": 0.5802833744752885, + "grad_norm": 413.5692443847656, + "learning_rate": 1.829027038094448e-05, + "loss": 59.1924, + "step": 143630 + }, + { + "epoch": 0.5803237757406562, + "grad_norm": 1076.6142578125, + "learning_rate": 1.828748809333395e-05, + "loss": 67.6973, + "step": 143640 + }, + { + "epoch": 0.5803641770060238, + "grad_norm": 327.4098815917969, + "learning_rate": 1.8284705839109725e-05, + "loss": 98.0332, + "step": 143650 + }, + { + "epoch": 0.5804045782713915, + "grad_norm": 841.1143798828125, + "learning_rate": 1.8281923618326043e-05, + "loss": 96.0397, + "step": 143660 + }, + { + "epoch": 0.5804449795367591, + "grad_norm": 1392.0682373046875, + "learning_rate": 1.8279141431037138e-05, + "loss": 76.7044, + "step": 143670 + }, + { + "epoch": 0.5804853808021267, + "grad_norm": 589.6026000976562, + "learning_rate": 1.8276359277297267e-05, + "loss": 55.1987, + "step": 143680 + }, + { + "epoch": 0.5805257820674944, + "grad_norm": 531.2316284179688, + "learning_rate": 1.8273577157160652e-05, + "loss": 69.0289, + "step": 143690 + }, + { + "epoch": 0.580566183332862, + "grad_norm": 719.670166015625, + "learning_rate": 1.8270795070681546e-05, + "loss": 93.0705, + "step": 143700 + }, + { + "epoch": 0.5806065845982297, + "grad_norm": 901.572509765625, + "learning_rate": 1.826801301791418e-05, + "loss": 55.1883, + "step": 143710 + }, + { + "epoch": 0.5806469858635972, + "grad_norm": 936.93603515625, + "learning_rate": 1.826523099891278e-05, + "loss": 70.3211, + "step": 143720 + }, + { + "epoch": 0.5806873871289648, + "grad_norm": 398.4930114746094, + "learning_rate": 1.8262449013731607e-05, + "loss": 72.6429, + "step": 143730 + }, + { + "epoch": 0.5807277883943325, + "grad_norm": 503.37738037109375, + "learning_rate": 1.8259667062424872e-05, + "loss": 49.4543, + "step": 143740 + }, + { + "epoch": 0.5807681896597001, + "grad_norm": 206.21829223632812, + "learning_rate": 1.8256885145046837e-05, + "loss": 45.9162, + "step": 143750 + }, + { + "epoch": 0.5808085909250678, + "grad_norm": 582.5980224609375, + "learning_rate": 1.825410326165172e-05, + "loss": 88.8725, + "step": 143760 + }, + { + "epoch": 0.5808489921904354, + "grad_norm": 824.5928344726562, + "learning_rate": 1.8251321412293758e-05, + "loss": 89.7028, + "step": 143770 + }, + { + "epoch": 0.580889393455803, + "grad_norm": 319.433837890625, + "learning_rate": 1.8248539597027188e-05, + "loss": 76.6962, + "step": 143780 + }, + { + "epoch": 0.5809297947211707, + "grad_norm": 750.9281616210938, + "learning_rate": 1.824575781590624e-05, + "loss": 55.8232, + "step": 143790 + }, + { + "epoch": 0.5809701959865383, + "grad_norm": 901.997802734375, + "learning_rate": 1.8242976068985137e-05, + "loss": 65.2963, + "step": 143800 + }, + { + "epoch": 0.581010597251906, + "grad_norm": 354.4429016113281, + "learning_rate": 1.824019435631813e-05, + "loss": 71.6046, + "step": 143810 + }, + { + "epoch": 0.5810509985172736, + "grad_norm": 1403.99853515625, + "learning_rate": 1.823741267795943e-05, + "loss": 91.7052, + "step": 143820 + }, + { + "epoch": 0.5810913997826412, + "grad_norm": 860.6673583984375, + "learning_rate": 1.823463103396329e-05, + "loss": 65.5947, + "step": 143830 + }, + { + "epoch": 0.5811318010480089, + "grad_norm": 745.9188842773438, + "learning_rate": 1.823184942438392e-05, + "loss": 59.8402, + "step": 143840 + }, + { + "epoch": 0.5811722023133764, + 
"grad_norm": 1065.3251953125, + "learning_rate": 1.8229067849275556e-05, + "loss": 64.3331, + "step": 143850 + }, + { + "epoch": 0.581212603578744, + "grad_norm": 834.0921630859375, + "learning_rate": 1.822628630869243e-05, + "loss": 89.9459, + "step": 143860 + }, + { + "epoch": 0.5812530048441117, + "grad_norm": 474.15771484375, + "learning_rate": 1.8223504802688767e-05, + "loss": 68.5555, + "step": 143870 + }, + { + "epoch": 0.5812934061094793, + "grad_norm": 636.53955078125, + "learning_rate": 1.822072333131878e-05, + "loss": 67.4284, + "step": 143880 + }, + { + "epoch": 0.581333807374847, + "grad_norm": 588.023193359375, + "learning_rate": 1.821794189463672e-05, + "loss": 50.1576, + "step": 143890 + }, + { + "epoch": 0.5813742086402146, + "grad_norm": 502.2118835449219, + "learning_rate": 1.821516049269679e-05, + "loss": 80.6926, + "step": 143900 + }, + { + "epoch": 0.5814146099055822, + "grad_norm": 957.3973999023438, + "learning_rate": 1.8212379125553235e-05, + "loss": 57.7234, + "step": 143910 + }, + { + "epoch": 0.5814550111709499, + "grad_norm": 793.348388671875, + "learning_rate": 1.8209597793260268e-05, + "loss": 69.1899, + "step": 143920 + }, + { + "epoch": 0.5814954124363175, + "grad_norm": 323.1958312988281, + "learning_rate": 1.820681649587211e-05, + "loss": 72.989, + "step": 143930 + }, + { + "epoch": 0.5815358137016852, + "grad_norm": 350.43975830078125, + "learning_rate": 1.8204035233442988e-05, + "loss": 135.2075, + "step": 143940 + }, + { + "epoch": 0.5815762149670528, + "grad_norm": 911.21044921875, + "learning_rate": 1.8201254006027126e-05, + "loss": 76.9373, + "step": 143950 + }, + { + "epoch": 0.5816166162324204, + "grad_norm": 1151.969970703125, + "learning_rate": 1.8198472813678737e-05, + "loss": 79.3872, + "step": 143960 + }, + { + "epoch": 0.581657017497788, + "grad_norm": 834.4447021484375, + "learning_rate": 1.8195691656452057e-05, + "loss": 99.0614, + "step": 143970 + }, + { + "epoch": 0.5816974187631556, + "grad_norm": 616.169677734375, + "learning_rate": 1.8192910534401286e-05, + "loss": 66.0762, + "step": 143980 + }, + { + "epoch": 0.5817378200285233, + "grad_norm": 472.40679931640625, + "learning_rate": 1.8190129447580666e-05, + "loss": 90.4043, + "step": 143990 + }, + { + "epoch": 0.5817782212938909, + "grad_norm": 622.4987182617188, + "learning_rate": 1.8187348396044402e-05, + "loss": 66.0046, + "step": 144000 + }, + { + "epoch": 0.5818186225592585, + "grad_norm": 1133.1392822265625, + "learning_rate": 1.8184567379846706e-05, + "loss": 62.7294, + "step": 144010 + }, + { + "epoch": 0.5818590238246262, + "grad_norm": 875.8475341796875, + "learning_rate": 1.8181786399041813e-05, + "loss": 83.607, + "step": 144020 + }, + { + "epoch": 0.5818994250899938, + "grad_norm": 1125.701904296875, + "learning_rate": 1.8179005453683925e-05, + "loss": 86.2657, + "step": 144030 + }, + { + "epoch": 0.5819398263553615, + "grad_norm": 882.1409912109375, + "learning_rate": 1.8176224543827264e-05, + "loss": 66.4059, + "step": 144040 + }, + { + "epoch": 0.5819802276207291, + "grad_norm": 476.163330078125, + "learning_rate": 1.817344366952605e-05, + "loss": 90.994, + "step": 144050 + }, + { + "epoch": 0.5820206288860967, + "grad_norm": 636.8788452148438, + "learning_rate": 1.817066283083448e-05, + "loss": 51.6003, + "step": 144060 + }, + { + "epoch": 0.5820610301514644, + "grad_norm": 724.1245727539062, + "learning_rate": 1.816788202780679e-05, + "loss": 82.4386, + "step": 144070 + }, + { + "epoch": 0.582101431416832, + "grad_norm": 488.21728515625, + "learning_rate": 
1.8165101260497183e-05, + "loss": 54.2722, + "step": 144080 + }, + { + "epoch": 0.5821418326821997, + "grad_norm": 745.7999267578125, + "learning_rate": 1.8162320528959863e-05, + "loss": 61.3434, + "step": 144090 + }, + { + "epoch": 0.5821822339475672, + "grad_norm": 852.4511108398438, + "learning_rate": 1.815953983324906e-05, + "loss": 87.7434, + "step": 144100 + }, + { + "epoch": 0.5822226352129348, + "grad_norm": 542.41796875, + "learning_rate": 1.8156759173418974e-05, + "loss": 81.4378, + "step": 144110 + }, + { + "epoch": 0.5822630364783025, + "grad_norm": 883.736083984375, + "learning_rate": 1.815397854952381e-05, + "loss": 94.3239, + "step": 144120 + }, + { + "epoch": 0.5823034377436701, + "grad_norm": 378.1841125488281, + "learning_rate": 1.8151197961617794e-05, + "loss": 81.662, + "step": 144130 + }, + { + "epoch": 0.5823438390090377, + "grad_norm": 1164.244140625, + "learning_rate": 1.8148417409755116e-05, + "loss": 80.5117, + "step": 144140 + }, + { + "epoch": 0.5823842402744054, + "grad_norm": 878.5115356445312, + "learning_rate": 1.814563689399e-05, + "loss": 53.1486, + "step": 144150 + }, + { + "epoch": 0.582424641539773, + "grad_norm": 473.8120422363281, + "learning_rate": 1.8142856414376648e-05, + "loss": 119.4852, + "step": 144160 + }, + { + "epoch": 0.5824650428051407, + "grad_norm": 1467.2423095703125, + "learning_rate": 1.814007597096926e-05, + "loss": 69.3578, + "step": 144170 + }, + { + "epoch": 0.5825054440705083, + "grad_norm": 1082.0887451171875, + "learning_rate": 1.8137295563822057e-05, + "loss": 92.3233, + "step": 144180 + }, + { + "epoch": 0.582545845335876, + "grad_norm": 669.7514038085938, + "learning_rate": 1.8134515192989232e-05, + "loss": 69.6717, + "step": 144190 + }, + { + "epoch": 0.5825862466012436, + "grad_norm": 597.5444946289062, + "learning_rate": 1.8131734858524993e-05, + "loss": 69.7234, + "step": 144200 + }, + { + "epoch": 0.5826266478666112, + "grad_norm": 806.731689453125, + "learning_rate": 1.8128954560483547e-05, + "loss": 68.0268, + "step": 144210 + }, + { + "epoch": 0.5826670491319789, + "grad_norm": 559.9693603515625, + "learning_rate": 1.8126174298919087e-05, + "loss": 80.1606, + "step": 144220 + }, + { + "epoch": 0.5827074503973464, + "grad_norm": 899.262939453125, + "learning_rate": 1.8123394073885834e-05, + "loss": 66.2466, + "step": 144230 + }, + { + "epoch": 0.582747851662714, + "grad_norm": 788.6441650390625, + "learning_rate": 1.812061388543798e-05, + "loss": 76.7915, + "step": 144240 + }, + { + "epoch": 0.5827882529280817, + "grad_norm": 425.7440490722656, + "learning_rate": 1.8117833733629715e-05, + "loss": 77.5345, + "step": 144250 + }, + { + "epoch": 0.5828286541934493, + "grad_norm": 985.9098510742188, + "learning_rate": 1.811505361851526e-05, + "loss": 106.0092, + "step": 144260 + }, + { + "epoch": 0.582869055458817, + "grad_norm": 356.7967224121094, + "learning_rate": 1.81122735401488e-05, + "loss": 44.4209, + "step": 144270 + }, + { + "epoch": 0.5829094567241846, + "grad_norm": 443.036376953125, + "learning_rate": 1.8109493498584542e-05, + "loss": 87.7187, + "step": 144280 + }, + { + "epoch": 0.5829498579895522, + "grad_norm": 872.9697265625, + "learning_rate": 1.810671349387668e-05, + "loss": 64.9654, + "step": 144290 + }, + { + "epoch": 0.5829902592549199, + "grad_norm": 437.36767578125, + "learning_rate": 1.8103933526079412e-05, + "loss": 62.6141, + "step": 144300 + }, + { + "epoch": 0.5830306605202875, + "grad_norm": 949.9866333007812, + "learning_rate": 1.8101153595246942e-05, + "loss": 87.6072, + "step": 144310 + }, 
+ { + "epoch": 0.5830710617856552, + "grad_norm": 549.2392578125, + "learning_rate": 1.8098373701433458e-05, + "loss": 84.1185, + "step": 144320 + }, + { + "epoch": 0.5831114630510228, + "grad_norm": 459.0373840332031, + "learning_rate": 1.8095593844693152e-05, + "loss": 51.4175, + "step": 144330 + }, + { + "epoch": 0.5831518643163904, + "grad_norm": 1022.5594482421875, + "learning_rate": 1.809281402508023e-05, + "loss": 78.2401, + "step": 144340 + }, + { + "epoch": 0.5831922655817581, + "grad_norm": 607.5216064453125, + "learning_rate": 1.8090034242648875e-05, + "loss": 70.4678, + "step": 144350 + }, + { + "epoch": 0.5832326668471256, + "grad_norm": 932.8378295898438, + "learning_rate": 1.808725449745329e-05, + "loss": 69.4081, + "step": 144360 + }, + { + "epoch": 0.5832730681124932, + "grad_norm": 420.1859130859375, + "learning_rate": 1.808447478954767e-05, + "loss": 67.725, + "step": 144370 + }, + { + "epoch": 0.5833134693778609, + "grad_norm": 991.890625, + "learning_rate": 1.8081695118986187e-05, + "loss": 79.0976, + "step": 144380 + }, + { + "epoch": 0.5833538706432285, + "grad_norm": 436.71405029296875, + "learning_rate": 1.8078915485823057e-05, + "loss": 81.9165, + "step": 144390 + }, + { + "epoch": 0.5833942719085962, + "grad_norm": 785.7042846679688, + "learning_rate": 1.8076135890112457e-05, + "loss": 108.0431, + "step": 144400 + }, + { + "epoch": 0.5834346731739638, + "grad_norm": 849.3135375976562, + "learning_rate": 1.8073356331908568e-05, + "loss": 74.8375, + "step": 144410 + }, + { + "epoch": 0.5834750744393314, + "grad_norm": 710.7310791015625, + "learning_rate": 1.8070576811265596e-05, + "loss": 70.6979, + "step": 144420 + }, + { + "epoch": 0.5835154757046991, + "grad_norm": 554.1605224609375, + "learning_rate": 1.8067797328237717e-05, + "loss": 42.3252, + "step": 144430 + }, + { + "epoch": 0.5835558769700667, + "grad_norm": 438.814453125, + "learning_rate": 1.806501788287913e-05, + "loss": 84.9278, + "step": 144440 + }, + { + "epoch": 0.5835962782354344, + "grad_norm": 622.88134765625, + "learning_rate": 1.8062238475244022e-05, + "loss": 66.237, + "step": 144450 + }, + { + "epoch": 0.583636679500802, + "grad_norm": 527.3923950195312, + "learning_rate": 1.8059459105386562e-05, + "loss": 65.5786, + "step": 144460 + }, + { + "epoch": 0.5836770807661696, + "grad_norm": 642.645263671875, + "learning_rate": 1.805667977336095e-05, + "loss": 73.4835, + "step": 144470 + }, + { + "epoch": 0.5837174820315373, + "grad_norm": 716.1331176757812, + "learning_rate": 1.805390047922137e-05, + "loss": 101.7311, + "step": 144480 + }, + { + "epoch": 0.5837578832969048, + "grad_norm": 749.9345092773438, + "learning_rate": 1.805112122302199e-05, + "loss": 48.748, + "step": 144490 + }, + { + "epoch": 0.5837982845622725, + "grad_norm": 1246.85205078125, + "learning_rate": 1.8048342004817015e-05, + "loss": 85.1446, + "step": 144500 + }, + { + "epoch": 0.5838386858276401, + "grad_norm": 688.3642578125, + "learning_rate": 1.804556282466061e-05, + "loss": 68.337, + "step": 144510 + }, + { + "epoch": 0.5838790870930077, + "grad_norm": 562.5650634765625, + "learning_rate": 1.804278368260697e-05, + "loss": 95.1201, + "step": 144520 + }, + { + "epoch": 0.5839194883583754, + "grad_norm": 375.51409912109375, + "learning_rate": 1.804000457871027e-05, + "loss": 49.1695, + "step": 144530 + }, + { + "epoch": 0.583959889623743, + "grad_norm": 810.967529296875, + "learning_rate": 1.8037225513024687e-05, + "loss": 54.6727, + "step": 144540 + }, + { + "epoch": 0.5840002908891107, + "grad_norm": 
1960.6898193359375, + "learning_rate": 1.803444648560441e-05, + "loss": 79.6208, + "step": 144550 + }, + { + "epoch": 0.5840406921544783, + "grad_norm": 555.3248291015625, + "learning_rate": 1.8031667496503607e-05, + "loss": 59.3557, + "step": 144560 + }, + { + "epoch": 0.5840810934198459, + "grad_norm": 1003.27392578125, + "learning_rate": 1.802888854577645e-05, + "loss": 85.5891, + "step": 144570 + }, + { + "epoch": 0.5841214946852136, + "grad_norm": 727.3766479492188, + "learning_rate": 1.802610963347714e-05, + "loss": 69.544, + "step": 144580 + }, + { + "epoch": 0.5841618959505812, + "grad_norm": 860.7554321289062, + "learning_rate": 1.8023330759659824e-05, + "loss": 91.0683, + "step": 144590 + }, + { + "epoch": 0.5842022972159489, + "grad_norm": 747.0311889648438, + "learning_rate": 1.80205519243787e-05, + "loss": 52.4011, + "step": 144600 + }, + { + "epoch": 0.5842426984813164, + "grad_norm": 1274.44482421875, + "learning_rate": 1.801777312768794e-05, + "loss": 66.446, + "step": 144610 + }, + { + "epoch": 0.584283099746684, + "grad_norm": 494.4084167480469, + "learning_rate": 1.801499436964171e-05, + "loss": 71.1086, + "step": 144620 + }, + { + "epoch": 0.5843235010120517, + "grad_norm": 717.718505859375, + "learning_rate": 1.801221565029418e-05, + "loss": 84.0222, + "step": 144630 + }, + { + "epoch": 0.5843639022774193, + "grad_norm": 424.7037658691406, + "learning_rate": 1.800943696969954e-05, + "loss": 73.1394, + "step": 144640 + }, + { + "epoch": 0.5844043035427869, + "grad_norm": 1156.7694091796875, + "learning_rate": 1.8006658327911946e-05, + "loss": 82.7744, + "step": 144650 + }, + { + "epoch": 0.5844447048081546, + "grad_norm": 1147.0478515625, + "learning_rate": 1.8003879724985577e-05, + "loss": 78.9101, + "step": 144660 + }, + { + "epoch": 0.5844851060735222, + "grad_norm": 621.8722534179688, + "learning_rate": 1.8001101160974592e-05, + "loss": 59.5338, + "step": 144670 + }, + { + "epoch": 0.5845255073388899, + "grad_norm": 925.1968994140625, + "learning_rate": 1.7998322635933177e-05, + "loss": 67.5347, + "step": 144680 + }, + { + "epoch": 0.5845659086042575, + "grad_norm": 926.365234375, + "learning_rate": 1.7995544149915495e-05, + "loss": 75.7633, + "step": 144690 + }, + { + "epoch": 0.5846063098696251, + "grad_norm": 978.8847045898438, + "learning_rate": 1.7992765702975702e-05, + "loss": 68.4061, + "step": 144700 + }, + { + "epoch": 0.5846467111349928, + "grad_norm": 1541.949462890625, + "learning_rate": 1.7989987295167983e-05, + "loss": 85.3786, + "step": 144710 + }, + { + "epoch": 0.5846871124003604, + "grad_norm": 823.48291015625, + "learning_rate": 1.7987208926546496e-05, + "loss": 57.6601, + "step": 144720 + }, + { + "epoch": 0.5847275136657281, + "grad_norm": 1333.08984375, + "learning_rate": 1.7984430597165403e-05, + "loss": 66.687, + "step": 144730 + }, + { + "epoch": 0.5847679149310956, + "grad_norm": 482.9292907714844, + "learning_rate": 1.798165230707888e-05, + "loss": 79.4282, + "step": 144740 + }, + { + "epoch": 0.5848083161964632, + "grad_norm": 713.89697265625, + "learning_rate": 1.7978874056341075e-05, + "loss": 78.433, + "step": 144750 + }, + { + "epoch": 0.5848487174618309, + "grad_norm": 670.4993286132812, + "learning_rate": 1.797609584500617e-05, + "loss": 62.4626, + "step": 144760 + }, + { + "epoch": 0.5848891187271985, + "grad_norm": 549.1185913085938, + "learning_rate": 1.7973317673128314e-05, + "loss": 88.8455, + "step": 144770 + }, + { + "epoch": 0.5849295199925661, + "grad_norm": 1020.6071166992188, + "learning_rate": 1.7970539540761666e-05, + 
"loss": 69.9518, + "step": 144780 + }, + { + "epoch": 0.5849699212579338, + "grad_norm": 496.9346618652344, + "learning_rate": 1.7967761447960406e-05, + "loss": 33.0423, + "step": 144790 + }, + { + "epoch": 0.5850103225233014, + "grad_norm": 509.3022155761719, + "learning_rate": 1.7964983394778677e-05, + "loss": 77.3507, + "step": 144800 + }, + { + "epoch": 0.5850507237886691, + "grad_norm": 992.3700561523438, + "learning_rate": 1.7962205381270647e-05, + "loss": 75.3283, + "step": 144810 + }, + { + "epoch": 0.5850911250540367, + "grad_norm": 1461.7364501953125, + "learning_rate": 1.7959427407490475e-05, + "loss": 102.6315, + "step": 144820 + }, + { + "epoch": 0.5851315263194043, + "grad_norm": 969.3984375, + "learning_rate": 1.7956649473492306e-05, + "loss": 67.4022, + "step": 144830 + }, + { + "epoch": 0.585171927584772, + "grad_norm": 506.9152526855469, + "learning_rate": 1.795387157933032e-05, + "loss": 104.7117, + "step": 144840 + }, + { + "epoch": 0.5852123288501396, + "grad_norm": 1165.9378662109375, + "learning_rate": 1.7951093725058657e-05, + "loss": 71.6852, + "step": 144850 + }, + { + "epoch": 0.5852527301155073, + "grad_norm": 949.6725463867188, + "learning_rate": 1.7948315910731468e-05, + "loss": 59.3072, + "step": 144860 + }, + { + "epoch": 0.5852931313808748, + "grad_norm": 628.11376953125, + "learning_rate": 1.7945538136402927e-05, + "loss": 78.5801, + "step": 144870 + }, + { + "epoch": 0.5853335326462424, + "grad_norm": 1170.85791015625, + "learning_rate": 1.794276040212717e-05, + "loss": 96.2942, + "step": 144880 + }, + { + "epoch": 0.5853739339116101, + "grad_norm": 697.0115356445312, + "learning_rate": 1.793998270795837e-05, + "loss": 71.4787, + "step": 144890 + }, + { + "epoch": 0.5854143351769777, + "grad_norm": 629.6251831054688, + "learning_rate": 1.7937205053950658e-05, + "loss": 105.1703, + "step": 144900 + }, + { + "epoch": 0.5854547364423454, + "grad_norm": 653.2718505859375, + "learning_rate": 1.7934427440158194e-05, + "loss": 67.8922, + "step": 144910 + }, + { + "epoch": 0.585495137707713, + "grad_norm": 827.2119750976562, + "learning_rate": 1.7931649866635136e-05, + "loss": 59.6107, + "step": 144920 + }, + { + "epoch": 0.5855355389730806, + "grad_norm": 765.3534545898438, + "learning_rate": 1.7928872333435628e-05, + "loss": 85.3652, + "step": 144930 + }, + { + "epoch": 0.5855759402384483, + "grad_norm": 1301.1514892578125, + "learning_rate": 1.7926094840613814e-05, + "loss": 87.1713, + "step": 144940 + }, + { + "epoch": 0.5856163415038159, + "grad_norm": 610.5664672851562, + "learning_rate": 1.7923317388223858e-05, + "loss": 66.9968, + "step": 144950 + }, + { + "epoch": 0.5856567427691836, + "grad_norm": 518.5410766601562, + "learning_rate": 1.792053997631989e-05, + "loss": 62.8869, + "step": 144960 + }, + { + "epoch": 0.5856971440345512, + "grad_norm": 371.25213623046875, + "learning_rate": 1.791776260495607e-05, + "loss": 63.456, + "step": 144970 + }, + { + "epoch": 0.5857375452999188, + "grad_norm": 688.3592529296875, + "learning_rate": 1.7914985274186543e-05, + "loss": 51.1553, + "step": 144980 + }, + { + "epoch": 0.5857779465652865, + "grad_norm": 560.9042358398438, + "learning_rate": 1.7912207984065446e-05, + "loss": 68.2676, + "step": 144990 + }, + { + "epoch": 0.585818347830654, + "grad_norm": 1206.9537353515625, + "learning_rate": 1.7909430734646936e-05, + "loss": 73.0278, + "step": 145000 + }, + { + "epoch": 0.5858587490960216, + "grad_norm": 653.492919921875, + "learning_rate": 1.7906653525985147e-05, + "loss": 67.3937, + "step": 145010 + }, + { + 
"epoch": 0.5858991503613893, + "grad_norm": 454.8931579589844, + "learning_rate": 1.7903876358134218e-05, + "loss": 58.4344, + "step": 145020 + }, + { + "epoch": 0.5859395516267569, + "grad_norm": 518.9111328125, + "learning_rate": 1.790109923114831e-05, + "loss": 64.6435, + "step": 145030 + }, + { + "epoch": 0.5859799528921246, + "grad_norm": 910.3766479492188, + "learning_rate": 1.789832214508154e-05, + "loss": 91.9562, + "step": 145040 + }, + { + "epoch": 0.5860203541574922, + "grad_norm": 455.383056640625, + "learning_rate": 1.7895545099988073e-05, + "loss": 68.9192, + "step": 145050 + }, + { + "epoch": 0.5860607554228598, + "grad_norm": 1083.9915771484375, + "learning_rate": 1.7892768095922034e-05, + "loss": 90.4845, + "step": 145060 + }, + { + "epoch": 0.5861011566882275, + "grad_norm": 366.7750244140625, + "learning_rate": 1.7889991132937567e-05, + "loss": 52.581, + "step": 145070 + }, + { + "epoch": 0.5861415579535951, + "grad_norm": 555.3107299804688, + "learning_rate": 1.7887214211088812e-05, + "loss": 97.9957, + "step": 145080 + }, + { + "epoch": 0.5861819592189628, + "grad_norm": 555.0896606445312, + "learning_rate": 1.78844373304299e-05, + "loss": 83.2203, + "step": 145090 + }, + { + "epoch": 0.5862223604843304, + "grad_norm": 722.1892700195312, + "learning_rate": 1.7881660491014966e-05, + "loss": 64.7811, + "step": 145100 + }, + { + "epoch": 0.586262761749698, + "grad_norm": 810.1192016601562, + "learning_rate": 1.7878883692898158e-05, + "loss": 80.1366, + "step": 145110 + }, + { + "epoch": 0.5863031630150657, + "grad_norm": 749.8634033203125, + "learning_rate": 1.78761069361336e-05, + "loss": 82.1193, + "step": 145120 + }, + { + "epoch": 0.5863435642804332, + "grad_norm": 1170.0008544921875, + "learning_rate": 1.7873330220775437e-05, + "loss": 89.0059, + "step": 145130 + }, + { + "epoch": 0.5863839655458009, + "grad_norm": 954.974609375, + "learning_rate": 1.7870553546877792e-05, + "loss": 83.1496, + "step": 145140 + }, + { + "epoch": 0.5864243668111685, + "grad_norm": 747.7681884765625, + "learning_rate": 1.7867776914494805e-05, + "loss": 83.2544, + "step": 145150 + }, + { + "epoch": 0.5864647680765361, + "grad_norm": 634.8065795898438, + "learning_rate": 1.7865000323680606e-05, + "loss": 75.5848, + "step": 145160 + }, + { + "epoch": 0.5865051693419038, + "grad_norm": 1184.8255615234375, + "learning_rate": 1.786222377448932e-05, + "loss": 85.4279, + "step": 145170 + }, + { + "epoch": 0.5865455706072714, + "grad_norm": 704.1266479492188, + "learning_rate": 1.785944726697508e-05, + "loss": 74.7246, + "step": 145180 + }, + { + "epoch": 0.5865859718726391, + "grad_norm": 894.9578247070312, + "learning_rate": 1.7856670801192025e-05, + "loss": 68.4385, + "step": 145190 + }, + { + "epoch": 0.5866263731380067, + "grad_norm": 686.3607788085938, + "learning_rate": 1.7853894377194267e-05, + "loss": 88.0327, + "step": 145200 + }, + { + "epoch": 0.5866667744033743, + "grad_norm": 804.268798828125, + "learning_rate": 1.7851117995035952e-05, + "loss": 78.0007, + "step": 145210 + }, + { + "epoch": 0.586707175668742, + "grad_norm": 501.18170166015625, + "learning_rate": 1.78483416547712e-05, + "loss": 48.69, + "step": 145220 + }, + { + "epoch": 0.5867475769341096, + "grad_norm": 759.3004760742188, + "learning_rate": 1.7845565356454126e-05, + "loss": 65.0538, + "step": 145230 + }, + { + "epoch": 0.5867879781994773, + "grad_norm": 829.37548828125, + "learning_rate": 1.784278910013887e-05, + "loss": 103.924, + "step": 145240 + }, + { + "epoch": 0.5868283794648448, + "grad_norm": 
829.8699340820312, + "learning_rate": 1.7840012885879543e-05, + "loss": 69.9416, + "step": 145250 + }, + { + "epoch": 0.5868687807302124, + "grad_norm": 821.4551391601562, + "learning_rate": 1.7837236713730284e-05, + "loss": 72.0421, + "step": 145260 + }, + { + "epoch": 0.5869091819955801, + "grad_norm": 823.4691162109375, + "learning_rate": 1.783446058374521e-05, + "loss": 124.1585, + "step": 145270 + }, + { + "epoch": 0.5869495832609477, + "grad_norm": 738.3078002929688, + "learning_rate": 1.7831684495978435e-05, + "loss": 53.218, + "step": 145280 + }, + { + "epoch": 0.5869899845263153, + "grad_norm": 1908.673095703125, + "learning_rate": 1.7828908450484095e-05, + "loss": 77.1071, + "step": 145290 + }, + { + "epoch": 0.587030385791683, + "grad_norm": 543.4962768554688, + "learning_rate": 1.7826132447316303e-05, + "loss": 68.8561, + "step": 145300 + }, + { + "epoch": 0.5870707870570506, + "grad_norm": 738.4700317382812, + "learning_rate": 1.7823356486529168e-05, + "loss": 78.9929, + "step": 145310 + }, + { + "epoch": 0.5871111883224183, + "grad_norm": 552.8862915039062, + "learning_rate": 1.7820580568176828e-05, + "loss": 57.158, + "step": 145320 + }, + { + "epoch": 0.5871515895877859, + "grad_norm": 734.3196411132812, + "learning_rate": 1.781780469231339e-05, + "loss": 64.4843, + "step": 145330 + }, + { + "epoch": 0.5871919908531535, + "grad_norm": 1125.4459228515625, + "learning_rate": 1.7815028858992972e-05, + "loss": 101.3399, + "step": 145340 + }, + { + "epoch": 0.5872323921185212, + "grad_norm": 374.02874755859375, + "learning_rate": 1.7812253068269697e-05, + "loss": 72.9662, + "step": 145350 + }, + { + "epoch": 0.5872727933838888, + "grad_norm": 905.4326171875, + "learning_rate": 1.780947732019766e-05, + "loss": 72.3633, + "step": 145360 + }, + { + "epoch": 0.5873131946492565, + "grad_norm": 849.2272338867188, + "learning_rate": 1.7806701614831004e-05, + "loss": 62.703, + "step": 145370 + }, + { + "epoch": 0.587353595914624, + "grad_norm": 660.7412719726562, + "learning_rate": 1.7803925952223827e-05, + "loss": 66.7798, + "step": 145380 + }, + { + "epoch": 0.5873939971799916, + "grad_norm": 668.4657592773438, + "learning_rate": 1.7801150332430238e-05, + "loss": 49.7362, + "step": 145390 + }, + { + "epoch": 0.5874343984453593, + "grad_norm": 1178.5489501953125, + "learning_rate": 1.779837475550436e-05, + "loss": 74.5895, + "step": 145400 + }, + { + "epoch": 0.5874747997107269, + "grad_norm": 1023.1177978515625, + "learning_rate": 1.7795599221500298e-05, + "loss": 98.3769, + "step": 145410 + }, + { + "epoch": 0.5875152009760946, + "grad_norm": 656.2416381835938, + "learning_rate": 1.7792823730472167e-05, + "loss": 49.7235, + "step": 145420 + }, + { + "epoch": 0.5875556022414622, + "grad_norm": 717.4990234375, + "learning_rate": 1.7790048282474075e-05, + "loss": 69.1352, + "step": 145430 + }, + { + "epoch": 0.5875960035068298, + "grad_norm": 760.0736694335938, + "learning_rate": 1.778727287756012e-05, + "loss": 62.798, + "step": 145440 + }, + { + "epoch": 0.5876364047721975, + "grad_norm": 754.7506713867188, + "learning_rate": 1.778449751578443e-05, + "loss": 58.7358, + "step": 145450 + }, + { + "epoch": 0.5876768060375651, + "grad_norm": 2618.21337890625, + "learning_rate": 1.7781722197201098e-05, + "loss": 55.0575, + "step": 145460 + }, + { + "epoch": 0.5877172073029328, + "grad_norm": 256.0317077636719, + "learning_rate": 1.7778946921864228e-05, + "loss": 52.1169, + "step": 145470 + }, + { + "epoch": 0.5877576085683004, + "grad_norm": 1069.7344970703125, + "learning_rate": 
1.777617168982794e-05, + "loss": 110.0804, + "step": 145480 + }, + { + "epoch": 0.587798009833668, + "grad_norm": 822.4365234375, + "learning_rate": 1.7773396501146326e-05, + "loss": 89.8978, + "step": 145490 + }, + { + "epoch": 0.5878384110990357, + "grad_norm": 720.0077514648438, + "learning_rate": 1.7770621355873493e-05, + "loss": 79.3296, + "step": 145500 + }, + { + "epoch": 0.5878788123644032, + "grad_norm": 807.1599731445312, + "learning_rate": 1.776784625406355e-05, + "loss": 92.1307, + "step": 145510 + }, + { + "epoch": 0.5879192136297708, + "grad_norm": 890.0990600585938, + "learning_rate": 1.7765071195770582e-05, + "loss": 60.8189, + "step": 145520 + }, + { + "epoch": 0.5879596148951385, + "grad_norm": 347.5273132324219, + "learning_rate": 1.7762296181048712e-05, + "loss": 47.307, + "step": 145530 + }, + { + "epoch": 0.5880000161605061, + "grad_norm": 246.97401428222656, + "learning_rate": 1.775952120995203e-05, + "loss": 68.0006, + "step": 145540 + }, + { + "epoch": 0.5880404174258738, + "grad_norm": 568.8831787109375, + "learning_rate": 1.7756746282534624e-05, + "loss": 42.8832, + "step": 145550 + }, + { + "epoch": 0.5880808186912414, + "grad_norm": 1793.7362060546875, + "learning_rate": 1.7753971398850614e-05, + "loss": 94.3363, + "step": 145560 + }, + { + "epoch": 0.588121219956609, + "grad_norm": 673.98974609375, + "learning_rate": 1.7751196558954083e-05, + "loss": 74.9455, + "step": 145570 + }, + { + "epoch": 0.5881616212219767, + "grad_norm": 508.4811706542969, + "learning_rate": 1.774842176289914e-05, + "loss": 50.9419, + "step": 145580 + }, + { + "epoch": 0.5882020224873443, + "grad_norm": 158.41859436035156, + "learning_rate": 1.774564701073987e-05, + "loss": 50.902, + "step": 145590 + }, + { + "epoch": 0.588242423752712, + "grad_norm": 630.345947265625, + "learning_rate": 1.7742872302530366e-05, + "loss": 105.3083, + "step": 145600 + }, + { + "epoch": 0.5882828250180796, + "grad_norm": 546.78759765625, + "learning_rate": 1.774009763832474e-05, + "loss": 60.5665, + "step": 145610 + }, + { + "epoch": 0.5883232262834472, + "grad_norm": 322.5182189941406, + "learning_rate": 1.773732301817707e-05, + "loss": 66.3719, + "step": 145620 + }, + { + "epoch": 0.5883636275488149, + "grad_norm": 622.3362426757812, + "learning_rate": 1.7734548442141443e-05, + "loss": 89.7099, + "step": 145630 + }, + { + "epoch": 0.5884040288141824, + "grad_norm": 570.447998046875, + "learning_rate": 1.773177391027197e-05, + "loss": 54.4511, + "step": 145640 + }, + { + "epoch": 0.58844443007955, + "grad_norm": 812.7616577148438, + "learning_rate": 1.7728999422622725e-05, + "loss": 97.0963, + "step": 145650 + }, + { + "epoch": 0.5884848313449177, + "grad_norm": 605.4265747070312, + "learning_rate": 1.7726224979247816e-05, + "loss": 79.1807, + "step": 145660 + }, + { + "epoch": 0.5885252326102853, + "grad_norm": 857.1360473632812, + "learning_rate": 1.772345058020132e-05, + "loss": 87.8014, + "step": 145670 + }, + { + "epoch": 0.588565633875653, + "grad_norm": 478.56475830078125, + "learning_rate": 1.772067622553732e-05, + "loss": 55.6908, + "step": 145680 + }, + { + "epoch": 0.5886060351410206, + "grad_norm": 453.3831787109375, + "learning_rate": 1.771790191530992e-05, + "loss": 73.3436, + "step": 145690 + }, + { + "epoch": 0.5886464364063883, + "grad_norm": 1316.352294921875, + "learning_rate": 1.7715127649573195e-05, + "loss": 73.4792, + "step": 145700 + }, + { + "epoch": 0.5886868376717559, + "grad_norm": 440.4425048828125, + "learning_rate": 1.7712353428381222e-05, + "loss": 87.9735, + "step": 
145710 + }, + { + "epoch": 0.5887272389371235, + "grad_norm": 1067.4669189453125, + "learning_rate": 1.7709579251788106e-05, + "loss": 79.8878, + "step": 145720 + }, + { + "epoch": 0.5887676402024912, + "grad_norm": 446.2625732421875, + "learning_rate": 1.7706805119847913e-05, + "loss": 132.4472, + "step": 145730 + }, + { + "epoch": 0.5888080414678588, + "grad_norm": 1106.883056640625, + "learning_rate": 1.7704031032614743e-05, + "loss": 74.7087, + "step": 145740 + }, + { + "epoch": 0.5888484427332265, + "grad_norm": 498.2120056152344, + "learning_rate": 1.7701256990142673e-05, + "loss": 70.1246, + "step": 145750 + }, + { + "epoch": 0.5888888439985941, + "grad_norm": 581.0377197265625, + "learning_rate": 1.769848299248577e-05, + "loss": 54.5305, + "step": 145760 + }, + { + "epoch": 0.5889292452639616, + "grad_norm": 525.7845458984375, + "learning_rate": 1.769570903969814e-05, + "loss": 58.2111, + "step": 145770 + }, + { + "epoch": 0.5889696465293293, + "grad_norm": 1251.365478515625, + "learning_rate": 1.7692935131833835e-05, + "loss": 71.7591, + "step": 145780 + }, + { + "epoch": 0.5890100477946969, + "grad_norm": 1095.7332763671875, + "learning_rate": 1.7690161268946956e-05, + "loss": 102.3529, + "step": 145790 + }, + { + "epoch": 0.5890504490600645, + "grad_norm": 496.4130859375, + "learning_rate": 1.7687387451091574e-05, + "loss": 67.8657, + "step": 145800 + }, + { + "epoch": 0.5890908503254322, + "grad_norm": 393.1575622558594, + "learning_rate": 1.7684613678321754e-05, + "loss": 50.9579, + "step": 145810 + }, + { + "epoch": 0.5891312515907998, + "grad_norm": 447.20782470703125, + "learning_rate": 1.768183995069159e-05, + "loss": 81.4233, + "step": 145820 + }, + { + "epoch": 0.5891716528561675, + "grad_norm": 1058.236572265625, + "learning_rate": 1.7679066268255153e-05, + "loss": 81.4142, + "step": 145830 + }, + { + "epoch": 0.5892120541215351, + "grad_norm": 288.72247314453125, + "learning_rate": 1.7676292631066505e-05, + "loss": 70.1489, + "step": 145840 + }, + { + "epoch": 0.5892524553869027, + "grad_norm": 554.0906982421875, + "learning_rate": 1.7673519039179737e-05, + "loss": 64.4645, + "step": 145850 + }, + { + "epoch": 0.5892928566522704, + "grad_norm": 338.9024658203125, + "learning_rate": 1.76707454926489e-05, + "loss": 86.4073, + "step": 145860 + }, + { + "epoch": 0.589333257917638, + "grad_norm": 785.1167602539062, + "learning_rate": 1.7667971991528093e-05, + "loss": 85.5108, + "step": 145870 + }, + { + "epoch": 0.5893736591830057, + "grad_norm": 1078.187255859375, + "learning_rate": 1.766519853587137e-05, + "loss": 95.2095, + "step": 145880 + }, + { + "epoch": 0.5894140604483732, + "grad_norm": 442.7991638183594, + "learning_rate": 1.7662425125732795e-05, + "loss": 103.3176, + "step": 145890 + }, + { + "epoch": 0.5894544617137408, + "grad_norm": 601.4415893554688, + "learning_rate": 1.7659651761166455e-05, + "loss": 79.7564, + "step": 145900 + }, + { + "epoch": 0.5894948629791085, + "grad_norm": 608.6932983398438, + "learning_rate": 1.7656878442226405e-05, + "loss": 71.7742, + "step": 145910 + }, + { + "epoch": 0.5895352642444761, + "grad_norm": 358.49786376953125, + "learning_rate": 1.7654105168966716e-05, + "loss": 50.2679, + "step": 145920 + }, + { + "epoch": 0.5895756655098437, + "grad_norm": 874.8716430664062, + "learning_rate": 1.7651331941441454e-05, + "loss": 79.5436, + "step": 145930 + }, + { + "epoch": 0.5896160667752114, + "grad_norm": 817.2099609375, + "learning_rate": 1.7648558759704685e-05, + "loss": 81.4711, + "step": 145940 + }, + { + "epoch": 
0.589656468040579, + "grad_norm": 580.0381469726562, + "learning_rate": 1.7645785623810474e-05, + "loss": 49.3006, + "step": 145950 + }, + { + "epoch": 0.5896968693059467, + "grad_norm": 338.4220275878906, + "learning_rate": 1.764301253381289e-05, + "loss": 40.5888, + "step": 145960 + }, + { + "epoch": 0.5897372705713143, + "grad_norm": 1191.3814697265625, + "learning_rate": 1.7640239489765977e-05, + "loss": 97.9339, + "step": 145970 + }, + { + "epoch": 0.589777671836682, + "grad_norm": 777.9380493164062, + "learning_rate": 1.763746649172382e-05, + "loss": 78.4636, + "step": 145980 + }, + { + "epoch": 0.5898180731020496, + "grad_norm": 1673.8741455078125, + "learning_rate": 1.763469353974047e-05, + "loss": 71.923, + "step": 145990 + }, + { + "epoch": 0.5898584743674172, + "grad_norm": 801.3467407226562, + "learning_rate": 1.7631920633869982e-05, + "loss": 72.6109, + "step": 146000 + }, + { + "epoch": 0.5898988756327849, + "grad_norm": 622.2783203125, + "learning_rate": 1.7629147774166425e-05, + "loss": 59.5957, + "step": 146010 + }, + { + "epoch": 0.5899392768981524, + "grad_norm": 601.508544921875, + "learning_rate": 1.762637496068385e-05, + "loss": 87.813, + "step": 146020 + }, + { + "epoch": 0.58997967816352, + "grad_norm": 794.74609375, + "learning_rate": 1.7623602193476322e-05, + "loss": 80.2616, + "step": 146030 + }, + { + "epoch": 0.5900200794288877, + "grad_norm": 1114.5274658203125, + "learning_rate": 1.7620829472597888e-05, + "loss": 79.7825, + "step": 146040 + }, + { + "epoch": 0.5900604806942553, + "grad_norm": 1153.5853271484375, + "learning_rate": 1.7618056798102605e-05, + "loss": 104.1345, + "step": 146050 + }, + { + "epoch": 0.590100881959623, + "grad_norm": 376.9637145996094, + "learning_rate": 1.761528417004454e-05, + "loss": 64.4012, + "step": 146060 + }, + { + "epoch": 0.5901412832249906, + "grad_norm": 1041.8194580078125, + "learning_rate": 1.7612511588477734e-05, + "loss": 66.8794, + "step": 146070 + }, + { + "epoch": 0.5901816844903582, + "grad_norm": 388.336669921875, + "learning_rate": 1.7609739053456238e-05, + "loss": 61.1805, + "step": 146080 + }, + { + "epoch": 0.5902220857557259, + "grad_norm": 464.2691650390625, + "learning_rate": 1.7606966565034117e-05, + "loss": 70.1488, + "step": 146090 + }, + { + "epoch": 0.5902624870210935, + "grad_norm": 260.5770263671875, + "learning_rate": 1.7604194123265412e-05, + "loss": 57.7043, + "step": 146100 + }, + { + "epoch": 0.5903028882864612, + "grad_norm": 876.9478759765625, + "learning_rate": 1.7601421728204177e-05, + "loss": 69.2016, + "step": 146110 + }, + { + "epoch": 0.5903432895518288, + "grad_norm": 457.0534973144531, + "learning_rate": 1.7598649379904464e-05, + "loss": 80.8823, + "step": 146120 + }, + { + "epoch": 0.5903836908171964, + "grad_norm": 1060.378662109375, + "learning_rate": 1.759587707842031e-05, + "loss": 62.144, + "step": 146130 + }, + { + "epoch": 0.5904240920825641, + "grad_norm": 775.0464477539062, + "learning_rate": 1.7593104823805773e-05, + "loss": 124.283, + "step": 146140 + }, + { + "epoch": 0.5904644933479316, + "grad_norm": 460.2830505371094, + "learning_rate": 1.75903326161149e-05, + "loss": 57.3976, + "step": 146150 + }, + { + "epoch": 0.5905048946132992, + "grad_norm": 602.5078125, + "learning_rate": 1.7587560455401726e-05, + "loss": 75.4462, + "step": 146160 + }, + { + "epoch": 0.5905452958786669, + "grad_norm": 794.4515991210938, + "learning_rate": 1.758478834172031e-05, + "loss": 67.4368, + "step": 146170 + }, + { + "epoch": 0.5905856971440345, + "grad_norm": 602.0757446289062, + 
"learning_rate": 1.7582016275124683e-05, + "loss": 84.6061, + "step": 146180 + }, + { + "epoch": 0.5906260984094022, + "grad_norm": 538.54736328125, + "learning_rate": 1.75792442556689e-05, + "loss": 112.4922, + "step": 146190 + }, + { + "epoch": 0.5906664996747698, + "grad_norm": 2468.697021484375, + "learning_rate": 1.7576472283406996e-05, + "loss": 89.5048, + "step": 146200 + }, + { + "epoch": 0.5907069009401374, + "grad_norm": 393.3752136230469, + "learning_rate": 1.7573700358393004e-05, + "loss": 62.5045, + "step": 146210 + }, + { + "epoch": 0.5907473022055051, + "grad_norm": 398.9742736816406, + "learning_rate": 1.757092848068098e-05, + "loss": 78.7436, + "step": 146220 + }, + { + "epoch": 0.5907877034708727, + "grad_norm": 323.978515625, + "learning_rate": 1.7568156650324956e-05, + "loss": 59.189, + "step": 146230 + }, + { + "epoch": 0.5908281047362404, + "grad_norm": 705.6653442382812, + "learning_rate": 1.7565384867378962e-05, + "loss": 65.9922, + "step": 146240 + }, + { + "epoch": 0.590868506001608, + "grad_norm": 585.89208984375, + "learning_rate": 1.7562613131897056e-05, + "loss": 60.6047, + "step": 146250 + }, + { + "epoch": 0.5909089072669756, + "grad_norm": 889.1756591796875, + "learning_rate": 1.755984144393325e-05, + "loss": 91.782, + "step": 146260 + }, + { + "epoch": 0.5909493085323433, + "grad_norm": 787.3633422851562, + "learning_rate": 1.7557069803541597e-05, + "loss": 48.8094, + "step": 146270 + }, + { + "epoch": 0.5909897097977108, + "grad_norm": 1008.7278442382812, + "learning_rate": 1.755429821077613e-05, + "loss": 69.1598, + "step": 146280 + }, + { + "epoch": 0.5910301110630785, + "grad_norm": 460.74432373046875, + "learning_rate": 1.755152666569087e-05, + "loss": 66.7781, + "step": 146290 + }, + { + "epoch": 0.5910705123284461, + "grad_norm": 720.199462890625, + "learning_rate": 1.7548755168339867e-05, + "loss": 92.3362, + "step": 146300 + }, + { + "epoch": 0.5911109135938137, + "grad_norm": 668.6510620117188, + "learning_rate": 1.7545983718777134e-05, + "loss": 73.0472, + "step": 146310 + }, + { + "epoch": 0.5911513148591814, + "grad_norm": 1309.1951904296875, + "learning_rate": 1.754321231705672e-05, + "loss": 103.5635, + "step": 146320 + }, + { + "epoch": 0.591191716124549, + "grad_norm": 427.2793884277344, + "learning_rate": 1.7540440963232645e-05, + "loss": 93.8133, + "step": 146330 + }, + { + "epoch": 0.5912321173899167, + "grad_norm": 768.4281005859375, + "learning_rate": 1.7537669657358935e-05, + "loss": 54.5935, + "step": 146340 + }, + { + "epoch": 0.5912725186552843, + "grad_norm": 972.925048828125, + "learning_rate": 1.7534898399489627e-05, + "loss": 97.7206, + "step": 146350 + }, + { + "epoch": 0.5913129199206519, + "grad_norm": 593.6405639648438, + "learning_rate": 1.7532127189678746e-05, + "loss": 69.1214, + "step": 146360 + }, + { + "epoch": 0.5913533211860196, + "grad_norm": 862.364990234375, + "learning_rate": 1.7529356027980312e-05, + "loss": 75.6045, + "step": 146370 + }, + { + "epoch": 0.5913937224513872, + "grad_norm": 735.4135131835938, + "learning_rate": 1.752658491444836e-05, + "loss": 77.0273, + "step": 146380 + }, + { + "epoch": 0.5914341237167549, + "grad_norm": 726.6267700195312, + "learning_rate": 1.7523813849136898e-05, + "loss": 98.3608, + "step": 146390 + }, + { + "epoch": 0.5914745249821224, + "grad_norm": 540.3102416992188, + "learning_rate": 1.7521042832099965e-05, + "loss": 71.4258, + "step": 146400 + }, + { + "epoch": 0.59151492624749, + "grad_norm": 1372.041748046875, + "learning_rate": 1.7518271863391585e-05, + "loss": 
68.2445, + "step": 146410 + }, + { + "epoch": 0.5915553275128577, + "grad_norm": 876.8826904296875, + "learning_rate": 1.751550094306576e-05, + "loss": 91.5656, + "step": 146420 + }, + { + "epoch": 0.5915957287782253, + "grad_norm": 457.3896179199219, + "learning_rate": 1.7512730071176532e-05, + "loss": 67.4063, + "step": 146430 + }, + { + "epoch": 0.591636130043593, + "grad_norm": 1053.20751953125, + "learning_rate": 1.750995924777791e-05, + "loss": 70.3738, + "step": 146440 + }, + { + "epoch": 0.5916765313089606, + "grad_norm": 1129.4222412109375, + "learning_rate": 1.7507188472923913e-05, + "loss": 95.5725, + "step": 146450 + }, + { + "epoch": 0.5917169325743282, + "grad_norm": 727.7415161132812, + "learning_rate": 1.7504417746668567e-05, + "loss": 56.8704, + "step": 146460 + }, + { + "epoch": 0.5917573338396959, + "grad_norm": 553.1613159179688, + "learning_rate": 1.750164706906587e-05, + "loss": 60.9341, + "step": 146470 + }, + { + "epoch": 0.5917977351050635, + "grad_norm": 1069.7200927734375, + "learning_rate": 1.749887644016986e-05, + "loss": 99.09, + "step": 146480 + }, + { + "epoch": 0.5918381363704311, + "grad_norm": 566.828125, + "learning_rate": 1.7496105860034538e-05, + "loss": 151.816, + "step": 146490 + }, + { + "epoch": 0.5918785376357988, + "grad_norm": 1138.055908203125, + "learning_rate": 1.7493335328713913e-05, + "loss": 69.5627, + "step": 146500 + }, + { + "epoch": 0.5919189389011664, + "grad_norm": 215.02374267578125, + "learning_rate": 1.7490564846262018e-05, + "loss": 55.4141, + "step": 146510 + }, + { + "epoch": 0.5919593401665341, + "grad_norm": 921.4228515625, + "learning_rate": 1.7487794412732852e-05, + "loss": 97.4198, + "step": 146520 + }, + { + "epoch": 0.5919997414319016, + "grad_norm": 226.65817260742188, + "learning_rate": 1.7485024028180422e-05, + "loss": 52.253, + "step": 146530 + }, + { + "epoch": 0.5920401426972692, + "grad_norm": 559.7610473632812, + "learning_rate": 1.7482253692658748e-05, + "loss": 63.176, + "step": 146540 + }, + { + "epoch": 0.5920805439626369, + "grad_norm": 1606.0496826171875, + "learning_rate": 1.7479483406221827e-05, + "loss": 94.8073, + "step": 146550 + }, + { + "epoch": 0.5921209452280045, + "grad_norm": 942.89501953125, + "learning_rate": 1.7476713168923683e-05, + "loss": 68.0815, + "step": 146560 + }, + { + "epoch": 0.5921613464933722, + "grad_norm": 773.52587890625, + "learning_rate": 1.7473942980818315e-05, + "loss": 69.2198, + "step": 146570 + }, + { + "epoch": 0.5922017477587398, + "grad_norm": 678.4070434570312, + "learning_rate": 1.747117284195972e-05, + "loss": 82.3037, + "step": 146580 + }, + { + "epoch": 0.5922421490241074, + "grad_norm": 1182.5390625, + "learning_rate": 1.746840275240192e-05, + "loss": 117.3757, + "step": 146590 + }, + { + "epoch": 0.5922825502894751, + "grad_norm": 310.9961242675781, + "learning_rate": 1.746563271219891e-05, + "loss": 75.0537, + "step": 146600 + }, + { + "epoch": 0.5923229515548427, + "grad_norm": 813.69580078125, + "learning_rate": 1.7462862721404698e-05, + "loss": 88.4872, + "step": 146610 + }, + { + "epoch": 0.5923633528202104, + "grad_norm": 489.7834167480469, + "learning_rate": 1.7460092780073276e-05, + "loss": 65.5015, + "step": 146620 + }, + { + "epoch": 0.592403754085578, + "grad_norm": 920.3594970703125, + "learning_rate": 1.7457322888258657e-05, + "loss": 84.5881, + "step": 146630 + }, + { + "epoch": 0.5924441553509456, + "grad_norm": 931.6600341796875, + "learning_rate": 1.745455304601484e-05, + "loss": 69.4401, + "step": 146640 + }, + { + "epoch": 0.5924845566163133, 
+ "grad_norm": 547.105224609375, + "learning_rate": 1.7451783253395823e-05, + "loss": 67.3051, + "step": 146650 + }, + { + "epoch": 0.5925249578816808, + "grad_norm": 551.242431640625, + "learning_rate": 1.7449013510455594e-05, + "loss": 90.9196, + "step": 146660 + }, + { + "epoch": 0.5925653591470484, + "grad_norm": 1166.263427734375, + "learning_rate": 1.744624381724817e-05, + "loss": 74.1566, + "step": 146670 + }, + { + "epoch": 0.5926057604124161, + "grad_norm": 1780.3721923828125, + "learning_rate": 1.744347417382753e-05, + "loss": 83.7526, + "step": 146680 + }, + { + "epoch": 0.5926461616777837, + "grad_norm": 561.63671875, + "learning_rate": 1.7440704580247677e-05, + "loss": 72.3664, + "step": 146690 + }, + { + "epoch": 0.5926865629431514, + "grad_norm": 1763.5169677734375, + "learning_rate": 1.7437935036562615e-05, + "loss": 83.9129, + "step": 146700 + }, + { + "epoch": 0.592726964208519, + "grad_norm": 464.652587890625, + "learning_rate": 1.743516554282632e-05, + "loss": 53.1704, + "step": 146710 + }, + { + "epoch": 0.5927673654738866, + "grad_norm": 894.641845703125, + "learning_rate": 1.7432396099092797e-05, + "loss": 86.845, + "step": 146720 + }, + { + "epoch": 0.5928077667392543, + "grad_norm": 690.5438232421875, + "learning_rate": 1.7429626705416036e-05, + "loss": 68.024, + "step": 146730 + }, + { + "epoch": 0.5928481680046219, + "grad_norm": 533.3489379882812, + "learning_rate": 1.742685736185002e-05, + "loss": 57.0943, + "step": 146740 + }, + { + "epoch": 0.5928885692699896, + "grad_norm": 752.5744018554688, + "learning_rate": 1.7424088068448748e-05, + "loss": 79.738, + "step": 146750 + }, + { + "epoch": 0.5929289705353572, + "grad_norm": 946.10400390625, + "learning_rate": 1.7421318825266208e-05, + "loss": 71.3252, + "step": 146760 + }, + { + "epoch": 0.5929693718007248, + "grad_norm": 405.4425964355469, + "learning_rate": 1.7418549632356374e-05, + "loss": 56.068, + "step": 146770 + }, + { + "epoch": 0.5930097730660925, + "grad_norm": 1184.525146484375, + "learning_rate": 1.7415780489773256e-05, + "loss": 68.6661, + "step": 146780 + }, + { + "epoch": 0.59305017433146, + "grad_norm": 635.7056884765625, + "learning_rate": 1.7413011397570822e-05, + "loss": 67.7103, + "step": 146790 + }, + { + "epoch": 0.5930905755968277, + "grad_norm": 1173.7064208984375, + "learning_rate": 1.7410242355803064e-05, + "loss": 69.5874, + "step": 146800 + }, + { + "epoch": 0.5931309768621953, + "grad_norm": 464.9791564941406, + "learning_rate": 1.7407473364523968e-05, + "loss": 67.0796, + "step": 146810 + }, + { + "epoch": 0.5931713781275629, + "grad_norm": 740.3363647460938, + "learning_rate": 1.7404704423787504e-05, + "loss": 88.0486, + "step": 146820 + }, + { + "epoch": 0.5932117793929306, + "grad_norm": 446.4461669921875, + "learning_rate": 1.7401935533647673e-05, + "loss": 69.1149, + "step": 146830 + }, + { + "epoch": 0.5932521806582982, + "grad_norm": 1347.965087890625, + "learning_rate": 1.7399166694158437e-05, + "loss": 73.8308, + "step": 146840 + }, + { + "epoch": 0.5932925819236659, + "grad_norm": 389.855712890625, + "learning_rate": 1.7396397905373792e-05, + "loss": 62.4153, + "step": 146850 + }, + { + "epoch": 0.5933329831890335, + "grad_norm": 537.658935546875, + "learning_rate": 1.739362916734771e-05, + "loss": 136.4589, + "step": 146860 + }, + { + "epoch": 0.5933733844544011, + "grad_norm": 491.0473327636719, + "learning_rate": 1.7390860480134172e-05, + "loss": 86.9497, + "step": 146870 + }, + { + "epoch": 0.5934137857197688, + "grad_norm": 667.936767578125, + "learning_rate": 
1.738809184378715e-05, + "loss": 48.33, + "step": 146880 + }, + { + "epoch": 0.5934541869851364, + "grad_norm": 1717.7926025390625, + "learning_rate": 1.7385323258360625e-05, + "loss": 113.5553, + "step": 146890 + }, + { + "epoch": 0.593494588250504, + "grad_norm": 741.8560791015625, + "learning_rate": 1.738255472390856e-05, + "loss": 78.1057, + "step": 146900 + }, + { + "epoch": 0.5935349895158717, + "grad_norm": 995.4171142578125, + "learning_rate": 1.7379786240484943e-05, + "loss": 87.8734, + "step": 146910 + }, + { + "epoch": 0.5935753907812392, + "grad_norm": 618.7332153320312, + "learning_rate": 1.7377017808143736e-05, + "loss": 64.544, + "step": 146920 + }, + { + "epoch": 0.5936157920466069, + "grad_norm": 620.2158203125, + "learning_rate": 1.737424942693893e-05, + "loss": 52.6593, + "step": 146930 + }, + { + "epoch": 0.5936561933119745, + "grad_norm": 577.082275390625, + "learning_rate": 1.737148109692448e-05, + "loss": 88.9247, + "step": 146940 + }, + { + "epoch": 0.5936965945773421, + "grad_norm": 582.5877075195312, + "learning_rate": 1.7368712818154353e-05, + "loss": 48.6099, + "step": 146950 + }, + { + "epoch": 0.5937369958427098, + "grad_norm": 644.0592651367188, + "learning_rate": 1.7365944590682525e-05, + "loss": 72.1474, + "step": 146960 + }, + { + "epoch": 0.5937773971080774, + "grad_norm": 655.4508666992188, + "learning_rate": 1.736317641456297e-05, + "loss": 79.1985, + "step": 146970 + }, + { + "epoch": 0.5938177983734451, + "grad_norm": 3051.212890625, + "learning_rate": 1.7360408289849642e-05, + "loss": 93.9393, + "step": 146980 + }, + { + "epoch": 0.5938581996388127, + "grad_norm": 510.1966552734375, + "learning_rate": 1.735764021659652e-05, + "loss": 104.2481, + "step": 146990 + }, + { + "epoch": 0.5938986009041803, + "grad_norm": 1298.47705078125, + "learning_rate": 1.735487219485755e-05, + "loss": 65.3021, + "step": 147000 + }, + { + "epoch": 0.593939002169548, + "grad_norm": 928.4220581054688, + "learning_rate": 1.7352104224686718e-05, + "loss": 87.348, + "step": 147010 + }, + { + "epoch": 0.5939794034349156, + "grad_norm": 512.177978515625, + "learning_rate": 1.734933630613798e-05, + "loss": 74.004, + "step": 147020 + }, + { + "epoch": 0.5940198047002833, + "grad_norm": 852.3275756835938, + "learning_rate": 1.7346568439265286e-05, + "loss": 70.5542, + "step": 147030 + }, + { + "epoch": 0.5940602059656508, + "grad_norm": 572.235595703125, + "learning_rate": 1.7343800624122612e-05, + "loss": 63.9276, + "step": 147040 + }, + { + "epoch": 0.5941006072310184, + "grad_norm": 478.9875183105469, + "learning_rate": 1.7341032860763917e-05, + "loss": 84.4886, + "step": 147050 + }, + { + "epoch": 0.5941410084963861, + "grad_norm": 944.3619384765625, + "learning_rate": 1.7338265149243147e-05, + "loss": 55.3053, + "step": 147060 + }, + { + "epoch": 0.5941814097617537, + "grad_norm": 624.8078002929688, + "learning_rate": 1.7335497489614275e-05, + "loss": 65.403, + "step": 147070 + }, + { + "epoch": 0.5942218110271213, + "grad_norm": 415.78765869140625, + "learning_rate": 1.733272988193124e-05, + "loss": 66.3103, + "step": 147080 + }, + { + "epoch": 0.594262212292489, + "grad_norm": 1339.9478759765625, + "learning_rate": 1.732996232624802e-05, + "loss": 62.572, + "step": 147090 + }, + { + "epoch": 0.5943026135578566, + "grad_norm": 884.2012939453125, + "learning_rate": 1.7327194822618557e-05, + "loss": 76.0782, + "step": 147100 + }, + { + "epoch": 0.5943430148232243, + "grad_norm": 226.78929138183594, + "learning_rate": 1.73244273710968e-05, + "loss": 72.2347, + "step": 147110 + 
}, + { + "epoch": 0.5943834160885919, + "grad_norm": 883.3411865234375, + "learning_rate": 1.732165997173672e-05, + "loss": 69.9417, + "step": 147120 + }, + { + "epoch": 0.5944238173539595, + "grad_norm": 762.4244995117188, + "learning_rate": 1.7318892624592255e-05, + "loss": 66.2668, + "step": 147130 + }, + { + "epoch": 0.5944642186193272, + "grad_norm": 723.5715942382812, + "learning_rate": 1.7316125329717353e-05, + "loss": 69.6436, + "step": 147140 + }, + { + "epoch": 0.5945046198846948, + "grad_norm": 378.53924560546875, + "learning_rate": 1.7313358087165973e-05, + "loss": 73.5496, + "step": 147150 + }, + { + "epoch": 0.5945450211500625, + "grad_norm": 1145.11669921875, + "learning_rate": 1.7310590896992058e-05, + "loss": 65.1495, + "step": 147160 + }, + { + "epoch": 0.59458542241543, + "grad_norm": 1016.6676025390625, + "learning_rate": 1.730782375924956e-05, + "loss": 62.8379, + "step": 147170 + }, + { + "epoch": 0.5946258236807976, + "grad_norm": 505.88580322265625, + "learning_rate": 1.7305056673992425e-05, + "loss": 57.7066, + "step": 147180 + }, + { + "epoch": 0.5946662249461653, + "grad_norm": 193.69969177246094, + "learning_rate": 1.7302289641274592e-05, + "loss": 75.7668, + "step": 147190 + }, + { + "epoch": 0.5947066262115329, + "grad_norm": 995.0844116210938, + "learning_rate": 1.7299522661150018e-05, + "loss": 86.3613, + "step": 147200 + }, + { + "epoch": 0.5947470274769006, + "grad_norm": 631.1360473632812, + "learning_rate": 1.729675573367264e-05, + "loss": 55.0956, + "step": 147210 + }, + { + "epoch": 0.5947874287422682, + "grad_norm": 480.0137634277344, + "learning_rate": 1.7293988858896397e-05, + "loss": 70.3519, + "step": 147220 + }, + { + "epoch": 0.5948278300076358, + "grad_norm": 610.8419189453125, + "learning_rate": 1.729122203687524e-05, + "loss": 51.659, + "step": 147230 + }, + { + "epoch": 0.5948682312730035, + "grad_norm": 3206.985107421875, + "learning_rate": 1.7288455267663095e-05, + "loss": 55.3405, + "step": 147240 + }, + { + "epoch": 0.5949086325383711, + "grad_norm": 705.2640380859375, + "learning_rate": 1.7285688551313915e-05, + "loss": 69.3455, + "step": 147250 + }, + { + "epoch": 0.5949490338037388, + "grad_norm": 570.6190185546875, + "learning_rate": 1.7282921887881637e-05, + "loss": 74.599, + "step": 147260 + }, + { + "epoch": 0.5949894350691064, + "grad_norm": 987.9166259765625, + "learning_rate": 1.7280155277420188e-05, + "loss": 88.0392, + "step": 147270 + }, + { + "epoch": 0.595029836334474, + "grad_norm": 407.3558654785156, + "learning_rate": 1.727738871998352e-05, + "loss": 59.3372, + "step": 147280 + }, + { + "epoch": 0.5950702375998417, + "grad_norm": 1111.18115234375, + "learning_rate": 1.727462221562556e-05, + "loss": 82.8718, + "step": 147290 + }, + { + "epoch": 0.5951106388652092, + "grad_norm": 870.8165893554688, + "learning_rate": 1.7271855764400235e-05, + "loss": 68.5738, + "step": 147300 + }, + { + "epoch": 0.5951510401305768, + "grad_norm": 1145.36865234375, + "learning_rate": 1.7269089366361493e-05, + "loss": 78.1963, + "step": 147310 + }, + { + "epoch": 0.5951914413959445, + "grad_norm": 426.14080810546875, + "learning_rate": 1.7266323021563257e-05, + "loss": 73.0056, + "step": 147320 + }, + { + "epoch": 0.5952318426613121, + "grad_norm": 597.8048095703125, + "learning_rate": 1.7263556730059467e-05, + "loss": 66.0293, + "step": 147330 + }, + { + "epoch": 0.5952722439266798, + "grad_norm": 1036.9482421875, + "learning_rate": 1.7260790491904045e-05, + "loss": 73.7305, + "step": 147340 + }, + { + "epoch": 0.5953126451920474, + 
"grad_norm": 974.8992919921875, + "learning_rate": 1.7258024307150917e-05, + "loss": 91.319, + "step": 147350 + }, + { + "epoch": 0.595353046457415, + "grad_norm": 269.26031494140625, + "learning_rate": 1.725525817585402e-05, + "loss": 59.2016, + "step": 147360 + }, + { + "epoch": 0.5953934477227827, + "grad_norm": 520.81396484375, + "learning_rate": 1.7252492098067275e-05, + "loss": 84.1744, + "step": 147370 + }, + { + "epoch": 0.5954338489881503, + "grad_norm": 395.3334045410156, + "learning_rate": 1.7249726073844617e-05, + "loss": 77.6984, + "step": 147380 + }, + { + "epoch": 0.595474250253518, + "grad_norm": 907.1693725585938, + "learning_rate": 1.7246960103239967e-05, + "loss": 55.7083, + "step": 147390 + }, + { + "epoch": 0.5955146515188856, + "grad_norm": 497.7479553222656, + "learning_rate": 1.724419418630724e-05, + "loss": 71.6253, + "step": 147400 + }, + { + "epoch": 0.5955550527842532, + "grad_norm": 2011.2945556640625, + "learning_rate": 1.724142832310037e-05, + "loss": 73.5244, + "step": 147410 + }, + { + "epoch": 0.5955954540496209, + "grad_norm": 528.300537109375, + "learning_rate": 1.723866251367328e-05, + "loss": 62.3247, + "step": 147420 + }, + { + "epoch": 0.5956358553149884, + "grad_norm": 1009.927978515625, + "learning_rate": 1.7235896758079873e-05, + "loss": 64.5913, + "step": 147430 + }, + { + "epoch": 0.5956762565803561, + "grad_norm": 520.3082275390625, + "learning_rate": 1.7233131056374092e-05, + "loss": 109.698, + "step": 147440 + }, + { + "epoch": 0.5957166578457237, + "grad_norm": 538.5592651367188, + "learning_rate": 1.7230365408609837e-05, + "loss": 55.0315, + "step": 147450 + }, + { + "epoch": 0.5957570591110913, + "grad_norm": 1443.870361328125, + "learning_rate": 1.722759981484104e-05, + "loss": 77.413, + "step": 147460 + }, + { + "epoch": 0.595797460376459, + "grad_norm": 975.79150390625, + "learning_rate": 1.7224834275121615e-05, + "loss": 68.1835, + "step": 147470 + }, + { + "epoch": 0.5958378616418266, + "grad_norm": 976.2315673828125, + "learning_rate": 1.722206878950547e-05, + "loss": 49.298, + "step": 147480 + }, + { + "epoch": 0.5958782629071943, + "grad_norm": 593.62353515625, + "learning_rate": 1.721930335804653e-05, + "loss": 76.0799, + "step": 147490 + }, + { + "epoch": 0.5959186641725619, + "grad_norm": 784.1069946289062, + "learning_rate": 1.72165379807987e-05, + "loss": 81.8833, + "step": 147500 + }, + { + "epoch": 0.5959590654379295, + "grad_norm": 773.3692016601562, + "learning_rate": 1.7213772657815885e-05, + "loss": 75.2865, + "step": 147510 + }, + { + "epoch": 0.5959994667032972, + "grad_norm": 257.25958251953125, + "learning_rate": 1.721100738915202e-05, + "loss": 58.2252, + "step": 147520 + }, + { + "epoch": 0.5960398679686648, + "grad_norm": 1096.3355712890625, + "learning_rate": 1.7208242174860988e-05, + "loss": 92.7476, + "step": 147530 + }, + { + "epoch": 0.5960802692340325, + "grad_norm": 613.7579345703125, + "learning_rate": 1.7205477014996722e-05, + "loss": 49.1383, + "step": 147540 + }, + { + "epoch": 0.5961206704994001, + "grad_norm": 236.681396484375, + "learning_rate": 1.7202711909613124e-05, + "loss": 62.9969, + "step": 147550 + }, + { + "epoch": 0.5961610717647676, + "grad_norm": 293.57647705078125, + "learning_rate": 1.719994685876409e-05, + "loss": 65.2842, + "step": 147560 + }, + { + "epoch": 0.5962014730301353, + "grad_norm": 527.6851196289062, + "learning_rate": 1.7197181862503536e-05, + "loss": 58.2052, + "step": 147570 + }, + { + "epoch": 0.5962418742955029, + "grad_norm": 689.8975830078125, + "learning_rate": 
1.7194416920885363e-05, + "loss": 65.4701, + "step": 147580 + }, + { + "epoch": 0.5962822755608705, + "grad_norm": 453.6103820800781, + "learning_rate": 1.719165203396348e-05, + "loss": 68.4591, + "step": 147590 + }, + { + "epoch": 0.5963226768262382, + "grad_norm": 459.6171569824219, + "learning_rate": 1.7188887201791785e-05, + "loss": 74.3295, + "step": 147600 + }, + { + "epoch": 0.5963630780916058, + "grad_norm": 683.1412963867188, + "learning_rate": 1.7186122424424173e-05, + "loss": 69.6316, + "step": 147610 + }, + { + "epoch": 0.5964034793569735, + "grad_norm": 740.3372802734375, + "learning_rate": 1.7183357701914565e-05, + "loss": 55.2786, + "step": 147620 + }, + { + "epoch": 0.5964438806223411, + "grad_norm": 616.0841064453125, + "learning_rate": 1.7180593034316846e-05, + "loss": 82.9074, + "step": 147630 + }, + { + "epoch": 0.5964842818877087, + "grad_norm": 407.3956604003906, + "learning_rate": 1.717782842168491e-05, + "loss": 63.37, + "step": 147640 + }, + { + "epoch": 0.5965246831530764, + "grad_norm": 1087.0396728515625, + "learning_rate": 1.7175063864072668e-05, + "loss": 84.1552, + "step": 147650 + }, + { + "epoch": 0.596565084418444, + "grad_norm": 273.6052551269531, + "learning_rate": 1.7172299361534017e-05, + "loss": 58.5531, + "step": 147660 + }, + { + "epoch": 0.5966054856838117, + "grad_norm": 615.1221313476562, + "learning_rate": 1.7169534914122834e-05, + "loss": 57.3368, + "step": 147670 + }, + { + "epoch": 0.5966458869491792, + "grad_norm": 1619.8370361328125, + "learning_rate": 1.7166770521893033e-05, + "loss": 99.3724, + "step": 147680 + }, + { + "epoch": 0.5966862882145468, + "grad_norm": 572.9837646484375, + "learning_rate": 1.716400618489849e-05, + "loss": 44.5536, + "step": 147690 + }, + { + "epoch": 0.5967266894799145, + "grad_norm": 718.277099609375, + "learning_rate": 1.7161241903193112e-05, + "loss": 73.5003, + "step": 147700 + }, + { + "epoch": 0.5967670907452821, + "grad_norm": 868.3510131835938, + "learning_rate": 1.7158477676830793e-05, + "loss": 109.1606, + "step": 147710 + }, + { + "epoch": 0.5968074920106498, + "grad_norm": 816.0554809570312, + "learning_rate": 1.7155713505865398e-05, + "loss": 67.9373, + "step": 147720 + }, + { + "epoch": 0.5968478932760174, + "grad_norm": 924.0003662109375, + "learning_rate": 1.7152949390350845e-05, + "loss": 70.331, + "step": 147730 + }, + { + "epoch": 0.596888294541385, + "grad_norm": 513.5234985351562, + "learning_rate": 1.7150185330341006e-05, + "loss": 63.7445, + "step": 147740 + }, + { + "epoch": 0.5969286958067527, + "grad_norm": 1347.4267578125, + "learning_rate": 1.714742132588977e-05, + "loss": 88.9849, + "step": 147750 + }, + { + "epoch": 0.5969690970721203, + "grad_norm": 389.75244140625, + "learning_rate": 1.7144657377051025e-05, + "loss": 92.9308, + "step": 147760 + }, + { + "epoch": 0.597009498337488, + "grad_norm": 1425.78662109375, + "learning_rate": 1.714189348387865e-05, + "loss": 85.084, + "step": 147770 + }, + { + "epoch": 0.5970498996028556, + "grad_norm": 555.8831787109375, + "learning_rate": 1.713912964642654e-05, + "loss": 75.6753, + "step": 147780 + }, + { + "epoch": 0.5970903008682232, + "grad_norm": 1471.3504638671875, + "learning_rate": 1.713636586474857e-05, + "loss": 80.0551, + "step": 147790 + }, + { + "epoch": 0.5971307021335909, + "grad_norm": 938.2869873046875, + "learning_rate": 1.7133602138898614e-05, + "loss": 63.268, + "step": 147800 + }, + { + "epoch": 0.5971711033989584, + "grad_norm": 374.8011474609375, + "learning_rate": 1.7130838468930568e-05, + "loss": 73.2149, + "step": 
147810 + }, + { + "epoch": 0.597211504664326, + "grad_norm": 443.7825012207031, + "learning_rate": 1.7128074854898304e-05, + "loss": 67.7705, + "step": 147820 + }, + { + "epoch": 0.5972519059296937, + "grad_norm": 933.133544921875, + "learning_rate": 1.7125311296855694e-05, + "loss": 71.3835, + "step": 147830 + }, + { + "epoch": 0.5972923071950613, + "grad_norm": 1244.5897216796875, + "learning_rate": 1.712254779485662e-05, + "loss": 88.3013, + "step": 147840 + }, + { + "epoch": 0.597332708460429, + "grad_norm": 753.3154907226562, + "learning_rate": 1.7119784348954956e-05, + "loss": 51.9022, + "step": 147850 + }, + { + "epoch": 0.5973731097257966, + "grad_norm": 709.4983520507812, + "learning_rate": 1.7117020959204586e-05, + "loss": 95.1047, + "step": 147860 + }, + { + "epoch": 0.5974135109911642, + "grad_norm": 1201.85888671875, + "learning_rate": 1.7114257625659378e-05, + "loss": 53.7885, + "step": 147870 + }, + { + "epoch": 0.5974539122565319, + "grad_norm": 1321.2769775390625, + "learning_rate": 1.7111494348373193e-05, + "loss": 65.4099, + "step": 147880 + }, + { + "epoch": 0.5974943135218995, + "grad_norm": 817.5372924804688, + "learning_rate": 1.710873112739992e-05, + "loss": 76.558, + "step": 147890 + }, + { + "epoch": 0.5975347147872672, + "grad_norm": 544.0048828125, + "learning_rate": 1.7105967962793418e-05, + "loss": 58.1795, + "step": 147900 + }, + { + "epoch": 0.5975751160526348, + "grad_norm": 2335.971435546875, + "learning_rate": 1.710320485460757e-05, + "loss": 76.8315, + "step": 147910 + }, + { + "epoch": 0.5976155173180024, + "grad_norm": 975.2866821289062, + "learning_rate": 1.7100441802896224e-05, + "loss": 66.7206, + "step": 147920 + }, + { + "epoch": 0.5976559185833701, + "grad_norm": 517.52490234375, + "learning_rate": 1.709767880771326e-05, + "loss": 68.2696, + "step": 147930 + }, + { + "epoch": 0.5976963198487376, + "grad_norm": 769.9826049804688, + "learning_rate": 1.7094915869112547e-05, + "loss": 72.8856, + "step": 147940 + }, + { + "epoch": 0.5977367211141053, + "grad_norm": 583.7584838867188, + "learning_rate": 1.7092152987147946e-05, + "loss": 72.7571, + "step": 147950 + }, + { + "epoch": 0.5977771223794729, + "grad_norm": 915.959228515625, + "learning_rate": 1.7089390161873307e-05, + "loss": 87.7981, + "step": 147960 + }, + { + "epoch": 0.5978175236448405, + "grad_norm": 463.0899658203125, + "learning_rate": 1.7086627393342518e-05, + "loss": 59.3288, + "step": 147970 + }, + { + "epoch": 0.5978579249102082, + "grad_norm": 522.1674194335938, + "learning_rate": 1.7083864681609417e-05, + "loss": 76.1139, + "step": 147980 + }, + { + "epoch": 0.5978983261755758, + "grad_norm": 1146.5792236328125, + "learning_rate": 1.7081102026727884e-05, + "loss": 106.9242, + "step": 147990 + }, + { + "epoch": 0.5979387274409435, + "grad_norm": 1191.81201171875, + "learning_rate": 1.707833942875177e-05, + "loss": 74.0179, + "step": 148000 + }, + { + "epoch": 0.5979791287063111, + "grad_norm": 1079.9111328125, + "learning_rate": 1.707557688773493e-05, + "loss": 72.7232, + "step": 148010 + }, + { + "epoch": 0.5980195299716787, + "grad_norm": 749.3920288085938, + "learning_rate": 1.7072814403731226e-05, + "loss": 80.7503, + "step": 148020 + }, + { + "epoch": 0.5980599312370464, + "grad_norm": 694.7001342773438, + "learning_rate": 1.7070051976794516e-05, + "loss": 54.186, + "step": 148030 + }, + { + "epoch": 0.598100332502414, + "grad_norm": 453.3927307128906, + "learning_rate": 1.7067289606978638e-05, + "loss": 63.7547, + "step": 148040 + }, + { + "epoch": 0.5981407337677817, + 
"grad_norm": 464.644287109375, + "learning_rate": 1.706452729433747e-05, + "loss": 97.2118, + "step": 148050 + }, + { + "epoch": 0.5981811350331493, + "grad_norm": 927.4556884765625, + "learning_rate": 1.7061765038924844e-05, + "loss": 79.3567, + "step": 148060 + }, + { + "epoch": 0.5982215362985168, + "grad_norm": 828.9855346679688, + "learning_rate": 1.705900284079463e-05, + "loss": 54.1809, + "step": 148070 + }, + { + "epoch": 0.5982619375638845, + "grad_norm": 289.38189697265625, + "learning_rate": 1.7056240700000673e-05, + "loss": 52.734, + "step": 148080 + }, + { + "epoch": 0.5983023388292521, + "grad_norm": 578.916259765625, + "learning_rate": 1.7053478616596812e-05, + "loss": 93.1198, + "step": 148090 + }, + { + "epoch": 0.5983427400946197, + "grad_norm": 1535.6571044921875, + "learning_rate": 1.7050716590636907e-05, + "loss": 77.5704, + "step": 148100 + }, + { + "epoch": 0.5983831413599874, + "grad_norm": 740.9009399414062, + "learning_rate": 1.7047954622174798e-05, + "loss": 104.8098, + "step": 148110 + }, + { + "epoch": 0.598423542625355, + "grad_norm": 1276.4541015625, + "learning_rate": 1.704519271126433e-05, + "loss": 73.293, + "step": 148120 + }, + { + "epoch": 0.5984639438907227, + "grad_norm": 2103.44677734375, + "learning_rate": 1.7042430857959357e-05, + "loss": 70.5448, + "step": 148130 + }, + { + "epoch": 0.5985043451560903, + "grad_norm": 1134.3985595703125, + "learning_rate": 1.7039669062313707e-05, + "loss": 69.7892, + "step": 148140 + }, + { + "epoch": 0.5985447464214579, + "grad_norm": 898.209716796875, + "learning_rate": 1.7036907324381244e-05, + "loss": 110.402, + "step": 148150 + }, + { + "epoch": 0.5985851476868256, + "grad_norm": 493.082763671875, + "learning_rate": 1.7034145644215797e-05, + "loss": 93.9417, + "step": 148160 + }, + { + "epoch": 0.5986255489521932, + "grad_norm": 559.3487548828125, + "learning_rate": 1.70313840218712e-05, + "loss": 88.1282, + "step": 148170 + }, + { + "epoch": 0.5986659502175609, + "grad_norm": 647.854248046875, + "learning_rate": 1.702862245740131e-05, + "loss": 67.1309, + "step": 148180 + }, + { + "epoch": 0.5987063514829285, + "grad_norm": 575.4199829101562, + "learning_rate": 1.7025860950859955e-05, + "loss": 88.3943, + "step": 148190 + }, + { + "epoch": 0.598746752748296, + "grad_norm": 1435.1593017578125, + "learning_rate": 1.7023099502300957e-05, + "loss": 93.3704, + "step": 148200 + }, + { + "epoch": 0.5987871540136637, + "grad_norm": 450.0898132324219, + "learning_rate": 1.7020338111778182e-05, + "loss": 59.4998, + "step": 148210 + }, + { + "epoch": 0.5988275552790313, + "grad_norm": 665.2924194335938, + "learning_rate": 1.701757677934544e-05, + "loss": 70.7557, + "step": 148220 + }, + { + "epoch": 0.598867956544399, + "grad_norm": 782.1422729492188, + "learning_rate": 1.7014815505056578e-05, + "loss": 101.6979, + "step": 148230 + }, + { + "epoch": 0.5989083578097666, + "grad_norm": 366.3778076171875, + "learning_rate": 1.7012054288965426e-05, + "loss": 88.6864, + "step": 148240 + }, + { + "epoch": 0.5989487590751342, + "grad_norm": 638.1792602539062, + "learning_rate": 1.7009293131125814e-05, + "loss": 86.5797, + "step": 148250 + }, + { + "epoch": 0.5989891603405019, + "grad_norm": 414.1332092285156, + "learning_rate": 1.7006532031591566e-05, + "loss": 95.3565, + "step": 148260 + }, + { + "epoch": 0.5990295616058695, + "grad_norm": 380.7497253417969, + "learning_rate": 1.7003770990416526e-05, + "loss": 70.6532, + "step": 148270 + }, + { + "epoch": 0.5990699628712371, + "grad_norm": 456.8861999511719, + 
"learning_rate": 1.7001010007654507e-05, + "loss": 65.8566, + "step": 148280 + }, + { + "epoch": 0.5991103641366048, + "grad_norm": 311.1480407714844, + "learning_rate": 1.6998249083359348e-05, + "loss": 72.0137, + "step": 148290 + }, + { + "epoch": 0.5991507654019724, + "grad_norm": 740.9043579101562, + "learning_rate": 1.699548821758486e-05, + "loss": 76.9674, + "step": 148300 + }, + { + "epoch": 0.5991911666673401, + "grad_norm": 490.6070556640625, + "learning_rate": 1.6992727410384883e-05, + "loss": 57.8674, + "step": 148310 + }, + { + "epoch": 0.5992315679327076, + "grad_norm": 309.7311706542969, + "learning_rate": 1.6989966661813234e-05, + "loss": 77.5289, + "step": 148320 + }, + { + "epoch": 0.5992719691980752, + "grad_norm": 713.9699096679688, + "learning_rate": 1.6987205971923723e-05, + "loss": 58.6614, + "step": 148330 + }, + { + "epoch": 0.5993123704634429, + "grad_norm": 939.8273315429688, + "learning_rate": 1.6984445340770194e-05, + "loss": 41.4446, + "step": 148340 + }, + { + "epoch": 0.5993527717288105, + "grad_norm": 749.8041381835938, + "learning_rate": 1.6981684768406453e-05, + "loss": 70.7726, + "step": 148350 + }, + { + "epoch": 0.5993931729941782, + "grad_norm": 714.794677734375, + "learning_rate": 1.6978924254886325e-05, + "loss": 74.9261, + "step": 148360 + }, + { + "epoch": 0.5994335742595458, + "grad_norm": 620.7046508789062, + "learning_rate": 1.697616380026362e-05, + "loss": 74.4319, + "step": 148370 + }, + { + "epoch": 0.5994739755249134, + "grad_norm": 672.2181396484375, + "learning_rate": 1.6973403404592156e-05, + "loss": 59.1693, + "step": 148380 + }, + { + "epoch": 0.5995143767902811, + "grad_norm": 553.2500610351562, + "learning_rate": 1.6970643067925755e-05, + "loss": 88.7304, + "step": 148390 + }, + { + "epoch": 0.5995547780556487, + "grad_norm": 943.9002075195312, + "learning_rate": 1.6967882790318228e-05, + "loss": 74.264, + "step": 148400 + }, + { + "epoch": 0.5995951793210164, + "grad_norm": 733.78173828125, + "learning_rate": 1.696512257182338e-05, + "loss": 79.6656, + "step": 148410 + }, + { + "epoch": 0.599635580586384, + "grad_norm": 902.254150390625, + "learning_rate": 1.6962362412495035e-05, + "loss": 73.0769, + "step": 148420 + }, + { + "epoch": 0.5996759818517516, + "grad_norm": 2017.380126953125, + "learning_rate": 1.6959602312387e-05, + "loss": 77.2503, + "step": 148430 + }, + { + "epoch": 0.5997163831171193, + "grad_norm": 551.3301391601562, + "learning_rate": 1.695684227155308e-05, + "loss": 53.7849, + "step": 148440 + }, + { + "epoch": 0.5997567843824868, + "grad_norm": 451.1525573730469, + "learning_rate": 1.695408229004709e-05, + "loss": 85.2148, + "step": 148450 + }, + { + "epoch": 0.5997971856478544, + "grad_norm": 517.2169799804688, + "learning_rate": 1.695132236792283e-05, + "loss": 73.9456, + "step": 148460 + }, + { + "epoch": 0.5998375869132221, + "grad_norm": 477.7053527832031, + "learning_rate": 1.694856250523411e-05, + "loss": 65.9845, + "step": 148470 + }, + { + "epoch": 0.5998779881785897, + "grad_norm": 431.2756652832031, + "learning_rate": 1.6945802702034742e-05, + "loss": 67.5896, + "step": 148480 + }, + { + "epoch": 0.5999183894439574, + "grad_norm": 557.8097534179688, + "learning_rate": 1.694304295837851e-05, + "loss": 84.375, + "step": 148490 + }, + { + "epoch": 0.599958790709325, + "grad_norm": 962.8433227539062, + "learning_rate": 1.694028327431924e-05, + "loss": 60.8197, + "step": 148500 + }, + { + "epoch": 0.5999991919746926, + "grad_norm": 554.6680908203125, + "learning_rate": 1.693752364991072e-05, + "loss": 
62.773, + "step": 148510 + }, + { + "epoch": 0.6000395932400603, + "grad_norm": 722.2630004882812, + "learning_rate": 1.6934764085206755e-05, + "loss": 136.4827, + "step": 148520 + }, + { + "epoch": 0.6000799945054279, + "grad_norm": 610.8651123046875, + "learning_rate": 1.6932004580261144e-05, + "loss": 79.0329, + "step": 148530 + }, + { + "epoch": 0.6001203957707956, + "grad_norm": 601.0133666992188, + "learning_rate": 1.692924513512767e-05, + "loss": 57.669, + "step": 148540 + }, + { + "epoch": 0.6001607970361632, + "grad_norm": 750.1552124023438, + "learning_rate": 1.692648574986016e-05, + "loss": 66.805, + "step": 148550 + }, + { + "epoch": 0.6002011983015308, + "grad_norm": 943.43115234375, + "learning_rate": 1.6923726424512384e-05, + "loss": 104.3416, + "step": 148560 + }, + { + "epoch": 0.6002415995668985, + "grad_norm": 214.06201171875, + "learning_rate": 1.692096715913814e-05, + "loss": 53.0416, + "step": 148570 + }, + { + "epoch": 0.600282000832266, + "grad_norm": 590.0076293945312, + "learning_rate": 1.6918207953791238e-05, + "loss": 70.425, + "step": 148580 + }, + { + "epoch": 0.6003224020976337, + "grad_norm": 745.4481201171875, + "learning_rate": 1.691544880852545e-05, + "loss": 93.5681, + "step": 148590 + }, + { + "epoch": 0.6003628033630013, + "grad_norm": 656.1057739257812, + "learning_rate": 1.691268972339458e-05, + "loss": 94.9707, + "step": 148600 + }, + { + "epoch": 0.6004032046283689, + "grad_norm": 1634.199462890625, + "learning_rate": 1.6909930698452412e-05, + "loss": 94.3945, + "step": 148610 + }, + { + "epoch": 0.6004436058937366, + "grad_norm": 898.1805419921875, + "learning_rate": 1.6907171733752733e-05, + "loss": 76.5257, + "step": 148620 + }, + { + "epoch": 0.6004840071591042, + "grad_norm": 976.9075317382812, + "learning_rate": 1.6904412829349342e-05, + "loss": 58.0059, + "step": 148630 + }, + { + "epoch": 0.6005244084244719, + "grad_norm": 444.93487548828125, + "learning_rate": 1.690165398529601e-05, + "loss": 60.5809, + "step": 148640 + }, + { + "epoch": 0.6005648096898395, + "grad_norm": 515.6249389648438, + "learning_rate": 1.6898895201646525e-05, + "loss": 62.9544, + "step": 148650 + }, + { + "epoch": 0.6006052109552071, + "grad_norm": 886.9664306640625, + "learning_rate": 1.6896136478454682e-05, + "loss": 50.5031, + "step": 148660 + }, + { + "epoch": 0.6006456122205748, + "grad_norm": 642.3487548828125, + "learning_rate": 1.6893377815774247e-05, + "loss": 79.2724, + "step": 148670 + }, + { + "epoch": 0.6006860134859424, + "grad_norm": 879.63525390625, + "learning_rate": 1.6890619213659022e-05, + "loss": 67.925, + "step": 148680 + }, + { + "epoch": 0.6007264147513101, + "grad_norm": 452.7861633300781, + "learning_rate": 1.6887860672162774e-05, + "loss": 89.25, + "step": 148690 + }, + { + "epoch": 0.6007668160166777, + "grad_norm": 192.8931884765625, + "learning_rate": 1.688510219133928e-05, + "loss": 104.1191, + "step": 148700 + }, + { + "epoch": 0.6008072172820452, + "grad_norm": 713.500244140625, + "learning_rate": 1.6882343771242327e-05, + "loss": 58.4715, + "step": 148710 + }, + { + "epoch": 0.6008476185474129, + "grad_norm": 1062.9622802734375, + "learning_rate": 1.6879585411925687e-05, + "loss": 114.1169, + "step": 148720 + }, + { + "epoch": 0.6008880198127805, + "grad_norm": 796.5606689453125, + "learning_rate": 1.6876827113443127e-05, + "loss": 86.7251, + "step": 148730 + }, + { + "epoch": 0.6009284210781481, + "grad_norm": 668.8825073242188, + "learning_rate": 1.6874068875848443e-05, + "loss": 61.1327, + "step": 148740 + }, + { + "epoch": 
0.6009688223435158, + "grad_norm": 640.49755859375, + "learning_rate": 1.687131069919538e-05, + "loss": 53.2531, + "step": 148750 + }, + { + "epoch": 0.6010092236088834, + "grad_norm": 622.39111328125, + "learning_rate": 1.686855258353774e-05, + "loss": 69.0504, + "step": 148760 + }, + { + "epoch": 0.6010496248742511, + "grad_norm": 999.5111694335938, + "learning_rate": 1.6865794528929277e-05, + "loss": 90.7448, + "step": 148770 + }, + { + "epoch": 0.6010900261396187, + "grad_norm": 406.6826477050781, + "learning_rate": 1.686303653542376e-05, + "loss": 64.1658, + "step": 148780 + }, + { + "epoch": 0.6011304274049863, + "grad_norm": 1930.0606689453125, + "learning_rate": 1.686027860307496e-05, + "loss": 121.9979, + "step": 148790 + }, + { + "epoch": 0.601170828670354, + "grad_norm": 839.324951171875, + "learning_rate": 1.6857520731936654e-05, + "loss": 58.9795, + "step": 148800 + }, + { + "epoch": 0.6012112299357216, + "grad_norm": 917.3912353515625, + "learning_rate": 1.6854762922062583e-05, + "loss": 66.7212, + "step": 148810 + }, + { + "epoch": 0.6012516312010893, + "grad_norm": 320.3462219238281, + "learning_rate": 1.685200517350654e-05, + "loss": 64.1424, + "step": 148820 + }, + { + "epoch": 0.6012920324664568, + "grad_norm": 1003.1431274414062, + "learning_rate": 1.6849247486322272e-05, + "loss": 54.0899, + "step": 148830 + }, + { + "epoch": 0.6013324337318244, + "grad_norm": 744.8280639648438, + "learning_rate": 1.684648986056355e-05, + "loss": 71.261, + "step": 148840 + }, + { + "epoch": 0.6013728349971921, + "grad_norm": 498.3786315917969, + "learning_rate": 1.6843732296284133e-05, + "loss": 64.8549, + "step": 148850 + }, + { + "epoch": 0.6014132362625597, + "grad_norm": 891.4931640625, + "learning_rate": 1.6840974793537774e-05, + "loss": 104.6242, + "step": 148860 + }, + { + "epoch": 0.6014536375279274, + "grad_norm": 349.5879821777344, + "learning_rate": 1.6838217352378243e-05, + "loss": 52.6752, + "step": 148870 + }, + { + "epoch": 0.601494038793295, + "grad_norm": 897.1039428710938, + "learning_rate": 1.6835459972859284e-05, + "loss": 94.8557, + "step": 148880 + }, + { + "epoch": 0.6015344400586626, + "grad_norm": 1024.3824462890625, + "learning_rate": 1.6832702655034672e-05, + "loss": 78.1146, + "step": 148890 + }, + { + "epoch": 0.6015748413240303, + "grad_norm": 559.740478515625, + "learning_rate": 1.6829945398958152e-05, + "loss": 73.8243, + "step": 148900 + }, + { + "epoch": 0.6016152425893979, + "grad_norm": 802.4649658203125, + "learning_rate": 1.682718820468347e-05, + "loss": 115.119, + "step": 148910 + }, + { + "epoch": 0.6016556438547656, + "grad_norm": 1395.499755859375, + "learning_rate": 1.6824431072264395e-05, + "loss": 74.4616, + "step": 148920 + }, + { + "epoch": 0.6016960451201332, + "grad_norm": 850.8084106445312, + "learning_rate": 1.682167400175467e-05, + "loss": 67.5796, + "step": 148930 + }, + { + "epoch": 0.6017364463855008, + "grad_norm": 590.1466674804688, + "learning_rate": 1.6818916993208038e-05, + "loss": 106.0871, + "step": 148940 + }, + { + "epoch": 0.6017768476508685, + "grad_norm": 478.3706359863281, + "learning_rate": 1.681616004667827e-05, + "loss": 89.8742, + "step": 148950 + }, + { + "epoch": 0.601817248916236, + "grad_norm": 339.9763488769531, + "learning_rate": 1.6813403162219094e-05, + "loss": 42.36, + "step": 148960 + }, + { + "epoch": 0.6018576501816036, + "grad_norm": 748.9717407226562, + "learning_rate": 1.6810646339884266e-05, + "loss": 76.6158, + "step": 148970 + }, + { + "epoch": 0.6018980514469713, + "grad_norm": 622.640380859375, 
+ "learning_rate": 1.6807889579727532e-05, + "loss": 71.7131, + "step": 148980 + }, + { + "epoch": 0.6019384527123389, + "grad_norm": 377.0371398925781, + "learning_rate": 1.6805132881802623e-05, + "loss": 89.7465, + "step": 148990 + }, + { + "epoch": 0.6019788539777066, + "grad_norm": 595.3945922851562, + "learning_rate": 1.6802376246163307e-05, + "loss": 74.9032, + "step": 149000 + }, + { + "epoch": 0.6020192552430742, + "grad_norm": 633.1327514648438, + "learning_rate": 1.6799619672863308e-05, + "loss": 77.8001, + "step": 149010 + }, + { + "epoch": 0.6020596565084418, + "grad_norm": 720.2255859375, + "learning_rate": 1.6796863161956363e-05, + "loss": 74.8213, + "step": 149020 + }, + { + "epoch": 0.6021000577738095, + "grad_norm": 678.586181640625, + "learning_rate": 1.679410671349623e-05, + "loss": 71.4302, + "step": 149030 + }, + { + "epoch": 0.6021404590391771, + "grad_norm": 807.252197265625, + "learning_rate": 1.6791350327536632e-05, + "loss": 80.2325, + "step": 149040 + }, + { + "epoch": 0.6021808603045448, + "grad_norm": 770.0614624023438, + "learning_rate": 1.6788594004131316e-05, + "loss": 95.0492, + "step": 149050 + }, + { + "epoch": 0.6022212615699124, + "grad_norm": 864.5762939453125, + "learning_rate": 1.6785837743334013e-05, + "loss": 57.4488, + "step": 149060 + }, + { + "epoch": 0.60226166283528, + "grad_norm": 570.93994140625, + "learning_rate": 1.678308154519845e-05, + "loss": 53.4821, + "step": 149070 + }, + { + "epoch": 0.6023020641006477, + "grad_norm": 593.1246948242188, + "learning_rate": 1.6780325409778373e-05, + "loss": 80.2378, + "step": 149080 + }, + { + "epoch": 0.6023424653660152, + "grad_norm": 584.2158203125, + "learning_rate": 1.6777569337127514e-05, + "loss": 44.884, + "step": 149090 + }, + { + "epoch": 0.6023828666313829, + "grad_norm": 690.8095092773438, + "learning_rate": 1.6774813327299592e-05, + "loss": 33.7261, + "step": 149100 + }, + { + "epoch": 0.6024232678967505, + "grad_norm": 1040.266357421875, + "learning_rate": 1.677205738034835e-05, + "loss": 54.5771, + "step": 149110 + }, + { + "epoch": 0.6024636691621181, + "grad_norm": 582.3399658203125, + "learning_rate": 1.676930149632751e-05, + "loss": 52.0308, + "step": 149120 + }, + { + "epoch": 0.6025040704274858, + "grad_norm": 590.16015625, + "learning_rate": 1.67665456752908e-05, + "loss": 70.7117, + "step": 149130 + }, + { + "epoch": 0.6025444716928534, + "grad_norm": 622.6539306640625, + "learning_rate": 1.6763789917291952e-05, + "loss": 58.24, + "step": 149140 + }, + { + "epoch": 0.602584872958221, + "grad_norm": 689.0911865234375, + "learning_rate": 1.6761034222384672e-05, + "loss": 70.9869, + "step": 149150 + }, + { + "epoch": 0.6026252742235887, + "grad_norm": 741.4791870117188, + "learning_rate": 1.675827859062271e-05, + "loss": 68.5661, + "step": 149160 + }, + { + "epoch": 0.6026656754889563, + "grad_norm": 611.5933837890625, + "learning_rate": 1.6755523022059776e-05, + "loss": 64.2361, + "step": 149170 + }, + { + "epoch": 0.602706076754324, + "grad_norm": 900.6167602539062, + "learning_rate": 1.675276751674958e-05, + "loss": 69.4712, + "step": 149180 + }, + { + "epoch": 0.6027464780196916, + "grad_norm": 652.5316162109375, + "learning_rate": 1.6750012074745863e-05, + "loss": 66.309, + "step": 149190 + }, + { + "epoch": 0.6027868792850593, + "grad_norm": 531.3875732421875, + "learning_rate": 1.674725669610233e-05, + "loss": 91.0316, + "step": 149200 + }, + { + "epoch": 0.6028272805504269, + "grad_norm": 935.9772338867188, + "learning_rate": 1.6744501380872706e-05, + "loss": 66.142, + 
"step": 149210 + }, + { + "epoch": 0.6028676818157944, + "grad_norm": 550.491943359375, + "learning_rate": 1.67417461291107e-05, + "loss": 65.6835, + "step": 149220 + }, + { + "epoch": 0.6029080830811621, + "grad_norm": 525.212890625, + "learning_rate": 1.6738990940870028e-05, + "loss": 84.4861, + "step": 149230 + }, + { + "epoch": 0.6029484843465297, + "grad_norm": 710.5709838867188, + "learning_rate": 1.6736235816204414e-05, + "loss": 80.8305, + "step": 149240 + }, + { + "epoch": 0.6029888856118973, + "grad_norm": 502.1382751464844, + "learning_rate": 1.6733480755167562e-05, + "loss": 70.7394, + "step": 149250 + }, + { + "epoch": 0.603029286877265, + "grad_norm": 461.452392578125, + "learning_rate": 1.6730725757813175e-05, + "loss": 58.613, + "step": 149260 + }, + { + "epoch": 0.6030696881426326, + "grad_norm": 671.01171875, + "learning_rate": 1.6727970824194982e-05, + "loss": 59.4209, + "step": 149270 + }, + { + "epoch": 0.6031100894080003, + "grad_norm": 820.2286987304688, + "learning_rate": 1.6725215954366677e-05, + "loss": 50.2212, + "step": 149280 + }, + { + "epoch": 0.6031504906733679, + "grad_norm": 890.6653442382812, + "learning_rate": 1.6722461148381976e-05, + "loss": 75.6839, + "step": 149290 + }, + { + "epoch": 0.6031908919387355, + "grad_norm": 593.6481323242188, + "learning_rate": 1.671970640629458e-05, + "loss": 95.4418, + "step": 149300 + }, + { + "epoch": 0.6032312932041032, + "grad_norm": 768.9072875976562, + "learning_rate": 1.67169517281582e-05, + "loss": 69.8285, + "step": 149310 + }, + { + "epoch": 0.6032716944694708, + "grad_norm": 555.86572265625, + "learning_rate": 1.6714197114026537e-05, + "loss": 79.0492, + "step": 149320 + }, + { + "epoch": 0.6033120957348385, + "grad_norm": 525.4534912109375, + "learning_rate": 1.671144256395329e-05, + "loss": 72.4045, + "step": 149330 + }, + { + "epoch": 0.6033524970002061, + "grad_norm": 487.48626708984375, + "learning_rate": 1.670868807799216e-05, + "loss": 82.1369, + "step": 149340 + }, + { + "epoch": 0.6033928982655736, + "grad_norm": 765.4730834960938, + "learning_rate": 1.6705933656196857e-05, + "loss": 48.2028, + "step": 149350 + }, + { + "epoch": 0.6034332995309413, + "grad_norm": 784.09326171875, + "learning_rate": 1.6703179298621065e-05, + "loss": 51.75, + "step": 149360 + }, + { + "epoch": 0.6034737007963089, + "grad_norm": 321.2127990722656, + "learning_rate": 1.6700425005318498e-05, + "loss": 71.4349, + "step": 149370 + }, + { + "epoch": 0.6035141020616765, + "grad_norm": 3002.350341796875, + "learning_rate": 1.6697670776342843e-05, + "loss": 93.2889, + "step": 149380 + }, + { + "epoch": 0.6035545033270442, + "grad_norm": 426.686767578125, + "learning_rate": 1.6694916611747793e-05, + "loss": 72.9205, + "step": 149390 + }, + { + "epoch": 0.6035949045924118, + "grad_norm": 1384.7955322265625, + "learning_rate": 1.6692162511587048e-05, + "loss": 84.4501, + "step": 149400 + }, + { + "epoch": 0.6036353058577795, + "grad_norm": 354.49786376953125, + "learning_rate": 1.6689408475914293e-05, + "loss": 86.145, + "step": 149410 + }, + { + "epoch": 0.6036757071231471, + "grad_norm": 721.7005004882812, + "learning_rate": 1.6686654504783232e-05, + "loss": 68.221, + "step": 149420 + }, + { + "epoch": 0.6037161083885147, + "grad_norm": 636.4378662109375, + "learning_rate": 1.6683900598247544e-05, + "loss": 48.1039, + "step": 149430 + }, + { + "epoch": 0.6037565096538824, + "grad_norm": 723.943359375, + "learning_rate": 1.6681146756360914e-05, + "loss": 107.5957, + "step": 149440 + }, + { + "epoch": 0.60379691091925, + 
"grad_norm": 615.4259643554688, + "learning_rate": 1.6678392979177044e-05, + "loss": 64.8472, + "step": 149450 + }, + { + "epoch": 0.6038373121846177, + "grad_norm": 358.4313049316406, + "learning_rate": 1.6675639266749618e-05, + "loss": 89.5187, + "step": 149460 + }, + { + "epoch": 0.6038777134499852, + "grad_norm": 768.6290283203125, + "learning_rate": 1.6672885619132305e-05, + "loss": 66.133, + "step": 149470 + }, + { + "epoch": 0.6039181147153528, + "grad_norm": 620.7903442382812, + "learning_rate": 1.6670132036378807e-05, + "loss": 58.8034, + "step": 149480 + }, + { + "epoch": 0.6039585159807205, + "grad_norm": 662.9428100585938, + "learning_rate": 1.666737851854279e-05, + "loss": 68.4493, + "step": 149490 + }, + { + "epoch": 0.6039989172460881, + "grad_norm": 511.2181396484375, + "learning_rate": 1.6664625065677957e-05, + "loss": 97.039, + "step": 149500 + }, + { + "epoch": 0.6040393185114558, + "grad_norm": 704.4388427734375, + "learning_rate": 1.666187167783797e-05, + "loss": 56.8005, + "step": 149510 + }, + { + "epoch": 0.6040797197768234, + "grad_norm": 652.1463012695312, + "learning_rate": 1.665911835507651e-05, + "loss": 76.4674, + "step": 149520 + }, + { + "epoch": 0.604120121042191, + "grad_norm": 543.5281982421875, + "learning_rate": 1.665636509744726e-05, + "loss": 64.9634, + "step": 149530 + }, + { + "epoch": 0.6041605223075587, + "grad_norm": 636.3565063476562, + "learning_rate": 1.6653611905003902e-05, + "loss": 67.793, + "step": 149540 + }, + { + "epoch": 0.6042009235729263, + "grad_norm": 1451.247314453125, + "learning_rate": 1.665085877780009e-05, + "loss": 96.7768, + "step": 149550 + }, + { + "epoch": 0.604241324838294, + "grad_norm": 735.0396728515625, + "learning_rate": 1.664810571588952e-05, + "loss": 71.1088, + "step": 149560 + }, + { + "epoch": 0.6042817261036616, + "grad_norm": 794.5078125, + "learning_rate": 1.6645352719325844e-05, + "loss": 103.3356, + "step": 149570 + }, + { + "epoch": 0.6043221273690292, + "grad_norm": 1575.6268310546875, + "learning_rate": 1.6642599788162754e-05, + "loss": 104.5613, + "step": 149580 + }, + { + "epoch": 0.6043625286343969, + "grad_norm": 1317.53173828125, + "learning_rate": 1.663984692245391e-05, + "loss": 84.682, + "step": 149590 + }, + { + "epoch": 0.6044029298997644, + "grad_norm": 332.6781311035156, + "learning_rate": 1.663709412225297e-05, + "loss": 73.7738, + "step": 149600 + }, + { + "epoch": 0.604443331165132, + "grad_norm": 781.8280639648438, + "learning_rate": 1.663434138761362e-05, + "loss": 55.8138, + "step": 149610 + }, + { + "epoch": 0.6044837324304997, + "grad_norm": 607.5905151367188, + "learning_rate": 1.6631588718589517e-05, + "loss": 61.3013, + "step": 149620 + }, + { + "epoch": 0.6045241336958673, + "grad_norm": 1418.001708984375, + "learning_rate": 1.662883611523432e-05, + "loss": 77.6554, + "step": 149630 + }, + { + "epoch": 0.604564534961235, + "grad_norm": 798.2886962890625, + "learning_rate": 1.6626083577601704e-05, + "loss": 96.2976, + "step": 149640 + }, + { + "epoch": 0.6046049362266026, + "grad_norm": 864.41259765625, + "learning_rate": 1.6623331105745323e-05, + "loss": 82.0068, + "step": 149650 + }, + { + "epoch": 0.6046453374919702, + "grad_norm": 706.48876953125, + "learning_rate": 1.6620578699718842e-05, + "loss": 64.5769, + "step": 149660 + }, + { + "epoch": 0.6046857387573379, + "grad_norm": 407.585693359375, + "learning_rate": 1.6617826359575925e-05, + "loss": 52.9222, + "step": 149670 + }, + { + "epoch": 0.6047261400227055, + "grad_norm": 477.0304260253906, + "learning_rate": 
1.661507408537021e-05, + "loss": 49.4214, + "step": 149680 + }, + { + "epoch": 0.6047665412880732, + "grad_norm": 374.3786926269531, + "learning_rate": 1.661232187715538e-05, + "loss": 76.5732, + "step": 149690 + }, + { + "epoch": 0.6048069425534408, + "grad_norm": 947.44873046875, + "learning_rate": 1.660956973498508e-05, + "loss": 82.8288, + "step": 149700 + }, + { + "epoch": 0.6048473438188084, + "grad_norm": 1378.7774658203125, + "learning_rate": 1.6606817658912952e-05, + "loss": 108.1522, + "step": 149710 + }, + { + "epoch": 0.6048877450841761, + "grad_norm": 1184.6864013671875, + "learning_rate": 1.660406564899267e-05, + "loss": 65.4671, + "step": 149720 + }, + { + "epoch": 0.6049281463495436, + "grad_norm": 656.0692138671875, + "learning_rate": 1.6601313705277876e-05, + "loss": 52.786, + "step": 149730 + }, + { + "epoch": 0.6049685476149113, + "grad_norm": 535.6517944335938, + "learning_rate": 1.659856182782222e-05, + "loss": 120.2034, + "step": 149740 + }, + { + "epoch": 0.6050089488802789, + "grad_norm": 855.0171508789062, + "learning_rate": 1.6595810016679355e-05, + "loss": 91.7018, + "step": 149750 + }, + { + "epoch": 0.6050493501456465, + "grad_norm": 738.8018798828125, + "learning_rate": 1.6593058271902916e-05, + "loss": 84.59, + "step": 149760 + }, + { + "epoch": 0.6050897514110142, + "grad_norm": 789.2011108398438, + "learning_rate": 1.6590306593546573e-05, + "loss": 80.8233, + "step": 149770 + }, + { + "epoch": 0.6051301526763818, + "grad_norm": 121.0404281616211, + "learning_rate": 1.6587554981663953e-05, + "loss": 63.3022, + "step": 149780 + }, + { + "epoch": 0.6051705539417495, + "grad_norm": 557.0289306640625, + "learning_rate": 1.65848034363087e-05, + "loss": 75.7135, + "step": 149790 + }, + { + "epoch": 0.6052109552071171, + "grad_norm": 745.7527465820312, + "learning_rate": 1.658205195753447e-05, + "loss": 49.3655, + "step": 149800 + }, + { + "epoch": 0.6052513564724847, + "grad_norm": 1494.465576171875, + "learning_rate": 1.6579300545394895e-05, + "loss": 66.1569, + "step": 149810 + }, + { + "epoch": 0.6052917577378524, + "grad_norm": 1017.7464599609375, + "learning_rate": 1.6576549199943618e-05, + "loss": 67.0836, + "step": 149820 + }, + { + "epoch": 0.60533215900322, + "grad_norm": 831.3901977539062, + "learning_rate": 1.6573797921234276e-05, + "loss": 75.5788, + "step": 149830 + }, + { + "epoch": 0.6053725602685877, + "grad_norm": 673.5865478515625, + "learning_rate": 1.6571046709320498e-05, + "loss": 63.7423, + "step": 149840 + }, + { + "epoch": 0.6054129615339553, + "grad_norm": 836.1609497070312, + "learning_rate": 1.6568295564255943e-05, + "loss": 81.9902, + "step": 149850 + }, + { + "epoch": 0.6054533627993228, + "grad_norm": 834.1426391601562, + "learning_rate": 1.656554448609423e-05, + "loss": 72.4867, + "step": 149860 + }, + { + "epoch": 0.6054937640646905, + "grad_norm": 733.5240478515625, + "learning_rate": 1.656279347488899e-05, + "loss": 77.8998, + "step": 149870 + }, + { + "epoch": 0.6055341653300581, + "grad_norm": 996.1405029296875, + "learning_rate": 1.6560042530693865e-05, + "loss": 85.1212, + "step": 149880 + }, + { + "epoch": 0.6055745665954257, + "grad_norm": 897.6669311523438, + "learning_rate": 1.655729165356248e-05, + "loss": 87.4439, + "step": 149890 + }, + { + "epoch": 0.6056149678607934, + "grad_norm": 308.4402160644531, + "learning_rate": 1.655454084354847e-05, + "loss": 84.0733, + "step": 149900 + }, + { + "epoch": 0.605655369126161, + "grad_norm": 1044.991455078125, + "learning_rate": 1.6551790100705457e-05, + "loss": 85.7082, + 
"step": 149910 + }, + { + "epoch": 0.6056957703915287, + "grad_norm": 375.5672607421875, + "learning_rate": 1.654903942508707e-05, + "loss": 72.377, + "step": 149920 + }, + { + "epoch": 0.6057361716568963, + "grad_norm": 727.2952880859375, + "learning_rate": 1.654628881674694e-05, + "loss": 98.0163, + "step": 149930 + }, + { + "epoch": 0.605776572922264, + "grad_norm": 1685.47021484375, + "learning_rate": 1.654353827573868e-05, + "loss": 66.4229, + "step": 149940 + }, + { + "epoch": 0.6058169741876316, + "grad_norm": 600.80615234375, + "learning_rate": 1.654078780211593e-05, + "loss": 46.9567, + "step": 149950 + }, + { + "epoch": 0.6058573754529992, + "grad_norm": 976.967041015625, + "learning_rate": 1.65380373959323e-05, + "loss": 92.9235, + "step": 149960 + }, + { + "epoch": 0.6058977767183669, + "grad_norm": 554.918701171875, + "learning_rate": 1.6535287057241408e-05, + "loss": 75.7094, + "step": 149970 + }, + { + "epoch": 0.6059381779837345, + "grad_norm": 1187.9927978515625, + "learning_rate": 1.6532536786096885e-05, + "loss": 83.1481, + "step": 149980 + }, + { + "epoch": 0.605978579249102, + "grad_norm": 568.3271484375, + "learning_rate": 1.6529786582552345e-05, + "loss": 73.1073, + "step": 149990 + }, + { + "epoch": 0.6060189805144697, + "grad_norm": 492.823974609375, + "learning_rate": 1.6527036446661396e-05, + "loss": 86.7833, + "step": 150000 + }, + { + "epoch": 0.6060593817798373, + "grad_norm": 2989.4619140625, + "learning_rate": 1.6524286378477664e-05, + "loss": 92.766, + "step": 150010 + }, + { + "epoch": 0.606099783045205, + "grad_norm": 692.8067016601562, + "learning_rate": 1.652153637805475e-05, + "loss": 39.5081, + "step": 150020 + }, + { + "epoch": 0.6061401843105726, + "grad_norm": 895.3536376953125, + "learning_rate": 1.6518786445446282e-05, + "loss": 76.9141, + "step": 150030 + }, + { + "epoch": 0.6061805855759402, + "grad_norm": 811.5888671875, + "learning_rate": 1.6516036580705867e-05, + "loss": 104.8454, + "step": 150040 + }, + { + "epoch": 0.6062209868413079, + "grad_norm": 492.9122009277344, + "learning_rate": 1.65132867838871e-05, + "loss": 61.2365, + "step": 150050 + }, + { + "epoch": 0.6062613881066755, + "grad_norm": 587.114990234375, + "learning_rate": 1.6510537055043615e-05, + "loss": 90.4639, + "step": 150060 + }, + { + "epoch": 0.6063017893720432, + "grad_norm": 228.83042907714844, + "learning_rate": 1.6507787394229008e-05, + "loss": 66.887, + "step": 150070 + }, + { + "epoch": 0.6063421906374108, + "grad_norm": 513.0355834960938, + "learning_rate": 1.6505037801496876e-05, + "loss": 59.7084, + "step": 150080 + }, + { + "epoch": 0.6063825919027784, + "grad_norm": 943.17333984375, + "learning_rate": 1.6502288276900834e-05, + "loss": 80.9398, + "step": 150090 + }, + { + "epoch": 0.6064229931681461, + "grad_norm": 921.53564453125, + "learning_rate": 1.6499538820494477e-05, + "loss": 90.1401, + "step": 150100 + }, + { + "epoch": 0.6064633944335136, + "grad_norm": 1107.1619873046875, + "learning_rate": 1.649678943233142e-05, + "loss": 68.4341, + "step": 150110 + }, + { + "epoch": 0.6065037956988812, + "grad_norm": 730.8287963867188, + "learning_rate": 1.649404011246526e-05, + "loss": 73.3479, + "step": 150120 + }, + { + "epoch": 0.6065441969642489, + "grad_norm": 760.67822265625, + "learning_rate": 1.6491290860949582e-05, + "loss": 61.4219, + "step": 150130 + }, + { + "epoch": 0.6065845982296165, + "grad_norm": 1854.2744140625, + "learning_rate": 1.6488541677838003e-05, + "loss": 83.0901, + "step": 150140 + }, + { + "epoch": 0.6066249994949842, + "grad_norm": 
1114.8675537109375, + "learning_rate": 1.6485792563184114e-05, + "loss": 74.9487, + "step": 150150 + }, + { + "epoch": 0.6066654007603518, + "grad_norm": 531.4180908203125, + "learning_rate": 1.64830435170415e-05, + "loss": 83.4952, + "step": 150160 + }, + { + "epoch": 0.6067058020257194, + "grad_norm": 402.4681396484375, + "learning_rate": 1.6480294539463774e-05, + "loss": 55.8494, + "step": 150170 + }, + { + "epoch": 0.6067462032910871, + "grad_norm": 740.625244140625, + "learning_rate": 1.6477545630504505e-05, + "loss": 84.0706, + "step": 150180 + }, + { + "epoch": 0.6067866045564547, + "grad_norm": 446.0681457519531, + "learning_rate": 1.6474796790217307e-05, + "loss": 55.6698, + "step": 150190 + }, + { + "epoch": 0.6068270058218224, + "grad_norm": 617.6613159179688, + "learning_rate": 1.6472048018655757e-05, + "loss": 61.1605, + "step": 150200 + }, + { + "epoch": 0.60686740708719, + "grad_norm": 873.3192138671875, + "learning_rate": 1.6469299315873445e-05, + "loss": 76.6178, + "step": 150210 + }, + { + "epoch": 0.6069078083525576, + "grad_norm": 931.4111938476562, + "learning_rate": 1.646655068192396e-05, + "loss": 113.4959, + "step": 150220 + }, + { + "epoch": 0.6069482096179253, + "grad_norm": 787.4359741210938, + "learning_rate": 1.64638021168609e-05, + "loss": 69.1384, + "step": 150230 + }, + { + "epoch": 0.6069886108832928, + "grad_norm": 1279.7525634765625, + "learning_rate": 1.6461053620737825e-05, + "loss": 89.1575, + "step": 150240 + }, + { + "epoch": 0.6070290121486605, + "grad_norm": 1189.17431640625, + "learning_rate": 1.645830519360834e-05, + "loss": 68.67, + "step": 150250 + }, + { + "epoch": 0.6070694134140281, + "grad_norm": 794.5217895507812, + "learning_rate": 1.645555683552601e-05, + "loss": 81.9453, + "step": 150260 + }, + { + "epoch": 0.6071098146793957, + "grad_norm": 909.6439819335938, + "learning_rate": 1.6452808546544433e-05, + "loss": 53.9933, + "step": 150270 + }, + { + "epoch": 0.6071502159447634, + "grad_norm": 580.2351684570312, + "learning_rate": 1.645006032671718e-05, + "loss": 60.5806, + "step": 150280 + }, + { + "epoch": 0.607190617210131, + "grad_norm": 1584.9796142578125, + "learning_rate": 1.6447312176097815e-05, + "loss": 92.8484, + "step": 150290 + }, + { + "epoch": 0.6072310184754987, + "grad_norm": 639.7561645507812, + "learning_rate": 1.6444564094739944e-05, + "loss": 56.8418, + "step": 150300 + }, + { + "epoch": 0.6072714197408663, + "grad_norm": 1053.861328125, + "learning_rate": 1.644181608269712e-05, + "loss": 70.073, + "step": 150310 + }, + { + "epoch": 0.6073118210062339, + "grad_norm": 489.1802062988281, + "learning_rate": 1.6439068140022917e-05, + "loss": 67.1787, + "step": 150320 + }, + { + "epoch": 0.6073522222716016, + "grad_norm": 507.907958984375, + "learning_rate": 1.6436320266770925e-05, + "loss": 53.1605, + "step": 150330 + }, + { + "epoch": 0.6073926235369692, + "grad_norm": 675.6184692382812, + "learning_rate": 1.64335724629947e-05, + "loss": 84.0167, + "step": 150340 + }, + { + "epoch": 0.6074330248023369, + "grad_norm": 741.65673828125, + "learning_rate": 1.6430824728747816e-05, + "loss": 66.6006, + "step": 150350 + }, + { + "epoch": 0.6074734260677045, + "grad_norm": 583.593505859375, + "learning_rate": 1.6428077064083843e-05, + "loss": 68.2358, + "step": 150360 + }, + { + "epoch": 0.607513827333072, + "grad_norm": 586.6576538085938, + "learning_rate": 1.642532946905634e-05, + "loss": 54.8466, + "step": 150370 + }, + { + "epoch": 0.6075542285984397, + "grad_norm": 702.9231567382812, + "learning_rate": 
1.6422581943718884e-05, + "loss": 54.1327, + "step": 150380 + }, + { + "epoch": 0.6075946298638073, + "grad_norm": 705.3573608398438, + "learning_rate": 1.6419834488125038e-05, + "loss": 116.5971, + "step": 150390 + }, + { + "epoch": 0.6076350311291749, + "grad_norm": 505.7421875, + "learning_rate": 1.6417087102328356e-05, + "loss": 81.0823, + "step": 150400 + }, + { + "epoch": 0.6076754323945426, + "grad_norm": 937.9141235351562, + "learning_rate": 1.641433978638241e-05, + "loss": 134.2539, + "step": 150410 + }, + { + "epoch": 0.6077158336599102, + "grad_norm": 345.1689453125, + "learning_rate": 1.6411592540340754e-05, + "loss": 48.0515, + "step": 150420 + }, + { + "epoch": 0.6077562349252779, + "grad_norm": 665.5704956054688, + "learning_rate": 1.640884536425695e-05, + "loss": 62.0314, + "step": 150430 + }, + { + "epoch": 0.6077966361906455, + "grad_norm": 365.30242919921875, + "learning_rate": 1.6406098258184556e-05, + "loss": 60.5775, + "step": 150440 + }, + { + "epoch": 0.6078370374560131, + "grad_norm": 1095.2176513671875, + "learning_rate": 1.6403351222177117e-05, + "loss": 73.5437, + "step": 150450 + }, + { + "epoch": 0.6078774387213808, + "grad_norm": 728.5873413085938, + "learning_rate": 1.640060425628821e-05, + "loss": 80.649, + "step": 150460 + }, + { + "epoch": 0.6079178399867484, + "grad_norm": 2438.779296875, + "learning_rate": 1.639785736057136e-05, + "loss": 86.1176, + "step": 150470 + }, + { + "epoch": 0.6079582412521161, + "grad_norm": 695.301513671875, + "learning_rate": 1.639511053508015e-05, + "loss": 61.0479, + "step": 150480 + }, + { + "epoch": 0.6079986425174837, + "grad_norm": 1362.8992919921875, + "learning_rate": 1.6392363779868118e-05, + "loss": 112.151, + "step": 150490 + }, + { + "epoch": 0.6080390437828512, + "grad_norm": 662.6505737304688, + "learning_rate": 1.6389617094988802e-05, + "loss": 62.1723, + "step": 150500 + }, + { + "epoch": 0.6080794450482189, + "grad_norm": 717.7249755859375, + "learning_rate": 1.6386870480495765e-05, + "loss": 79.5404, + "step": 150510 + }, + { + "epoch": 0.6081198463135865, + "grad_norm": 921.2769165039062, + "learning_rate": 1.638412393644255e-05, + "loss": 58.8594, + "step": 150520 + }, + { + "epoch": 0.6081602475789541, + "grad_norm": 608.341064453125, + "learning_rate": 1.638137746288269e-05, + "loss": 78.3243, + "step": 150530 + }, + { + "epoch": 0.6082006488443218, + "grad_norm": 644.6043701171875, + "learning_rate": 1.637863105986975e-05, + "loss": 54.8395, + "step": 150540 + }, + { + "epoch": 0.6082410501096894, + "grad_norm": 936.6647338867188, + "learning_rate": 1.637588472745725e-05, + "loss": 82.4412, + "step": 150550 + }, + { + "epoch": 0.6082814513750571, + "grad_norm": 812.483642578125, + "learning_rate": 1.6373138465698753e-05, + "loss": 52.8689, + "step": 150560 + }, + { + "epoch": 0.6083218526404247, + "grad_norm": 573.0987548828125, + "learning_rate": 1.6370392274647794e-05, + "loss": 82.9242, + "step": 150570 + }, + { + "epoch": 0.6083622539057923, + "grad_norm": 876.4087524414062, + "learning_rate": 1.6367646154357895e-05, + "loss": 56.19, + "step": 150580 + }, + { + "epoch": 0.60840265517116, + "grad_norm": 543.0292358398438, + "learning_rate": 1.6364900104882608e-05, + "loss": 64.5136, + "step": 150590 + }, + { + "epoch": 0.6084430564365276, + "grad_norm": 1523.5577392578125, + "learning_rate": 1.6362154126275467e-05, + "loss": 91.4029, + "step": 150600 + }, + { + "epoch": 0.6084834577018953, + "grad_norm": 1069.1248779296875, + "learning_rate": 1.635940821859e-05, + "loss": 93.77, + "step": 150610 + 
}, + { + "epoch": 0.6085238589672629, + "grad_norm": 475.1838073730469, + "learning_rate": 1.6356662381879747e-05, + "loss": 51.7248, + "step": 150620 + }, + { + "epoch": 0.6085642602326304, + "grad_norm": 693.1802368164062, + "learning_rate": 1.6353916616198234e-05, + "loss": 63.6237, + "step": 150630 + }, + { + "epoch": 0.6086046614979981, + "grad_norm": 1023.0438232421875, + "learning_rate": 1.6351170921598997e-05, + "loss": 98.0962, + "step": 150640 + }, + { + "epoch": 0.6086450627633657, + "grad_norm": 731.6085205078125, + "learning_rate": 1.6348425298135563e-05, + "loss": 64.242, + "step": 150650 + }, + { + "epoch": 0.6086854640287334, + "grad_norm": 752.8010864257812, + "learning_rate": 1.634567974586145e-05, + "loss": 64.4947, + "step": 150660 + }, + { + "epoch": 0.608725865294101, + "grad_norm": 930.0340576171875, + "learning_rate": 1.63429342648302e-05, + "loss": 56.4645, + "step": 150670 + }, + { + "epoch": 0.6087662665594686, + "grad_norm": 456.8484802246094, + "learning_rate": 1.6340188855095326e-05, + "loss": 59.8653, + "step": 150680 + }, + { + "epoch": 0.6088066678248363, + "grad_norm": 887.51416015625, + "learning_rate": 1.6337443516710354e-05, + "loss": 72.362, + "step": 150690 + }, + { + "epoch": 0.6088470690902039, + "grad_norm": 898.3201904296875, + "learning_rate": 1.6334698249728812e-05, + "loss": 78.6538, + "step": 150700 + }, + { + "epoch": 0.6088874703555716, + "grad_norm": 359.8312683105469, + "learning_rate": 1.63319530542042e-05, + "loss": 62.6081, + "step": 150710 + }, + { + "epoch": 0.6089278716209392, + "grad_norm": 809.5452880859375, + "learning_rate": 1.6329207930190067e-05, + "loss": 60.2922, + "step": 150720 + }, + { + "epoch": 0.6089682728863068, + "grad_norm": 707.030029296875, + "learning_rate": 1.632646287773991e-05, + "loss": 46.1342, + "step": 150730 + }, + { + "epoch": 0.6090086741516745, + "grad_norm": 613.5623779296875, + "learning_rate": 1.6323717896907244e-05, + "loss": 64.3813, + "step": 150740 + }, + { + "epoch": 0.609049075417042, + "grad_norm": 410.35980224609375, + "learning_rate": 1.63209729877456e-05, + "loss": 51.4256, + "step": 150750 + }, + { + "epoch": 0.6090894766824096, + "grad_norm": 447.3254089355469, + "learning_rate": 1.631822815030848e-05, + "loss": 66.4962, + "step": 150760 + }, + { + "epoch": 0.6091298779477773, + "grad_norm": 423.9212646484375, + "learning_rate": 1.6315483384649395e-05, + "loss": 71.7863, + "step": 150770 + }, + { + "epoch": 0.6091702792131449, + "grad_norm": 833.4757080078125, + "learning_rate": 1.631273869082186e-05, + "loss": 101.3871, + "step": 150780 + }, + { + "epoch": 0.6092106804785126, + "grad_norm": 625.0147094726562, + "learning_rate": 1.6309994068879376e-05, + "loss": 66.7686, + "step": 150790 + }, + { + "epoch": 0.6092510817438802, + "grad_norm": 1007.3116455078125, + "learning_rate": 1.6307249518875466e-05, + "loss": 75.2715, + "step": 150800 + }, + { + "epoch": 0.6092914830092478, + "grad_norm": 1101.6529541015625, + "learning_rate": 1.6304505040863624e-05, + "loss": 63.6166, + "step": 150810 + }, + { + "epoch": 0.6093318842746155, + "grad_norm": 655.2672729492188, + "learning_rate": 1.6301760634897355e-05, + "loss": 49.9157, + "step": 150820 + }, + { + "epoch": 0.6093722855399831, + "grad_norm": 1036.1771240234375, + "learning_rate": 1.629901630103017e-05, + "loss": 74.8592, + "step": 150830 + }, + { + "epoch": 0.6094126868053508, + "grad_norm": 620.0659790039062, + "learning_rate": 1.6296272039315575e-05, + "loss": 106.7273, + "step": 150840 + }, + { + "epoch": 0.6094530880707184, + 
"grad_norm": 425.4994812011719, + "learning_rate": 1.6293527849807054e-05, + "loss": 64.1357, + "step": 150850 + }, + { + "epoch": 0.609493489336086, + "grad_norm": 535.3873901367188, + "learning_rate": 1.629078373255812e-05, + "loss": 50.7437, + "step": 150860 + }, + { + "epoch": 0.6095338906014537, + "grad_norm": 1370.73876953125, + "learning_rate": 1.6288039687622262e-05, + "loss": 46.4284, + "step": 150870 + }, + { + "epoch": 0.6095742918668212, + "grad_norm": 400.05181884765625, + "learning_rate": 1.6285295715052987e-05, + "loss": 50.9298, + "step": 150880 + }, + { + "epoch": 0.6096146931321889, + "grad_norm": 2033.00537109375, + "learning_rate": 1.6282551814903783e-05, + "loss": 77.3396, + "step": 150890 + }, + { + "epoch": 0.6096550943975565, + "grad_norm": 847.4696044921875, + "learning_rate": 1.6279807987228137e-05, + "loss": 88.2188, + "step": 150900 + }, + { + "epoch": 0.6096954956629241, + "grad_norm": 924.1599731445312, + "learning_rate": 1.6277064232079562e-05, + "loss": 90.438, + "step": 150910 + }, + { + "epoch": 0.6097358969282918, + "grad_norm": 1167.26025390625, + "learning_rate": 1.627432054951153e-05, + "loss": 75.7736, + "step": 150920 + }, + { + "epoch": 0.6097762981936594, + "grad_norm": 727.8522338867188, + "learning_rate": 1.6271576939577537e-05, + "loss": 78.2022, + "step": 150930 + }, + { + "epoch": 0.6098166994590271, + "grad_norm": 841.5473022460938, + "learning_rate": 1.6268833402331075e-05, + "loss": 84.735, + "step": 150940 + }, + { + "epoch": 0.6098571007243947, + "grad_norm": 513.004638671875, + "learning_rate": 1.6266089937825623e-05, + "loss": 93.0914, + "step": 150950 + }, + { + "epoch": 0.6098975019897623, + "grad_norm": 604.3025512695312, + "learning_rate": 1.6263346546114674e-05, + "loss": 79.3364, + "step": 150960 + }, + { + "epoch": 0.60993790325513, + "grad_norm": 964.5348510742188, + "learning_rate": 1.6260603227251706e-05, + "loss": 58.5438, + "step": 150970 + }, + { + "epoch": 0.6099783045204976, + "grad_norm": 489.9312744140625, + "learning_rate": 1.6257859981290197e-05, + "loss": 61.2081, + "step": 150980 + }, + { + "epoch": 0.6100187057858653, + "grad_norm": 561.61181640625, + "learning_rate": 1.6255116808283643e-05, + "loss": 78.5013, + "step": 150990 + }, + { + "epoch": 0.6100591070512329, + "grad_norm": 457.0298156738281, + "learning_rate": 1.6252373708285505e-05, + "loss": 78.8762, + "step": 151000 + }, + { + "epoch": 0.6100995083166004, + "grad_norm": 708.0266723632812, + "learning_rate": 1.624963068134928e-05, + "loss": 71.8332, + "step": 151010 + }, + { + "epoch": 0.6101399095819681, + "grad_norm": 935.8271484375, + "learning_rate": 1.6246887727528436e-05, + "loss": 67.6604, + "step": 151020 + }, + { + "epoch": 0.6101803108473357, + "grad_norm": 594.5115356445312, + "learning_rate": 1.6244144846876442e-05, + "loss": 78.7534, + "step": 151030 + }, + { + "epoch": 0.6102207121127033, + "grad_norm": 791.379150390625, + "learning_rate": 1.6241402039446784e-05, + "loss": 66.3081, + "step": 151040 + }, + { + "epoch": 0.610261113378071, + "grad_norm": 760.36181640625, + "learning_rate": 1.6238659305292926e-05, + "loss": 70.7984, + "step": 151050 + }, + { + "epoch": 0.6103015146434386, + "grad_norm": 356.96112060546875, + "learning_rate": 1.6235916644468337e-05, + "loss": 48.0359, + "step": 151060 + }, + { + "epoch": 0.6103419159088063, + "grad_norm": 1239.9888916015625, + "learning_rate": 1.6233174057026498e-05, + "loss": 71.5684, + "step": 151070 + }, + { + "epoch": 0.6103823171741739, + "grad_norm": 753.3863525390625, + "learning_rate": 
1.623043154302086e-05, + "loss": 70.0568, + "step": 151080 + }, + { + "epoch": 0.6104227184395415, + "grad_norm": 963.7362060546875, + "learning_rate": 1.622768910250491e-05, + "loss": 81.794, + "step": 151090 + }, + { + "epoch": 0.6104631197049092, + "grad_norm": 559.18994140625, + "learning_rate": 1.6224946735532102e-05, + "loss": 63.9879, + "step": 151100 + }, + { + "epoch": 0.6105035209702768, + "grad_norm": 436.7294006347656, + "learning_rate": 1.6222204442155894e-05, + "loss": 70.0454, + "step": 151110 + }, + { + "epoch": 0.6105439222356445, + "grad_norm": 450.8589782714844, + "learning_rate": 1.6219462222429763e-05, + "loss": 69.2345, + "step": 151120 + }, + { + "epoch": 0.6105843235010121, + "grad_norm": 1086.546875, + "learning_rate": 1.621672007640716e-05, + "loss": 86.0063, + "step": 151130 + }, + { + "epoch": 0.6106247247663796, + "grad_norm": 518.0477294921875, + "learning_rate": 1.621397800414154e-05, + "loss": 64.4737, + "step": 151140 + }, + { + "epoch": 0.6106651260317473, + "grad_norm": 352.8130187988281, + "learning_rate": 1.621123600568638e-05, + "loss": 70.6507, + "step": 151150 + }, + { + "epoch": 0.6107055272971149, + "grad_norm": 1103.6044921875, + "learning_rate": 1.620849408109511e-05, + "loss": 84.6331, + "step": 151160 + }, + { + "epoch": 0.6107459285624826, + "grad_norm": 712.4826049804688, + "learning_rate": 1.6205752230421214e-05, + "loss": 68.8677, + "step": 151170 + }, + { + "epoch": 0.6107863298278502, + "grad_norm": 824.7449951171875, + "learning_rate": 1.6203010453718126e-05, + "loss": 87.235, + "step": 151180 + }, + { + "epoch": 0.6108267310932178, + "grad_norm": 852.8096313476562, + "learning_rate": 1.6200268751039304e-05, + "loss": 49.4923, + "step": 151190 + }, + { + "epoch": 0.6108671323585855, + "grad_norm": 876.839111328125, + "learning_rate": 1.6197527122438202e-05, + "loss": 87.248, + "step": 151200 + }, + { + "epoch": 0.6109075336239531, + "grad_norm": 346.9331970214844, + "learning_rate": 1.619478556796826e-05, + "loss": 83.3807, + "step": 151210 + }, + { + "epoch": 0.6109479348893208, + "grad_norm": 409.4013671875, + "learning_rate": 1.6192044087682934e-05, + "loss": 60.7418, + "step": 151220 + }, + { + "epoch": 0.6109883361546884, + "grad_norm": 394.60162353515625, + "learning_rate": 1.6189302681635673e-05, + "loss": 86.2352, + "step": 151230 + }, + { + "epoch": 0.611028737420056, + "grad_norm": 871.7366943359375, + "learning_rate": 1.618656134987991e-05, + "loss": 90.5249, + "step": 151240 + }, + { + "epoch": 0.6110691386854237, + "grad_norm": 1050.3035888671875, + "learning_rate": 1.6183820092469103e-05, + "loss": 72.2047, + "step": 151250 + }, + { + "epoch": 0.6111095399507913, + "grad_norm": 437.8131103515625, + "learning_rate": 1.6181078909456693e-05, + "loss": 58.397, + "step": 151260 + }, + { + "epoch": 0.6111499412161588, + "grad_norm": 779.978515625, + "learning_rate": 1.6178337800896105e-05, + "loss": 77.2737, + "step": 151270 + }, + { + "epoch": 0.6111903424815265, + "grad_norm": 385.6371154785156, + "learning_rate": 1.6175596766840795e-05, + "loss": 56.0035, + "step": 151280 + }, + { + "epoch": 0.6112307437468941, + "grad_norm": 573.8505859375, + "learning_rate": 1.6172855807344198e-05, + "loss": 85.3461, + "step": 151290 + }, + { + "epoch": 0.6112711450122618, + "grad_norm": 1491.432861328125, + "learning_rate": 1.617011492245974e-05, + "loss": 86.5483, + "step": 151300 + }, + { + "epoch": 0.6113115462776294, + "grad_norm": 1357.98583984375, + "learning_rate": 1.6167374112240874e-05, + "loss": 57.4247, + "step": 151310 + }, + { 
+ "epoch": 0.611351947542997, + "grad_norm": 656.769775390625, + "learning_rate": 1.6164633376741008e-05, + "loss": 98.4912, + "step": 151320 + }, + { + "epoch": 0.6113923488083647, + "grad_norm": 726.1454467773438, + "learning_rate": 1.61618927160136e-05, + "loss": 122.0449, + "step": 151330 + }, + { + "epoch": 0.6114327500737323, + "grad_norm": 967.343505859375, + "learning_rate": 1.615915213011207e-05, + "loss": 113.7478, + "step": 151340 + }, + { + "epoch": 0.6114731513391, + "grad_norm": 486.0296325683594, + "learning_rate": 1.6156411619089835e-05, + "loss": 61.9079, + "step": 151350 + }, + { + "epoch": 0.6115135526044676, + "grad_norm": 933.3528442382812, + "learning_rate": 1.6153671183000345e-05, + "loss": 78.3986, + "step": 151360 + }, + { + "epoch": 0.6115539538698352, + "grad_norm": 736.937255859375, + "learning_rate": 1.6150930821897018e-05, + "loss": 78.2913, + "step": 151370 + }, + { + "epoch": 0.6115943551352029, + "grad_norm": 736.2245483398438, + "learning_rate": 1.6148190535833268e-05, + "loss": 62.6361, + "step": 151380 + }, + { + "epoch": 0.6116347564005704, + "grad_norm": 569.416748046875, + "learning_rate": 1.6145450324862532e-05, + "loss": 72.5269, + "step": 151390 + }, + { + "epoch": 0.611675157665938, + "grad_norm": 881.7041625976562, + "learning_rate": 1.614271018903822e-05, + "loss": 80.1721, + "step": 151400 + }, + { + "epoch": 0.6117155589313057, + "grad_norm": 732.8095092773438, + "learning_rate": 1.6139970128413765e-05, + "loss": 86.2427, + "step": 151410 + }, + { + "epoch": 0.6117559601966733, + "grad_norm": 621.7105712890625, + "learning_rate": 1.6137230143042578e-05, + "loss": 66.9497, + "step": 151420 + }, + { + "epoch": 0.611796361462041, + "grad_norm": 659.6859741210938, + "learning_rate": 1.6134490232978074e-05, + "loss": 62.7153, + "step": 151430 + }, + { + "epoch": 0.6118367627274086, + "grad_norm": 573.3911743164062, + "learning_rate": 1.613175039827368e-05, + "loss": 58.6511, + "step": 151440 + }, + { + "epoch": 0.6118771639927763, + "grad_norm": 430.47625732421875, + "learning_rate": 1.6129010638982795e-05, + "loss": 68.0534, + "step": 151450 + }, + { + "epoch": 0.6119175652581439, + "grad_norm": 428.08135986328125, + "learning_rate": 1.612627095515885e-05, + "loss": 70.3877, + "step": 151460 + }, + { + "epoch": 0.6119579665235115, + "grad_norm": 353.5169372558594, + "learning_rate": 1.6123531346855242e-05, + "loss": 47.4761, + "step": 151470 + }, + { + "epoch": 0.6119983677888792, + "grad_norm": 302.42291259765625, + "learning_rate": 1.6120791814125384e-05, + "loss": 89.9149, + "step": 151480 + }, + { + "epoch": 0.6120387690542468, + "grad_norm": 529.8429565429688, + "learning_rate": 1.6118052357022693e-05, + "loss": 79.1062, + "step": 151490 + }, + { + "epoch": 0.6120791703196145, + "grad_norm": 258.8475341796875, + "learning_rate": 1.6115312975600566e-05, + "loss": 47.251, + "step": 151500 + }, + { + "epoch": 0.6121195715849821, + "grad_norm": 819.2401123046875, + "learning_rate": 1.6112573669912407e-05, + "loss": 69.9976, + "step": 151510 + }, + { + "epoch": 0.6121599728503496, + "grad_norm": 918.7113647460938, + "learning_rate": 1.6109834440011633e-05, + "loss": 73.8965, + "step": 151520 + }, + { + "epoch": 0.6122003741157173, + "grad_norm": 133.0743865966797, + "learning_rate": 1.6107095285951638e-05, + "loss": 95.0118, + "step": 151530 + }, + { + "epoch": 0.6122407753810849, + "grad_norm": 682.49267578125, + "learning_rate": 1.6104356207785826e-05, + "loss": 65.0548, + "step": 151540 + }, + { + "epoch": 0.6122811766464525, + "grad_norm": 
752.2581787109375, + "learning_rate": 1.6101617205567593e-05, + "loss": 46.5207, + "step": 151550 + }, + { + "epoch": 0.6123215779118202, + "grad_norm": 969.2288208007812, + "learning_rate": 1.6098878279350337e-05, + "loss": 42.9156, + "step": 151560 + }, + { + "epoch": 0.6123619791771878, + "grad_norm": 284.83795166015625, + "learning_rate": 1.6096139429187464e-05, + "loss": 59.6176, + "step": 151570 + }, + { + "epoch": 0.6124023804425555, + "grad_norm": 864.9432983398438, + "learning_rate": 1.6093400655132362e-05, + "loss": 69.4169, + "step": 151580 + }, + { + "epoch": 0.6124427817079231, + "grad_norm": 532.10107421875, + "learning_rate": 1.6090661957238417e-05, + "loss": 86.2737, + "step": 151590 + }, + { + "epoch": 0.6124831829732907, + "grad_norm": 823.8885498046875, + "learning_rate": 1.608792333555904e-05, + "loss": 57.7265, + "step": 151600 + }, + { + "epoch": 0.6125235842386584, + "grad_norm": 451.80810546875, + "learning_rate": 1.60851847901476e-05, + "loss": 68.2076, + "step": 151610 + }, + { + "epoch": 0.612563985504026, + "grad_norm": 780.7122192382812, + "learning_rate": 1.6082446321057508e-05, + "loss": 83.5442, + "step": 151620 + }, + { + "epoch": 0.6126043867693937, + "grad_norm": 605.3251953125, + "learning_rate": 1.6079707928342144e-05, + "loss": 42.6746, + "step": 151630 + }, + { + "epoch": 0.6126447880347613, + "grad_norm": 692.8709106445312, + "learning_rate": 1.6076969612054885e-05, + "loss": 61.5256, + "step": 151640 + }, + { + "epoch": 0.6126851893001288, + "grad_norm": 1217.9136962890625, + "learning_rate": 1.6074231372249128e-05, + "loss": 119.7272, + "step": 151650 + }, + { + "epoch": 0.6127255905654965, + "grad_norm": 439.9564514160156, + "learning_rate": 1.6071493208978252e-05, + "loss": 68.717, + "step": 151660 + }, + { + "epoch": 0.6127659918308641, + "grad_norm": 822.0838012695312, + "learning_rate": 1.6068755122295628e-05, + "loss": 59.2894, + "step": 151670 + }, + { + "epoch": 0.6128063930962317, + "grad_norm": 763.3595581054688, + "learning_rate": 1.6066017112254656e-05, + "loss": 58.0441, + "step": 151680 + }, + { + "epoch": 0.6128467943615994, + "grad_norm": 595.068115234375, + "learning_rate": 1.6063279178908698e-05, + "loss": 39.8049, + "step": 151690 + }, + { + "epoch": 0.612887195626967, + "grad_norm": 437.4356689453125, + "learning_rate": 1.606054132231115e-05, + "loss": 61.9782, + "step": 151700 + }, + { + "epoch": 0.6129275968923347, + "grad_norm": 1090.1676025390625, + "learning_rate": 1.6057803542515372e-05, + "loss": 88.2729, + "step": 151710 + }, + { + "epoch": 0.6129679981577023, + "grad_norm": 570.9186401367188, + "learning_rate": 1.6055065839574743e-05, + "loss": 57.3784, + "step": 151720 + }, + { + "epoch": 0.61300839942307, + "grad_norm": 747.4923095703125, + "learning_rate": 1.6052328213542643e-05, + "loss": 65.9711, + "step": 151730 + }, + { + "epoch": 0.6130488006884376, + "grad_norm": 589.6272583007812, + "learning_rate": 1.6049590664472433e-05, + "loss": 110.5411, + "step": 151740 + }, + { + "epoch": 0.6130892019538052, + "grad_norm": 705.920166015625, + "learning_rate": 1.6046853192417478e-05, + "loss": 58.024, + "step": 151750 + }, + { + "epoch": 0.6131296032191729, + "grad_norm": 562.2664184570312, + "learning_rate": 1.6044115797431164e-05, + "loss": 86.7728, + "step": 151760 + }, + { + "epoch": 0.6131700044845405, + "grad_norm": 1183.07666015625, + "learning_rate": 1.604137847956684e-05, + "loss": 72.057, + "step": 151770 + }, + { + "epoch": 0.613210405749908, + "grad_norm": 378.5177307128906, + "learning_rate": 
1.6038641238877892e-05, + "loss": 56.0469, + "step": 151780 + }, + { + "epoch": 0.6132508070152757, + "grad_norm": 255.5067138671875, + "learning_rate": 1.6035904075417673e-05, + "loss": 42.5234, + "step": 151790 + }, + { + "epoch": 0.6132912082806433, + "grad_norm": 1072.9107666015625, + "learning_rate": 1.6033166989239538e-05, + "loss": 84.8168, + "step": 151800 + }, + { + "epoch": 0.613331609546011, + "grad_norm": 675.013916015625, + "learning_rate": 1.603042998039686e-05, + "loss": 72.6052, + "step": 151810 + }, + { + "epoch": 0.6133720108113786, + "grad_norm": 931.8118896484375, + "learning_rate": 1.602769304894299e-05, + "loss": 78.971, + "step": 151820 + }, + { + "epoch": 0.6134124120767462, + "grad_norm": 496.90850830078125, + "learning_rate": 1.6024956194931286e-05, + "loss": 97.582, + "step": 151830 + }, + { + "epoch": 0.6134528133421139, + "grad_norm": 399.3227844238281, + "learning_rate": 1.6022219418415115e-05, + "loss": 51.2759, + "step": 151840 + }, + { + "epoch": 0.6134932146074815, + "grad_norm": 557.4927978515625, + "learning_rate": 1.6019482719447812e-05, + "loss": 86.9316, + "step": 151850 + }, + { + "epoch": 0.6135336158728492, + "grad_norm": 378.4765625, + "learning_rate": 1.6016746098082757e-05, + "loss": 88.202, + "step": 151860 + }, + { + "epoch": 0.6135740171382168, + "grad_norm": 552.4365844726562, + "learning_rate": 1.6014009554373283e-05, + "loss": 56.9633, + "step": 151870 + }, + { + "epoch": 0.6136144184035844, + "grad_norm": 982.1788940429688, + "learning_rate": 1.601127308837274e-05, + "loss": 112.7174, + "step": 151880 + }, + { + "epoch": 0.6136548196689521, + "grad_norm": 1160.1385498046875, + "learning_rate": 1.6008536700134482e-05, + "loss": 101.0634, + "step": 151890 + }, + { + "epoch": 0.6136952209343196, + "grad_norm": 798.727783203125, + "learning_rate": 1.600580038971186e-05, + "loss": 54.3155, + "step": 151900 + }, + { + "epoch": 0.6137356221996872, + "grad_norm": 1189.0992431640625, + "learning_rate": 1.6003064157158216e-05, + "loss": 93.368, + "step": 151910 + }, + { + "epoch": 0.6137760234650549, + "grad_norm": 669.2736206054688, + "learning_rate": 1.6000328002526896e-05, + "loss": 67.0786, + "step": 151920 + }, + { + "epoch": 0.6138164247304225, + "grad_norm": 525.731689453125, + "learning_rate": 1.5997591925871233e-05, + "loss": 57.8405, + "step": 151930 + }, + { + "epoch": 0.6138568259957902, + "grad_norm": 541.86279296875, + "learning_rate": 1.599485592724458e-05, + "loss": 80.5898, + "step": 151940 + }, + { + "epoch": 0.6138972272611578, + "grad_norm": 441.3697509765625, + "learning_rate": 1.5992120006700278e-05, + "loss": 67.8555, + "step": 151950 + }, + { + "epoch": 0.6139376285265254, + "grad_norm": 935.898681640625, + "learning_rate": 1.5989384164291648e-05, + "loss": 80.0658, + "step": 151960 + }, + { + "epoch": 0.6139780297918931, + "grad_norm": 661.4522094726562, + "learning_rate": 1.598664840007205e-05, + "loss": 63.6152, + "step": 151970 + }, + { + "epoch": 0.6140184310572607, + "grad_norm": 906.1178588867188, + "learning_rate": 1.59839127140948e-05, + "loss": 78.787, + "step": 151980 + }, + { + "epoch": 0.6140588323226284, + "grad_norm": 747.2604370117188, + "learning_rate": 1.5981177106413246e-05, + "loss": 70.3271, + "step": 151990 + }, + { + "epoch": 0.614099233587996, + "grad_norm": 734.201171875, + "learning_rate": 1.5978441577080713e-05, + "loss": 83.2393, + "step": 152000 + }, + { + "epoch": 0.6141396348533636, + "grad_norm": 983.1251831054688, + "learning_rate": 1.5975706126150524e-05, + "loss": 72.1267, + "step": 152010 
+ }, + { + "epoch": 0.6141800361187313, + "grad_norm": 760.6577758789062, + "learning_rate": 1.5972970753676024e-05, + "loss": 75.7922, + "step": 152020 + }, + { + "epoch": 0.6142204373840988, + "grad_norm": 1029.0418701171875, + "learning_rate": 1.597023545971053e-05, + "loss": 89.0911, + "step": 152030 + }, + { + "epoch": 0.6142608386494665, + "grad_norm": 584.7739868164062, + "learning_rate": 1.5967500244307364e-05, + "loss": 57.5619, + "step": 152040 + }, + { + "epoch": 0.6143012399148341, + "grad_norm": 813.298095703125, + "learning_rate": 1.5964765107519863e-05, + "loss": 86.1703, + "step": 152050 + }, + { + "epoch": 0.6143416411802017, + "grad_norm": 869.957763671875, + "learning_rate": 1.5962030049401343e-05, + "loss": 81.3739, + "step": 152060 + }, + { + "epoch": 0.6143820424455694, + "grad_norm": 787.5074462890625, + "learning_rate": 1.5959295070005126e-05, + "loss": 79.8323, + "step": 152070 + }, + { + "epoch": 0.614422443710937, + "grad_norm": 555.3134765625, + "learning_rate": 1.5956560169384536e-05, + "loss": 102.7807, + "step": 152080 + }, + { + "epoch": 0.6144628449763047, + "grad_norm": 520.5637817382812, + "learning_rate": 1.5953825347592877e-05, + "loss": 58.2886, + "step": 152090 + }, + { + "epoch": 0.6145032462416723, + "grad_norm": 465.4957275390625, + "learning_rate": 1.5951090604683485e-05, + "loss": 103.3029, + "step": 152100 + }, + { + "epoch": 0.6145436475070399, + "grad_norm": 1212.626220703125, + "learning_rate": 1.5948355940709667e-05, + "loss": 70.2769, + "step": 152110 + }, + { + "epoch": 0.6145840487724076, + "grad_norm": 510.0199890136719, + "learning_rate": 1.5945621355724726e-05, + "loss": 78.3597, + "step": 152120 + }, + { + "epoch": 0.6146244500377752, + "grad_norm": 1128.132080078125, + "learning_rate": 1.5942886849781994e-05, + "loss": 84.7549, + "step": 152130 + }, + { + "epoch": 0.6146648513031429, + "grad_norm": 779.0321044921875, + "learning_rate": 1.5940152422934764e-05, + "loss": 77.8614, + "step": 152140 + }, + { + "epoch": 0.6147052525685105, + "grad_norm": 920.8392944335938, + "learning_rate": 1.593741807523636e-05, + "loss": 74.6604, + "step": 152150 + }, + { + "epoch": 0.614745653833878, + "grad_norm": 1249.1307373046875, + "learning_rate": 1.593468380674008e-05, + "loss": 65.1356, + "step": 152160 + }, + { + "epoch": 0.6147860550992457, + "grad_norm": 927.9535522460938, + "learning_rate": 1.5931949617499226e-05, + "loss": 85.4743, + "step": 152170 + }, + { + "epoch": 0.6148264563646133, + "grad_norm": 480.2413330078125, + "learning_rate": 1.5929215507567113e-05, + "loss": 67.4007, + "step": 152180 + }, + { + "epoch": 0.614866857629981, + "grad_norm": 590.7462158203125, + "learning_rate": 1.5926481476997045e-05, + "loss": 59.4859, + "step": 152190 + }, + { + "epoch": 0.6149072588953486, + "grad_norm": 739.468505859375, + "learning_rate": 1.5923747525842306e-05, + "loss": 114.8198, + "step": 152200 + }, + { + "epoch": 0.6149476601607162, + "grad_norm": 478.99468994140625, + "learning_rate": 1.5921013654156218e-05, + "loss": 42.4986, + "step": 152210 + }, + { + "epoch": 0.6149880614260839, + "grad_norm": 547.9443969726562, + "learning_rate": 1.5918279861992063e-05, + "loss": 55.8378, + "step": 152220 + }, + { + "epoch": 0.6150284626914515, + "grad_norm": 764.931640625, + "learning_rate": 1.5915546149403144e-05, + "loss": 132.1486, + "step": 152230 + }, + { + "epoch": 0.6150688639568191, + "grad_norm": 833.6188354492188, + "learning_rate": 1.5912812516442758e-05, + "loss": 69.2916, + "step": 152240 + }, + { + "epoch": 0.6151092652221868, + 
"grad_norm": 773.855224609375, + "learning_rate": 1.5910078963164195e-05, + "loss": 75.8401, + "step": 152250 + }, + { + "epoch": 0.6151496664875544, + "grad_norm": 957.8255615234375, + "learning_rate": 1.590734548962075e-05, + "loss": 82.2368, + "step": 152260 + }, + { + "epoch": 0.6151900677529221, + "grad_norm": 784.5614013671875, + "learning_rate": 1.590461209586571e-05, + "loss": 65.5536, + "step": 152270 + }, + { + "epoch": 0.6152304690182897, + "grad_norm": 884.734619140625, + "learning_rate": 1.5901878781952364e-05, + "loss": 79.1926, + "step": 152280 + }, + { + "epoch": 0.6152708702836572, + "grad_norm": 1039.085693359375, + "learning_rate": 1.5899145547934004e-05, + "loss": 88.3044, + "step": 152290 + }, + { + "epoch": 0.6153112715490249, + "grad_norm": 774.1190795898438, + "learning_rate": 1.5896412393863905e-05, + "loss": 64.2687, + "step": 152300 + }, + { + "epoch": 0.6153516728143925, + "grad_norm": 1173.2716064453125, + "learning_rate": 1.589367931979537e-05, + "loss": 73.05, + "step": 152310 + }, + { + "epoch": 0.6153920740797602, + "grad_norm": 1149.744384765625, + "learning_rate": 1.589094632578167e-05, + "loss": 66.9238, + "step": 152320 + }, + { + "epoch": 0.6154324753451278, + "grad_norm": 665.3458862304688, + "learning_rate": 1.588821341187608e-05, + "loss": 75.6739, + "step": 152330 + }, + { + "epoch": 0.6154728766104954, + "grad_norm": 411.6246337890625, + "learning_rate": 1.5885480578131893e-05, + "loss": 68.2341, + "step": 152340 + }, + { + "epoch": 0.6155132778758631, + "grad_norm": 823.7258911132812, + "learning_rate": 1.588274782460238e-05, + "loss": 62.9989, + "step": 152350 + }, + { + "epoch": 0.6155536791412307, + "grad_norm": 1674.228759765625, + "learning_rate": 1.588001515134081e-05, + "loss": 85.1688, + "step": 152360 + }, + { + "epoch": 0.6155940804065984, + "grad_norm": 869.6264038085938, + "learning_rate": 1.5877282558400472e-05, + "loss": 82.2247, + "step": 152370 + }, + { + "epoch": 0.615634481671966, + "grad_norm": 542.0640258789062, + "learning_rate": 1.587455004583463e-05, + "loss": 83.2305, + "step": 152380 + }, + { + "epoch": 0.6156748829373336, + "grad_norm": 1085.370849609375, + "learning_rate": 1.5871817613696562e-05, + "loss": 85.5887, + "step": 152390 + }, + { + "epoch": 0.6157152842027013, + "grad_norm": 1162.4268798828125, + "learning_rate": 1.5869085262039535e-05, + "loss": 83.4002, + "step": 152400 + }, + { + "epoch": 0.6157556854680689, + "grad_norm": 637.1834106445312, + "learning_rate": 1.5866352990916814e-05, + "loss": 110.1451, + "step": 152410 + }, + { + "epoch": 0.6157960867334364, + "grad_norm": 1228.680419921875, + "learning_rate": 1.5863620800381676e-05, + "loss": 80.7724, + "step": 152420 + }, + { + "epoch": 0.6158364879988041, + "grad_norm": 634.421875, + "learning_rate": 1.5860888690487377e-05, + "loss": 59.8311, + "step": 152430 + }, + { + "epoch": 0.6158768892641717, + "grad_norm": 620.900390625, + "learning_rate": 1.585815666128718e-05, + "loss": 145.6125, + "step": 152440 + }, + { + "epoch": 0.6159172905295394, + "grad_norm": 363.9997863769531, + "learning_rate": 1.5855424712834353e-05, + "loss": 59.7274, + "step": 152450 + }, + { + "epoch": 0.615957691794907, + "grad_norm": 783.8272094726562, + "learning_rate": 1.5852692845182153e-05, + "loss": 67.9643, + "step": 152460 + }, + { + "epoch": 0.6159980930602746, + "grad_norm": 445.77069091796875, + "learning_rate": 1.5849961058383846e-05, + "loss": 74.3991, + "step": 152470 + }, + { + "epoch": 0.6160384943256423, + "grad_norm": 1216.338134765625, + "learning_rate": 
1.5847229352492682e-05, + "loss": 74.2123, + "step": 152480 + }, + { + "epoch": 0.6160788955910099, + "grad_norm": 858.2653198242188, + "learning_rate": 1.584449772756192e-05, + "loss": 84.0507, + "step": 152490 + }, + { + "epoch": 0.6161192968563776, + "grad_norm": 472.8829040527344, + "learning_rate": 1.584176618364482e-05, + "loss": 62.3205, + "step": 152500 + }, + { + "epoch": 0.6161596981217452, + "grad_norm": 891.8395385742188, + "learning_rate": 1.5839034720794618e-05, + "loss": 54.19, + "step": 152510 + }, + { + "epoch": 0.6162000993871128, + "grad_norm": 1089.166259765625, + "learning_rate": 1.5836303339064586e-05, + "loss": 56.865, + "step": 152520 + }, + { + "epoch": 0.6162405006524805, + "grad_norm": 588.3672485351562, + "learning_rate": 1.583357203850796e-05, + "loss": 53.2769, + "step": 152530 + }, + { + "epoch": 0.616280901917848, + "grad_norm": 741.336181640625, + "learning_rate": 1.5830840819177988e-05, + "loss": 62.1251, + "step": 152540 + }, + { + "epoch": 0.6163213031832157, + "grad_norm": 725.9051513671875, + "learning_rate": 1.5828109681127924e-05, + "loss": 78.6794, + "step": 152550 + }, + { + "epoch": 0.6163617044485833, + "grad_norm": 353.18927001953125, + "learning_rate": 1.5825378624411017e-05, + "loss": 56.6503, + "step": 152560 + }, + { + "epoch": 0.6164021057139509, + "grad_norm": 722.9967651367188, + "learning_rate": 1.5822647649080492e-05, + "loss": 84.0756, + "step": 152570 + }, + { + "epoch": 0.6164425069793186, + "grad_norm": 2353.728759765625, + "learning_rate": 1.5819916755189606e-05, + "loss": 96.8609, + "step": 152580 + }, + { + "epoch": 0.6164829082446862, + "grad_norm": 1207.8824462890625, + "learning_rate": 1.5817185942791595e-05, + "loss": 67.8239, + "step": 152590 + }, + { + "epoch": 0.6165233095100539, + "grad_norm": 952.1771850585938, + "learning_rate": 1.5814455211939698e-05, + "loss": 54.0012, + "step": 152600 + }, + { + "epoch": 0.6165637107754215, + "grad_norm": 574.3251342773438, + "learning_rate": 1.5811724562687157e-05, + "loss": 69.805, + "step": 152610 + }, + { + "epoch": 0.6166041120407891, + "grad_norm": 651.0619506835938, + "learning_rate": 1.580899399508719e-05, + "loss": 79.1222, + "step": 152620 + }, + { + "epoch": 0.6166445133061568, + "grad_norm": 1001.46630859375, + "learning_rate": 1.580626350919305e-05, + "loss": 57.5167, + "step": 152630 + }, + { + "epoch": 0.6166849145715244, + "grad_norm": 1020.8323364257812, + "learning_rate": 1.5803533105057963e-05, + "loss": 78.1011, + "step": 152640 + }, + { + "epoch": 0.616725315836892, + "grad_norm": 674.6625366210938, + "learning_rate": 1.5800802782735152e-05, + "loss": 58.9968, + "step": 152650 + }, + { + "epoch": 0.6167657171022597, + "grad_norm": 1206.986083984375, + "learning_rate": 1.579807254227786e-05, + "loss": 73.2691, + "step": 152660 + }, + { + "epoch": 0.6168061183676272, + "grad_norm": 609.6486206054688, + "learning_rate": 1.5795342383739304e-05, + "loss": 43.081, + "step": 152670 + }, + { + "epoch": 0.6168465196329949, + "grad_norm": 212.8444061279297, + "learning_rate": 1.5792612307172715e-05, + "loss": 75.5484, + "step": 152680 + }, + { + "epoch": 0.6168869208983625, + "grad_norm": 480.2177429199219, + "learning_rate": 1.5789882312631316e-05, + "loss": 48.4789, + "step": 152690 + }, + { + "epoch": 0.6169273221637301, + "grad_norm": 402.2249450683594, + "learning_rate": 1.578715240016832e-05, + "loss": 91.8122, + "step": 152700 + }, + { + "epoch": 0.6169677234290978, + "grad_norm": 885.6983642578125, + "learning_rate": 1.5784422569836964e-05, + "loss": 56.9721, + 
"step": 152710 + }, + { + "epoch": 0.6170081246944654, + "grad_norm": 928.3543090820312, + "learning_rate": 1.5781692821690463e-05, + "loss": 59.4073, + "step": 152720 + }, + { + "epoch": 0.6170485259598331, + "grad_norm": 1040.4940185546875, + "learning_rate": 1.5778963155782022e-05, + "loss": 117.8805, + "step": 152730 + }, + { + "epoch": 0.6170889272252007, + "grad_norm": 1009.7037963867188, + "learning_rate": 1.5776233572164878e-05, + "loss": 75.8224, + "step": 152740 + }, + { + "epoch": 0.6171293284905683, + "grad_norm": 745.7273559570312, + "learning_rate": 1.577350407089223e-05, + "loss": 86.8857, + "step": 152750 + }, + { + "epoch": 0.617169729755936, + "grad_norm": 362.44635009765625, + "learning_rate": 1.5770774652017296e-05, + "loss": 78.8031, + "step": 152760 + }, + { + "epoch": 0.6172101310213036, + "grad_norm": 504.6735534667969, + "learning_rate": 1.5768045315593294e-05, + "loss": 59.876, + "step": 152770 + }, + { + "epoch": 0.6172505322866713, + "grad_norm": 558.1041259765625, + "learning_rate": 1.5765316061673414e-05, + "loss": 74.1441, + "step": 152780 + }, + { + "epoch": 0.6172909335520389, + "grad_norm": 360.26806640625, + "learning_rate": 1.576258689031089e-05, + "loss": 33.0806, + "step": 152790 + }, + { + "epoch": 0.6173313348174064, + "grad_norm": 419.9398498535156, + "learning_rate": 1.5759857801558913e-05, + "loss": 86.8836, + "step": 152800 + }, + { + "epoch": 0.6173717360827741, + "grad_norm": 839.8963012695312, + "learning_rate": 1.5757128795470686e-05, + "loss": 74.1545, + "step": 152810 + }, + { + "epoch": 0.6174121373481417, + "grad_norm": 630.8148803710938, + "learning_rate": 1.575439987209942e-05, + "loss": 71.8901, + "step": 152820 + }, + { + "epoch": 0.6174525386135093, + "grad_norm": 1128.981689453125, + "learning_rate": 1.5751671031498317e-05, + "loss": 96.4013, + "step": 152830 + }, + { + "epoch": 0.617492939878877, + "grad_norm": 611.2599487304688, + "learning_rate": 1.5748942273720573e-05, + "loss": 89.1781, + "step": 152840 + }, + { + "epoch": 0.6175333411442446, + "grad_norm": 1156.3299560546875, + "learning_rate": 1.5746213598819392e-05, + "loss": 71.6927, + "step": 152850 + }, + { + "epoch": 0.6175737424096123, + "grad_norm": 970.258544921875, + "learning_rate": 1.5743485006847955e-05, + "loss": 98.533, + "step": 152860 + }, + { + "epoch": 0.6176141436749799, + "grad_norm": 542.661865234375, + "learning_rate": 1.574075649785948e-05, + "loss": 54.2712, + "step": 152870 + }, + { + "epoch": 0.6176545449403475, + "grad_norm": 465.5739440917969, + "learning_rate": 1.5738028071907152e-05, + "loss": 66.873, + "step": 152880 + }, + { + "epoch": 0.6176949462057152, + "grad_norm": 753.3284912109375, + "learning_rate": 1.573529972904415e-05, + "loss": 93.1333, + "step": 152890 + }, + { + "epoch": 0.6177353474710828, + "grad_norm": 1049.215087890625, + "learning_rate": 1.5732571469323683e-05, + "loss": 62.9545, + "step": 152900 + }, + { + "epoch": 0.6177757487364505, + "grad_norm": 770.2879638671875, + "learning_rate": 1.5729843292798928e-05, + "loss": 73.1696, + "step": 152910 + }, + { + "epoch": 0.6178161500018181, + "grad_norm": 1128.498046875, + "learning_rate": 1.5727115199523078e-05, + "loss": 73.0801, + "step": 152920 + }, + { + "epoch": 0.6178565512671856, + "grad_norm": 731.2767333984375, + "learning_rate": 1.572438718954932e-05, + "loss": 58.8045, + "step": 152930 + }, + { + "epoch": 0.6178969525325533, + "grad_norm": 445.2139892578125, + "learning_rate": 1.5721659262930835e-05, + "loss": 71.5207, + "step": 152940 + }, + { + "epoch": 
0.6179373537979209, + "grad_norm": 588.3367309570312, + "learning_rate": 1.57189314197208e-05, + "loss": 66.6513, + "step": 152950 + }, + { + "epoch": 0.6179777550632886, + "grad_norm": 395.48681640625, + "learning_rate": 1.5716203659972408e-05, + "loss": 68.7658, + "step": 152960 + }, + { + "epoch": 0.6180181563286562, + "grad_norm": 661.6328125, + "learning_rate": 1.5713475983738823e-05, + "loss": 81.3574, + "step": 152970 + }, + { + "epoch": 0.6180585575940238, + "grad_norm": 826.8771362304688, + "learning_rate": 1.5710748391073235e-05, + "loss": 52.4286, + "step": 152980 + }, + { + "epoch": 0.6180989588593915, + "grad_norm": 924.315673828125, + "learning_rate": 1.5708020882028812e-05, + "loss": 59.5476, + "step": 152990 + }, + { + "epoch": 0.6181393601247591, + "grad_norm": 714.1250610351562, + "learning_rate": 1.5705293456658736e-05, + "loss": 107.8399, + "step": 153000 + }, + { + "epoch": 0.6181797613901268, + "grad_norm": 907.9360961914062, + "learning_rate": 1.570256611501618e-05, + "loss": 57.6493, + "step": 153010 + }, + { + "epoch": 0.6182201626554944, + "grad_norm": 809.1492919921875, + "learning_rate": 1.56998388571543e-05, + "loss": 69.1981, + "step": 153020 + }, + { + "epoch": 0.618260563920862, + "grad_norm": 424.8041687011719, + "learning_rate": 1.5697111683126284e-05, + "loss": 44.1043, + "step": 153030 + }, + { + "epoch": 0.6183009651862297, + "grad_norm": 552.44921875, + "learning_rate": 1.569438459298528e-05, + "loss": 86.3302, + "step": 153040 + }, + { + "epoch": 0.6183413664515973, + "grad_norm": 738.6425170898438, + "learning_rate": 1.5691657586784473e-05, + "loss": 73.0923, + "step": 153050 + }, + { + "epoch": 0.6183817677169648, + "grad_norm": 1176.328369140625, + "learning_rate": 1.568893066457702e-05, + "loss": 88.2073, + "step": 153060 + }, + { + "epoch": 0.6184221689823325, + "grad_norm": 958.396728515625, + "learning_rate": 1.5686203826416074e-05, + "loss": 97.6453, + "step": 153070 + }, + { + "epoch": 0.6184625702477001, + "grad_norm": 586.671630859375, + "learning_rate": 1.5683477072354815e-05, + "loss": 69.8152, + "step": 153080 + }, + { + "epoch": 0.6185029715130678, + "grad_norm": 669.2726440429688, + "learning_rate": 1.5680750402446394e-05, + "loss": 95.3169, + "step": 153090 + }, + { + "epoch": 0.6185433727784354, + "grad_norm": 439.0142517089844, + "learning_rate": 1.567802381674396e-05, + "loss": 121.0423, + "step": 153100 + }, + { + "epoch": 0.618583774043803, + "grad_norm": 884.1542358398438, + "learning_rate": 1.5675297315300683e-05, + "loss": 71.3576, + "step": 153110 + }, + { + "epoch": 0.6186241753091707, + "grad_norm": 927.4553833007812, + "learning_rate": 1.5672570898169706e-05, + "loss": 97.9747, + "step": 153120 + }, + { + "epoch": 0.6186645765745383, + "grad_norm": 375.52215576171875, + "learning_rate": 1.566984456540419e-05, + "loss": 64.8134, + "step": 153130 + }, + { + "epoch": 0.618704977839906, + "grad_norm": 795.3526611328125, + "learning_rate": 1.5667118317057287e-05, + "loss": 83.7763, + "step": 153140 + }, + { + "epoch": 0.6187453791052736, + "grad_norm": 412.031982421875, + "learning_rate": 1.5664392153182134e-05, + "loss": 93.4397, + "step": 153150 + }, + { + "epoch": 0.6187857803706412, + "grad_norm": 764.4992065429688, + "learning_rate": 1.5661666073831894e-05, + "loss": 45.894, + "step": 153160 + }, + { + "epoch": 0.6188261816360089, + "grad_norm": 1067.2431640625, + "learning_rate": 1.5658940079059714e-05, + "loss": 69.1203, + "step": 153170 + }, + { + "epoch": 0.6188665829013764, + "grad_norm": 1090.9478759765625, + 
"learning_rate": 1.565621416891872e-05, + "loss": 76.5543, + "step": 153180 + }, + { + "epoch": 0.6189069841667441, + "grad_norm": 489.3139953613281, + "learning_rate": 1.5653488343462078e-05, + "loss": 60.9514, + "step": 153190 + }, + { + "epoch": 0.6189473854321117, + "grad_norm": 376.2430725097656, + "learning_rate": 1.5650762602742904e-05, + "loss": 79.4723, + "step": 153200 + }, + { + "epoch": 0.6189877866974793, + "grad_norm": 642.5101928710938, + "learning_rate": 1.5648036946814365e-05, + "loss": 68.4038, + "step": 153210 + }, + { + "epoch": 0.619028187962847, + "grad_norm": 828.3849487304688, + "learning_rate": 1.5645311375729586e-05, + "loss": 64.7315, + "step": 153220 + }, + { + "epoch": 0.6190685892282146, + "grad_norm": 373.9156799316406, + "learning_rate": 1.5642585889541693e-05, + "loss": 70.8372, + "step": 153230 + }, + { + "epoch": 0.6191089904935823, + "grad_norm": 704.2777709960938, + "learning_rate": 1.563986048830384e-05, + "loss": 78.258, + "step": 153240 + }, + { + "epoch": 0.6191493917589499, + "grad_norm": 764.4619140625, + "learning_rate": 1.5637135172069155e-05, + "loss": 72.0034, + "step": 153250 + }, + { + "epoch": 0.6191897930243175, + "grad_norm": 810.7069702148438, + "learning_rate": 1.5634409940890752e-05, + "loss": 57.4056, + "step": 153260 + }, + { + "epoch": 0.6192301942896852, + "grad_norm": 635.462158203125, + "learning_rate": 1.563168479482179e-05, + "loss": 87.5875, + "step": 153270 + }, + { + "epoch": 0.6192705955550528, + "grad_norm": 373.0495910644531, + "learning_rate": 1.5628959733915373e-05, + "loss": 75.1218, + "step": 153280 + }, + { + "epoch": 0.6193109968204205, + "grad_norm": 432.35504150390625, + "learning_rate": 1.5626234758224642e-05, + "loss": 99.0643, + "step": 153290 + }, + { + "epoch": 0.6193513980857881, + "grad_norm": 574.6649780273438, + "learning_rate": 1.5623509867802715e-05, + "loss": 57.0054, + "step": 153300 + }, + { + "epoch": 0.6193917993511556, + "grad_norm": 444.1225280761719, + "learning_rate": 1.562078506270271e-05, + "loss": 77.9436, + "step": 153310 + }, + { + "epoch": 0.6194322006165233, + "grad_norm": 406.9061584472656, + "learning_rate": 1.561806034297776e-05, + "loss": 69.2167, + "step": 153320 + }, + { + "epoch": 0.6194726018818909, + "grad_norm": 1267.5694580078125, + "learning_rate": 1.561533570868098e-05, + "loss": 70.7361, + "step": 153330 + }, + { + "epoch": 0.6195130031472585, + "grad_norm": 920.1006469726562, + "learning_rate": 1.5612611159865482e-05, + "loss": 98.9368, + "step": 153340 + }, + { + "epoch": 0.6195534044126262, + "grad_norm": 1290.843505859375, + "learning_rate": 1.5609886696584395e-05, + "loss": 76.6899, + "step": 153350 + }, + { + "epoch": 0.6195938056779938, + "grad_norm": 759.5234375, + "learning_rate": 1.5607162318890823e-05, + "loss": 64.543, + "step": 153360 + }, + { + "epoch": 0.6196342069433615, + "grad_norm": 625.9662475585938, + "learning_rate": 1.5604438026837886e-05, + "loss": 74.8615, + "step": 153370 + }, + { + "epoch": 0.6196746082087291, + "grad_norm": 410.50213623046875, + "learning_rate": 1.5601713820478697e-05, + "loss": 45.2205, + "step": 153380 + }, + { + "epoch": 0.6197150094740967, + "grad_norm": 1114.861572265625, + "learning_rate": 1.559898969986635e-05, + "loss": 50.984, + "step": 153390 + }, + { + "epoch": 0.6197554107394644, + "grad_norm": 541.4967041015625, + "learning_rate": 1.5596265665053972e-05, + "loss": 57.0357, + "step": 153400 + }, + { + "epoch": 0.619795812004832, + "grad_norm": 1052.855712890625, + "learning_rate": 1.5593541716094665e-05, + "loss": 
79.0665, + "step": 153410 + }, + { + "epoch": 0.6198362132701997, + "grad_norm": 895.2467041015625, + "learning_rate": 1.559081785304152e-05, + "loss": 112.4125, + "step": 153420 + }, + { + "epoch": 0.6198766145355673, + "grad_norm": 839.645263671875, + "learning_rate": 1.558809407594766e-05, + "loss": 65.9107, + "step": 153430 + }, + { + "epoch": 0.6199170158009348, + "grad_norm": 780.0509033203125, + "learning_rate": 1.5585370384866173e-05, + "loss": 95.4316, + "step": 153440 + }, + { + "epoch": 0.6199574170663025, + "grad_norm": 676.77001953125, + "learning_rate": 1.5582646779850168e-05, + "loss": 52.1046, + "step": 153450 + }, + { + "epoch": 0.6199978183316701, + "grad_norm": 848.5068359375, + "learning_rate": 1.5579923260952737e-05, + "loss": 80.4604, + "step": 153460 + }, + { + "epoch": 0.6200382195970378, + "grad_norm": 549.9689331054688, + "learning_rate": 1.557719982822697e-05, + "loss": 66.469, + "step": 153470 + }, + { + "epoch": 0.6200786208624054, + "grad_norm": 589.197021484375, + "learning_rate": 1.5574476481725976e-05, + "loss": 72.1431, + "step": 153480 + }, + { + "epoch": 0.620119022127773, + "grad_norm": 761.4107055664062, + "learning_rate": 1.5571753221502846e-05, + "loss": 78.8818, + "step": 153490 + }, + { + "epoch": 0.6201594233931407, + "grad_norm": 698.5140380859375, + "learning_rate": 1.5569030047610656e-05, + "loss": 69.7546, + "step": 153500 + }, + { + "epoch": 0.6201998246585083, + "grad_norm": 819.5653076171875, + "learning_rate": 1.556630696010251e-05, + "loss": 81.7758, + "step": 153510 + }, + { + "epoch": 0.620240225923876, + "grad_norm": 571.4340209960938, + "learning_rate": 1.5563583959031497e-05, + "loss": 62.4422, + "step": 153520 + }, + { + "epoch": 0.6202806271892436, + "grad_norm": 421.4046630859375, + "learning_rate": 1.5560861044450695e-05, + "loss": 40.9142, + "step": 153530 + }, + { + "epoch": 0.6203210284546112, + "grad_norm": 1469.6826171875, + "learning_rate": 1.5558138216413194e-05, + "loss": 69.7764, + "step": 153540 + }, + { + "epoch": 0.6203614297199789, + "grad_norm": 853.0863647460938, + "learning_rate": 1.5555415474972073e-05, + "loss": 62.8425, + "step": 153550 + }, + { + "epoch": 0.6204018309853465, + "grad_norm": 908.3795776367188, + "learning_rate": 1.555269282018042e-05, + "loss": 70.1144, + "step": 153560 + }, + { + "epoch": 0.620442232250714, + "grad_norm": 1169.360595703125, + "learning_rate": 1.5549970252091303e-05, + "loss": 63.4675, + "step": 153570 + }, + { + "epoch": 0.6204826335160817, + "grad_norm": 684.2550048828125, + "learning_rate": 1.554724777075781e-05, + "loss": 90.2192, + "step": 153580 + }, + { + "epoch": 0.6205230347814493, + "grad_norm": 936.9381103515625, + "learning_rate": 1.554452537623302e-05, + "loss": 66.8049, + "step": 153590 + }, + { + "epoch": 0.620563436046817, + "grad_norm": 1024.3106689453125, + "learning_rate": 1.5541803068569993e-05, + "loss": 73.8319, + "step": 153600 + }, + { + "epoch": 0.6206038373121846, + "grad_norm": 759.0418090820312, + "learning_rate": 1.5539080847821816e-05, + "loss": 98.5746, + "step": 153610 + }, + { + "epoch": 0.6206442385775522, + "grad_norm": 470.4747009277344, + "learning_rate": 1.5536358714041555e-05, + "loss": 50.2325, + "step": 153620 + }, + { + "epoch": 0.6206846398429199, + "grad_norm": 688.5573120117188, + "learning_rate": 1.5533636667282275e-05, + "loss": 67.2853, + "step": 153630 + }, + { + "epoch": 0.6207250411082875, + "grad_norm": 666.4147338867188, + "learning_rate": 1.5530914707597052e-05, + "loss": 76.6658, + "step": 153640 + }, + { + "epoch": 
0.6207654423736552, + "grad_norm": 603.4683227539062, + "learning_rate": 1.552819283503894e-05, + "loss": 75.9298, + "step": 153650 + }, + { + "epoch": 0.6208058436390228, + "grad_norm": 341.4415588378906, + "learning_rate": 1.552547104966102e-05, + "loss": 57.9464, + "step": 153660 + }, + { + "epoch": 0.6208462449043904, + "grad_norm": 797.2058715820312, + "learning_rate": 1.5522749351516342e-05, + "loss": 78.2013, + "step": 153670 + }, + { + "epoch": 0.6208866461697581, + "grad_norm": 444.2821350097656, + "learning_rate": 1.5520027740657963e-05, + "loss": 52.108, + "step": 153680 + }, + { + "epoch": 0.6209270474351257, + "grad_norm": 781.51904296875, + "learning_rate": 1.551730621713896e-05, + "loss": 65.2111, + "step": 153690 + }, + { + "epoch": 0.6209674487004933, + "grad_norm": 510.353271484375, + "learning_rate": 1.5514584781012382e-05, + "loss": 79.8078, + "step": 153700 + }, + { + "epoch": 0.6210078499658609, + "grad_norm": 617.3316040039062, + "learning_rate": 1.5511863432331273e-05, + "loss": 66.1495, + "step": 153710 + }, + { + "epoch": 0.6210482512312285, + "grad_norm": 521.2061767578125, + "learning_rate": 1.5509142171148704e-05, + "loss": 72.5934, + "step": 153720 + }, + { + "epoch": 0.6210886524965962, + "grad_norm": 210.14227294921875, + "learning_rate": 1.5506420997517714e-05, + "loss": 46.8526, + "step": 153730 + }, + { + "epoch": 0.6211290537619638, + "grad_norm": 473.0346374511719, + "learning_rate": 1.5503699911491368e-05, + "loss": 53.2732, + "step": 153740 + }, + { + "epoch": 0.6211694550273315, + "grad_norm": 682.9020385742188, + "learning_rate": 1.5500978913122702e-05, + "loss": 81.987, + "step": 153750 + }, + { + "epoch": 0.6212098562926991, + "grad_norm": 780.7532958984375, + "learning_rate": 1.5498258002464766e-05, + "loss": 60.9009, + "step": 153760 + }, + { + "epoch": 0.6212502575580667, + "grad_norm": 716.6137084960938, + "learning_rate": 1.5495537179570614e-05, + "loss": 56.2463, + "step": 153770 + }, + { + "epoch": 0.6212906588234344, + "grad_norm": 629.5020751953125, + "learning_rate": 1.5492816444493284e-05, + "loss": 54.5286, + "step": 153780 + }, + { + "epoch": 0.621331060088802, + "grad_norm": 477.2386474609375, + "learning_rate": 1.5490095797285817e-05, + "loss": 64.1399, + "step": 153790 + }, + { + "epoch": 0.6213714613541697, + "grad_norm": 1013.468017578125, + "learning_rate": 1.548737523800125e-05, + "loss": 103.4541, + "step": 153800 + }, + { + "epoch": 0.6214118626195373, + "grad_norm": 382.4214782714844, + "learning_rate": 1.5484654766692626e-05, + "loss": 54.2493, + "step": 153810 + }, + { + "epoch": 0.6214522638849048, + "grad_norm": 657.2894897460938, + "learning_rate": 1.5481934383412985e-05, + "loss": 96.6182, + "step": 153820 + }, + { + "epoch": 0.6214926651502725, + "grad_norm": 716.7754516601562, + "learning_rate": 1.5479214088215362e-05, + "loss": 95.7516, + "step": 153830 + }, + { + "epoch": 0.6215330664156401, + "grad_norm": 654.9014282226562, + "learning_rate": 1.547649388115278e-05, + "loss": 105.3903, + "step": 153840 + }, + { + "epoch": 0.6215734676810077, + "grad_norm": 1261.4224853515625, + "learning_rate": 1.5473773762278285e-05, + "loss": 78.7168, + "step": 153850 + }, + { + "epoch": 0.6216138689463754, + "grad_norm": 1096.274169921875, + "learning_rate": 1.54710537316449e-05, + "loss": 105.9161, + "step": 153860 + }, + { + "epoch": 0.621654270211743, + "grad_norm": 954.1072387695312, + "learning_rate": 1.5468333789305652e-05, + "loss": 66.0597, + "step": 153870 + }, + { + "epoch": 0.6216946714771107, + "grad_norm": 
488.45733642578125, + "learning_rate": 1.546561393531357e-05, + "loss": 46.9367, + "step": 153880 + }, + { + "epoch": 0.6217350727424783, + "grad_norm": 453.9670715332031, + "learning_rate": 1.546289416972168e-05, + "loss": 80.4165, + "step": 153890 + }, + { + "epoch": 0.6217754740078459, + "grad_norm": 842.4208984375, + "learning_rate": 1.5460174492583005e-05, + "loss": 60.8013, + "step": 153900 + }, + { + "epoch": 0.6218158752732136, + "grad_norm": 524.4949951171875, + "learning_rate": 1.5457454903950566e-05, + "loss": 89.9946, + "step": 153910 + }, + { + "epoch": 0.6218562765385812, + "grad_norm": 1248.560791015625, + "learning_rate": 1.5454735403877373e-05, + "loss": 55.8281, + "step": 153920 + }, + { + "epoch": 0.6218966778039489, + "grad_norm": 745.3416748046875, + "learning_rate": 1.5452015992416467e-05, + "loss": 84.735, + "step": 153930 + }, + { + "epoch": 0.6219370790693165, + "grad_norm": 969.7011108398438, + "learning_rate": 1.5449296669620846e-05, + "loss": 94.8815, + "step": 153940 + }, + { + "epoch": 0.621977480334684, + "grad_norm": 688.9558715820312, + "learning_rate": 1.544657743554352e-05, + "loss": 67.0862, + "step": 153950 + }, + { + "epoch": 0.6220178816000517, + "grad_norm": 396.4944152832031, + "learning_rate": 1.5443858290237525e-05, + "loss": 48.8191, + "step": 153960 + }, + { + "epoch": 0.6220582828654193, + "grad_norm": 1114.3050537109375, + "learning_rate": 1.544113923375585e-05, + "loss": 76.3286, + "step": 153970 + }, + { + "epoch": 0.622098684130787, + "grad_norm": 509.0501708984375, + "learning_rate": 1.5438420266151517e-05, + "loss": 69.044, + "step": 153980 + }, + { + "epoch": 0.6221390853961546, + "grad_norm": 467.8182373046875, + "learning_rate": 1.543570138747753e-05, + "loss": 91.3481, + "step": 153990 + }, + { + "epoch": 0.6221794866615222, + "grad_norm": 792.668701171875, + "learning_rate": 1.5432982597786886e-05, + "loss": 65.9046, + "step": 154000 + }, + { + "epoch": 0.6222198879268899, + "grad_norm": 684.974609375, + "learning_rate": 1.5430263897132607e-05, + "loss": 68.8131, + "step": 154010 + }, + { + "epoch": 0.6222602891922575, + "grad_norm": 468.4241943359375, + "learning_rate": 1.5427545285567675e-05, + "loss": 68.0863, + "step": 154020 + }, + { + "epoch": 0.6223006904576251, + "grad_norm": 675.1613159179688, + "learning_rate": 1.542482676314511e-05, + "loss": 57.6962, + "step": 154030 + }, + { + "epoch": 0.6223410917229928, + "grad_norm": 727.3892822265625, + "learning_rate": 1.5422108329917908e-05, + "loss": 68.4754, + "step": 154040 + }, + { + "epoch": 0.6223814929883604, + "grad_norm": 1995.6292724609375, + "learning_rate": 1.541938998593905e-05, + "loss": 99.0929, + "step": 154050 + }, + { + "epoch": 0.6224218942537281, + "grad_norm": 787.84716796875, + "learning_rate": 1.541667173126155e-05, + "loss": 58.4568, + "step": 154060 + }, + { + "epoch": 0.6224622955190957, + "grad_norm": 899.7470703125, + "learning_rate": 1.5413953565938396e-05, + "loss": 69.2011, + "step": 154070 + }, + { + "epoch": 0.6225026967844632, + "grad_norm": 485.49285888671875, + "learning_rate": 1.5411235490022568e-05, + "loss": 59.0806, + "step": 154080 + }, + { + "epoch": 0.6225430980498309, + "grad_norm": 740.4190673828125, + "learning_rate": 1.5408517503567074e-05, + "loss": 110.4619, + "step": 154090 + }, + { + "epoch": 0.6225834993151985, + "grad_norm": 678.0313110351562, + "learning_rate": 1.5405799606624886e-05, + "loss": 88.8934, + "step": 154100 + }, + { + "epoch": 0.6226239005805662, + "grad_norm": 507.8426513671875, + "learning_rate": 
1.540308179924901e-05, + "loss": 46.5135, + "step": 154110 + }, + { + "epoch": 0.6226643018459338, + "grad_norm": 394.9922790527344, + "learning_rate": 1.540036408149242e-05, + "loss": 73.6193, + "step": 154120 + }, + { + "epoch": 0.6227047031113014, + "grad_norm": 1024.0770263671875, + "learning_rate": 1.5397646453408096e-05, + "loss": 95.9349, + "step": 154130 + }, + { + "epoch": 0.6227451043766691, + "grad_norm": 881.5184936523438, + "learning_rate": 1.539492891504903e-05, + "loss": 48.2219, + "step": 154140 + }, + { + "epoch": 0.6227855056420367, + "grad_norm": 642.3825073242188, + "learning_rate": 1.5392211466468198e-05, + "loss": 83.8112, + "step": 154150 + }, + { + "epoch": 0.6228259069074044, + "grad_norm": 646.3540649414062, + "learning_rate": 1.538949410771856e-05, + "loss": 62.809, + "step": 154160 + }, + { + "epoch": 0.622866308172772, + "grad_norm": 227.0119171142578, + "learning_rate": 1.538677683885312e-05, + "loss": 55.3162, + "step": 154170 + }, + { + "epoch": 0.6229067094381396, + "grad_norm": 342.80523681640625, + "learning_rate": 1.5384059659924835e-05, + "loss": 97.0365, + "step": 154180 + }, + { + "epoch": 0.6229471107035073, + "grad_norm": 608.6118774414062, + "learning_rate": 1.5381342570986688e-05, + "loss": 72.8232, + "step": 154190 + }, + { + "epoch": 0.6229875119688749, + "grad_norm": 816.3831176757812, + "learning_rate": 1.5378625572091647e-05, + "loss": 46.4602, + "step": 154200 + }, + { + "epoch": 0.6230279132342424, + "grad_norm": 815.4061889648438, + "learning_rate": 1.5375908663292675e-05, + "loss": 69.692, + "step": 154210 + }, + { + "epoch": 0.6230683144996101, + "grad_norm": 834.0928344726562, + "learning_rate": 1.5373191844642745e-05, + "loss": 78.3428, + "step": 154220 + }, + { + "epoch": 0.6231087157649777, + "grad_norm": 1087.5648193359375, + "learning_rate": 1.5370475116194824e-05, + "loss": 94.6306, + "step": 154230 + }, + { + "epoch": 0.6231491170303454, + "grad_norm": 799.9658813476562, + "learning_rate": 1.5367758478001872e-05, + "loss": 78.1959, + "step": 154240 + }, + { + "epoch": 0.623189518295713, + "grad_norm": 825.8199462890625, + "learning_rate": 1.536504193011686e-05, + "loss": 51.5504, + "step": 154250 + }, + { + "epoch": 0.6232299195610806, + "grad_norm": 370.6082763671875, + "learning_rate": 1.5362325472592728e-05, + "loss": 67.5587, + "step": 154260 + }, + { + "epoch": 0.6232703208264483, + "grad_norm": 722.5872802734375, + "learning_rate": 1.535960910548246e-05, + "loss": 74.7984, + "step": 154270 + }, + { + "epoch": 0.6233107220918159, + "grad_norm": 366.9637756347656, + "learning_rate": 1.5356892828839e-05, + "loss": 53.3515, + "step": 154280 + }, + { + "epoch": 0.6233511233571836, + "grad_norm": 434.1397399902344, + "learning_rate": 1.5354176642715296e-05, + "loss": 50.3099, + "step": 154290 + }, + { + "epoch": 0.6233915246225512, + "grad_norm": 689.2254028320312, + "learning_rate": 1.535146054716432e-05, + "loss": 78.8151, + "step": 154300 + }, + { + "epoch": 0.6234319258879188, + "grad_norm": 908.6710815429688, + "learning_rate": 1.5348744542239015e-05, + "loss": 83.3003, + "step": 154310 + }, + { + "epoch": 0.6234723271532865, + "grad_norm": 846.1793823242188, + "learning_rate": 1.5346028627992323e-05, + "loss": 83.6122, + "step": 154320 + }, + { + "epoch": 0.6235127284186541, + "grad_norm": 554.7393188476562, + "learning_rate": 1.5343312804477208e-05, + "loss": 75.5378, + "step": 154330 + }, + { + "epoch": 0.6235531296840217, + "grad_norm": 870.555908203125, + "learning_rate": 1.5340597071746596e-05, + "loss": 89.2441, + 
"step": 154340 + }, + { + "epoch": 0.6235935309493893, + "grad_norm": 520.1298217773438, + "learning_rate": 1.5337881429853448e-05, + "loss": 68.7114, + "step": 154350 + }, + { + "epoch": 0.6236339322147569, + "grad_norm": 1215.51318359375, + "learning_rate": 1.5335165878850703e-05, + "loss": 71.4799, + "step": 154360 + }, + { + "epoch": 0.6236743334801246, + "grad_norm": 767.3395385742188, + "learning_rate": 1.5332450418791296e-05, + "loss": 85.7992, + "step": 154370 + }, + { + "epoch": 0.6237147347454922, + "grad_norm": 702.8889770507812, + "learning_rate": 1.5329735049728175e-05, + "loss": 74.5983, + "step": 154380 + }, + { + "epoch": 0.6237551360108599, + "grad_norm": 916.8285522460938, + "learning_rate": 1.532701977171428e-05, + "loss": 68.6088, + "step": 154390 + }, + { + "epoch": 0.6237955372762275, + "grad_norm": 395.7935791015625, + "learning_rate": 1.532430458480253e-05, + "loss": 45.9157, + "step": 154400 + }, + { + "epoch": 0.6238359385415951, + "grad_norm": 486.66339111328125, + "learning_rate": 1.5321589489045876e-05, + "loss": 61.2942, + "step": 154410 + }, + { + "epoch": 0.6238763398069628, + "grad_norm": 596.0330200195312, + "learning_rate": 1.5318874484497234e-05, + "loss": 66.3363, + "step": 154420 + }, + { + "epoch": 0.6239167410723304, + "grad_norm": 2919.337158203125, + "learning_rate": 1.5316159571209552e-05, + "loss": 126.6924, + "step": 154430 + }, + { + "epoch": 0.6239571423376981, + "grad_norm": 1330.2589111328125, + "learning_rate": 1.531344474923575e-05, + "loss": 71.4137, + "step": 154440 + }, + { + "epoch": 0.6239975436030657, + "grad_norm": 275.7259216308594, + "learning_rate": 1.531073001862875e-05, + "loss": 63.8319, + "step": 154450 + }, + { + "epoch": 0.6240379448684332, + "grad_norm": 1016.280029296875, + "learning_rate": 1.530801537944149e-05, + "loss": 84.934, + "step": 154460 + }, + { + "epoch": 0.6240783461338009, + "grad_norm": 1057.8538818359375, + "learning_rate": 1.5305300831726886e-05, + "loss": 82.3483, + "step": 154470 + }, + { + "epoch": 0.6241187473991685, + "grad_norm": 828.5070190429688, + "learning_rate": 1.5302586375537854e-05, + "loss": 75.7532, + "step": 154480 + }, + { + "epoch": 0.6241591486645361, + "grad_norm": 939.7634887695312, + "learning_rate": 1.5299872010927324e-05, + "loss": 65.4885, + "step": 154490 + }, + { + "epoch": 0.6241995499299038, + "grad_norm": 469.8560485839844, + "learning_rate": 1.52971577379482e-05, + "loss": 49.6236, + "step": 154500 + }, + { + "epoch": 0.6242399511952714, + "grad_norm": 1127.231201171875, + "learning_rate": 1.5294443556653415e-05, + "loss": 72.1748, + "step": 154510 + }, + { + "epoch": 0.6242803524606391, + "grad_norm": 432.2366943359375, + "learning_rate": 1.5291729467095876e-05, + "loss": 57.5266, + "step": 154520 + }, + { + "epoch": 0.6243207537260067, + "grad_norm": 874.5888061523438, + "learning_rate": 1.5289015469328488e-05, + "loss": 72.707, + "step": 154530 + }, + { + "epoch": 0.6243611549913743, + "grad_norm": 283.27374267578125, + "learning_rate": 1.5286301563404174e-05, + "loss": 61.3057, + "step": 154540 + }, + { + "epoch": 0.624401556256742, + "grad_norm": 1358.85107421875, + "learning_rate": 1.5283587749375837e-05, + "loss": 73.5163, + "step": 154550 + }, + { + "epoch": 0.6244419575221096, + "grad_norm": 377.94464111328125, + "learning_rate": 1.5280874027296382e-05, + "loss": 44.7919, + "step": 154560 + }, + { + "epoch": 0.6244823587874773, + "grad_norm": 599.3715209960938, + "learning_rate": 1.5278160397218726e-05, + "loss": 63.6556, + "step": 154570 + }, + { + "epoch": 
0.6245227600528449, + "grad_norm": 1130.485595703125, + "learning_rate": 1.5275446859195756e-05, + "loss": 85.8646, + "step": 154580 + }, + { + "epoch": 0.6245631613182124, + "grad_norm": 344.1568908691406, + "learning_rate": 1.5272733413280386e-05, + "loss": 40.914, + "step": 154590 + }, + { + "epoch": 0.6246035625835801, + "grad_norm": 1331.9268798828125, + "learning_rate": 1.527002005952551e-05, + "loss": 97.8261, + "step": 154600 + }, + { + "epoch": 0.6246439638489477, + "grad_norm": 1789.1029052734375, + "learning_rate": 1.5267306797984026e-05, + "loss": 68.751, + "step": 154610 + }, + { + "epoch": 0.6246843651143154, + "grad_norm": 537.7854614257812, + "learning_rate": 1.5264593628708837e-05, + "loss": 52.0992, + "step": 154620 + }, + { + "epoch": 0.624724766379683, + "grad_norm": 262.9101867675781, + "learning_rate": 1.5261880551752826e-05, + "loss": 69.0603, + "step": 154630 + }, + { + "epoch": 0.6247651676450506, + "grad_norm": 1053.103271484375, + "learning_rate": 1.52591675671689e-05, + "loss": 77.3031, + "step": 154640 + }, + { + "epoch": 0.6248055689104183, + "grad_norm": 558.022705078125, + "learning_rate": 1.5256454675009944e-05, + "loss": 62.2865, + "step": 154650 + }, + { + "epoch": 0.6248459701757859, + "grad_norm": 832.2144165039062, + "learning_rate": 1.525374187532884e-05, + "loss": 67.2643, + "step": 154660 + }, + { + "epoch": 0.6248863714411536, + "grad_norm": 568.6776733398438, + "learning_rate": 1.5251029168178488e-05, + "loss": 67.2381, + "step": 154670 + }, + { + "epoch": 0.6249267727065212, + "grad_norm": 704.3900146484375, + "learning_rate": 1.5248316553611766e-05, + "loss": 77.2429, + "step": 154680 + }, + { + "epoch": 0.6249671739718888, + "grad_norm": 359.9125061035156, + "learning_rate": 1.524560403168155e-05, + "loss": 66.6735, + "step": 154690 + }, + { + "epoch": 0.6250075752372565, + "grad_norm": 839.8539428710938, + "learning_rate": 1.5242891602440743e-05, + "loss": 132.2149, + "step": 154700 + }, + { + "epoch": 0.6250479765026241, + "grad_norm": 1028.9013671875, + "learning_rate": 1.5240179265942204e-05, + "loss": 78.0235, + "step": 154710 + }, + { + "epoch": 0.6250883777679916, + "grad_norm": 876.002685546875, + "learning_rate": 1.5237467022238827e-05, + "loss": 87.6409, + "step": 154720 + }, + { + "epoch": 0.6251287790333593, + "grad_norm": 975.888671875, + "learning_rate": 1.5234754871383484e-05, + "loss": 63.3426, + "step": 154730 + }, + { + "epoch": 0.6251691802987269, + "grad_norm": 509.8883056640625, + "learning_rate": 1.5232042813429044e-05, + "loss": 81.4348, + "step": 154740 + }, + { + "epoch": 0.6252095815640946, + "grad_norm": 533.7643432617188, + "learning_rate": 1.5229330848428387e-05, + "loss": 84.5069, + "step": 154750 + }, + { + "epoch": 0.6252499828294622, + "grad_norm": 976.197509765625, + "learning_rate": 1.5226618976434382e-05, + "loss": 98.9802, + "step": 154760 + }, + { + "epoch": 0.6252903840948298, + "grad_norm": 1070.27880859375, + "learning_rate": 1.5223907197499888e-05, + "loss": 149.7499, + "step": 154770 + }, + { + "epoch": 0.6253307853601975, + "grad_norm": 505.835205078125, + "learning_rate": 1.5221195511677791e-05, + "loss": 44.3166, + "step": 154780 + }, + { + "epoch": 0.6253711866255651, + "grad_norm": 408.80242919921875, + "learning_rate": 1.5218483919020941e-05, + "loss": 45.2065, + "step": 154790 + }, + { + "epoch": 0.6254115878909328, + "grad_norm": 342.03564453125, + "learning_rate": 1.5215772419582215e-05, + "loss": 91.0239, + "step": 154800 + }, + { + "epoch": 0.6254519891563004, + "grad_norm": 
2249.146240234375, + "learning_rate": 1.521306101341447e-05, + "loss": 76.6784, + "step": 154810 + }, + { + "epoch": 0.625492390421668, + "grad_norm": 308.122802734375, + "learning_rate": 1.521034970057056e-05, + "loss": 67.2539, + "step": 154820 + }, + { + "epoch": 0.6255327916870357, + "grad_norm": 650.0792236328125, + "learning_rate": 1.5207638481103356e-05, + "loss": 76.6168, + "step": 154830 + }, + { + "epoch": 0.6255731929524033, + "grad_norm": 621.2904663085938, + "learning_rate": 1.52049273550657e-05, + "loss": 89.189, + "step": 154840 + }, + { + "epoch": 0.6256135942177709, + "grad_norm": 1199.235107421875, + "learning_rate": 1.5202216322510453e-05, + "loss": 85.6586, + "step": 154850 + }, + { + "epoch": 0.6256539954831385, + "grad_norm": 701.1157836914062, + "learning_rate": 1.5199505383490473e-05, + "loss": 93.9944, + "step": 154860 + }, + { + "epoch": 0.6256943967485061, + "grad_norm": 501.5303955078125, + "learning_rate": 1.5196794538058601e-05, + "loss": 79.2633, + "step": 154870 + }, + { + "epoch": 0.6257347980138738, + "grad_norm": 1029.003173828125, + "learning_rate": 1.5194083786267699e-05, + "loss": 84.9625, + "step": 154880 + }, + { + "epoch": 0.6257751992792414, + "grad_norm": 665.5426635742188, + "learning_rate": 1.5191373128170608e-05, + "loss": 45.4357, + "step": 154890 + }, + { + "epoch": 0.625815600544609, + "grad_norm": 644.782470703125, + "learning_rate": 1.5188662563820165e-05, + "loss": 59.1271, + "step": 154900 + }, + { + "epoch": 0.6258560018099767, + "grad_norm": 346.4615173339844, + "learning_rate": 1.5185952093269233e-05, + "loss": 72.967, + "step": 154910 + }, + { + "epoch": 0.6258964030753443, + "grad_norm": 1438.3065185546875, + "learning_rate": 1.518324171657064e-05, + "loss": 74.792, + "step": 154920 + }, + { + "epoch": 0.625936804340712, + "grad_norm": 525.9834594726562, + "learning_rate": 1.5180531433777225e-05, + "loss": 74.9825, + "step": 154930 + }, + { + "epoch": 0.6259772056060796, + "grad_norm": 1113.6903076171875, + "learning_rate": 1.517782124494184e-05, + "loss": 67.5857, + "step": 154940 + }, + { + "epoch": 0.6260176068714473, + "grad_norm": 558.7354736328125, + "learning_rate": 1.51751111501173e-05, + "loss": 62.374, + "step": 154950 + }, + { + "epoch": 0.6260580081368149, + "grad_norm": 911.6202392578125, + "learning_rate": 1.5172401149356462e-05, + "loss": 88.9135, + "step": 154960 + }, + { + "epoch": 0.6260984094021824, + "grad_norm": 535.6473999023438, + "learning_rate": 1.5169691242712148e-05, + "loss": 40.4407, + "step": 154970 + }, + { + "epoch": 0.6261388106675501, + "grad_norm": 255.40940856933594, + "learning_rate": 1.5166981430237182e-05, + "loss": 42.1368, + "step": 154980 + }, + { + "epoch": 0.6261792119329177, + "grad_norm": 370.58746337890625, + "learning_rate": 1.516427171198441e-05, + "loss": 58.9151, + "step": 154990 + }, + { + "epoch": 0.6262196131982853, + "grad_norm": 819.7075805664062, + "learning_rate": 1.5161562088006649e-05, + "loss": 96.505, + "step": 155000 + }, + { + "epoch": 0.626260014463653, + "grad_norm": 636.8270263671875, + "learning_rate": 1.5158852558356724e-05, + "loss": 104.6206, + "step": 155010 + }, + { + "epoch": 0.6263004157290206, + "grad_norm": 1417.335205078125, + "learning_rate": 1.5156143123087463e-05, + "loss": 88.9424, + "step": 155020 + }, + { + "epoch": 0.6263408169943883, + "grad_norm": 269.9468078613281, + "learning_rate": 1.515343378225168e-05, + "loss": 118.3928, + "step": 155030 + }, + { + "epoch": 0.6263812182597559, + "grad_norm": 548.0360107421875, + "learning_rate": 
1.5150724535902209e-05, + "loss": 66.5909, + "step": 155040 + }, + { + "epoch": 0.6264216195251235, + "grad_norm": 969.035400390625, + "learning_rate": 1.5148015384091856e-05, + "loss": 92.3683, + "step": 155050 + }, + { + "epoch": 0.6264620207904912, + "grad_norm": 715.3333740234375, + "learning_rate": 1.5145306326873437e-05, + "loss": 52.4486, + "step": 155060 + }, + { + "epoch": 0.6265024220558588, + "grad_norm": 504.2529296875, + "learning_rate": 1.5142597364299777e-05, + "loss": 77.9375, + "step": 155070 + }, + { + "epoch": 0.6265428233212265, + "grad_norm": 897.4830932617188, + "learning_rate": 1.5139888496423675e-05, + "loss": 80.3665, + "step": 155080 + }, + { + "epoch": 0.6265832245865941, + "grad_norm": 920.7125854492188, + "learning_rate": 1.5137179723297956e-05, + "loss": 86.8092, + "step": 155090 + }, + { + "epoch": 0.6266236258519616, + "grad_norm": 1475.4910888671875, + "learning_rate": 1.5134471044975423e-05, + "loss": 77.7013, + "step": 155100 + }, + { + "epoch": 0.6266640271173293, + "grad_norm": 516.3450927734375, + "learning_rate": 1.5131762461508872e-05, + "loss": 99.7693, + "step": 155110 + }, + { + "epoch": 0.6267044283826969, + "grad_norm": 1018.7539672851562, + "learning_rate": 1.5129053972951126e-05, + "loss": 88.7942, + "step": 155120 + }, + { + "epoch": 0.6267448296480645, + "grad_norm": 625.2257080078125, + "learning_rate": 1.5126345579354982e-05, + "loss": 74.982, + "step": 155130 + }, + { + "epoch": 0.6267852309134322, + "grad_norm": 1169.4434814453125, + "learning_rate": 1.5123637280773231e-05, + "loss": 104.5666, + "step": 155140 + }, + { + "epoch": 0.6268256321787998, + "grad_norm": 447.64617919921875, + "learning_rate": 1.512092907725869e-05, + "loss": 86.9672, + "step": 155150 + }, + { + "epoch": 0.6268660334441675, + "grad_norm": 417.5661315917969, + "learning_rate": 1.5118220968864146e-05, + "loss": 72.0953, + "step": 155160 + }, + { + "epoch": 0.6269064347095351, + "grad_norm": 760.6632690429688, + "learning_rate": 1.5115512955642397e-05, + "loss": 71.5018, + "step": 155170 + }, + { + "epoch": 0.6269468359749027, + "grad_norm": 613.0640258789062, + "learning_rate": 1.511280503764624e-05, + "loss": 143.3116, + "step": 155180 + }, + { + "epoch": 0.6269872372402704, + "grad_norm": 523.092041015625, + "learning_rate": 1.511009721492846e-05, + "loss": 70.1126, + "step": 155190 + }, + { + "epoch": 0.627027638505638, + "grad_norm": 467.596923828125, + "learning_rate": 1.5107389487541856e-05, + "loss": 74.2233, + "step": 155200 + }, + { + "epoch": 0.6270680397710057, + "grad_norm": 642.0260620117188, + "learning_rate": 1.5104681855539216e-05, + "loss": 52.5864, + "step": 155210 + }, + { + "epoch": 0.6271084410363733, + "grad_norm": 1641.6796875, + "learning_rate": 1.5101974318973313e-05, + "loss": 82.4806, + "step": 155220 + }, + { + "epoch": 0.6271488423017408, + "grad_norm": 510.649658203125, + "learning_rate": 1.5099266877896952e-05, + "loss": 45.1935, + "step": 155230 + }, + { + "epoch": 0.6271892435671085, + "grad_norm": 797.8385009765625, + "learning_rate": 1.5096559532362903e-05, + "loss": 85.9138, + "step": 155240 + }, + { + "epoch": 0.6272296448324761, + "grad_norm": 589.3023681640625, + "learning_rate": 1.509385228242395e-05, + "loss": 77.1994, + "step": 155250 + }, + { + "epoch": 0.6272700460978438, + "grad_norm": 1010.7744750976562, + "learning_rate": 1.5091145128132873e-05, + "loss": 101.6562, + "step": 155260 + }, + { + "epoch": 0.6273104473632114, + "grad_norm": 687.3636474609375, + "learning_rate": 1.508843806954245e-05, + "loss": 80.0385, + 
"step": 155270 + }, + { + "epoch": 0.627350848628579, + "grad_norm": 1078.1820068359375, + "learning_rate": 1.5085731106705457e-05, + "loss": 67.1356, + "step": 155280 + }, + { + "epoch": 0.6273912498939467, + "grad_norm": 1073.198974609375, + "learning_rate": 1.5083024239674666e-05, + "loss": 81.7548, + "step": 155290 + }, + { + "epoch": 0.6274316511593143, + "grad_norm": 726.0513305664062, + "learning_rate": 1.508031746850284e-05, + "loss": 121.1242, + "step": 155300 + }, + { + "epoch": 0.627472052424682, + "grad_norm": 484.8356628417969, + "learning_rate": 1.5077610793242767e-05, + "loss": 65.4975, + "step": 155310 + }, + { + "epoch": 0.6275124536900496, + "grad_norm": 1305.3914794921875, + "learning_rate": 1.50749042139472e-05, + "loss": 89.7788, + "step": 155320 + }, + { + "epoch": 0.6275528549554172, + "grad_norm": 1630.8251953125, + "learning_rate": 1.5072197730668916e-05, + "loss": 68.0796, + "step": 155330 + }, + { + "epoch": 0.6275932562207849, + "grad_norm": 445.65777587890625, + "learning_rate": 1.5069491343460673e-05, + "loss": 64.826, + "step": 155340 + }, + { + "epoch": 0.6276336574861525, + "grad_norm": 540.0055541992188, + "learning_rate": 1.5066785052375234e-05, + "loss": 75.1703, + "step": 155350 + }, + { + "epoch": 0.62767405875152, + "grad_norm": 1085.75927734375, + "learning_rate": 1.5064078857465363e-05, + "loss": 59.8912, + "step": 155360 + }, + { + "epoch": 0.6277144600168877, + "grad_norm": 189.11973571777344, + "learning_rate": 1.5061372758783816e-05, + "loss": 97.3675, + "step": 155370 + }, + { + "epoch": 0.6277548612822553, + "grad_norm": 259.7674255371094, + "learning_rate": 1.505866675638334e-05, + "loss": 57.3826, + "step": 155380 + }, + { + "epoch": 0.627795262547623, + "grad_norm": 953.7411499023438, + "learning_rate": 1.505596085031671e-05, + "loss": 77.9154, + "step": 155390 + }, + { + "epoch": 0.6278356638129906, + "grad_norm": 977.1476440429688, + "learning_rate": 1.5053255040636659e-05, + "loss": 130.8597, + "step": 155400 + }, + { + "epoch": 0.6278760650783582, + "grad_norm": 669.4309692382812, + "learning_rate": 1.5050549327395956e-05, + "loss": 102.3684, + "step": 155410 + }, + { + "epoch": 0.6279164663437259, + "grad_norm": 432.9653625488281, + "learning_rate": 1.5047843710647344e-05, + "loss": 66.2695, + "step": 155420 + }, + { + "epoch": 0.6279568676090935, + "grad_norm": 832.7944946289062, + "learning_rate": 1.5045138190443562e-05, + "loss": 50.9364, + "step": 155430 + }, + { + "epoch": 0.6279972688744612, + "grad_norm": 828.3533935546875, + "learning_rate": 1.5042432766837366e-05, + "loss": 62.8569, + "step": 155440 + }, + { + "epoch": 0.6280376701398288, + "grad_norm": 879.11328125, + "learning_rate": 1.5039727439881496e-05, + "loss": 67.6627, + "step": 155450 + }, + { + "epoch": 0.6280780714051964, + "grad_norm": 576.5384521484375, + "learning_rate": 1.5037022209628686e-05, + "loss": 36.9746, + "step": 155460 + }, + { + "epoch": 0.6281184726705641, + "grad_norm": 330.3351135253906, + "learning_rate": 1.503431707613169e-05, + "loss": 75.8628, + "step": 155470 + }, + { + "epoch": 0.6281588739359317, + "grad_norm": 683.106201171875, + "learning_rate": 1.503161203944323e-05, + "loss": 68.0877, + "step": 155480 + }, + { + "epoch": 0.6281992752012993, + "grad_norm": 440.8873596191406, + "learning_rate": 1.5028907099616064e-05, + "loss": 81.4746, + "step": 155490 + }, + { + "epoch": 0.6282396764666669, + "grad_norm": 439.09014892578125, + "learning_rate": 1.5026202256702909e-05, + "loss": 61.6098, + "step": 155500 + }, + { + "epoch": 
0.6282800777320345, + "grad_norm": 93.88956451416016, + "learning_rate": 1.50234975107565e-05, + "loss": 58.0175, + "step": 155510 + }, + { + "epoch": 0.6283204789974022, + "grad_norm": 578.8618774414062, + "learning_rate": 1.5020792861829571e-05, + "loss": 65.6012, + "step": 155520 + }, + { + "epoch": 0.6283608802627698, + "grad_norm": 961.232177734375, + "learning_rate": 1.5018088309974846e-05, + "loss": 109.4425, + "step": 155530 + }, + { + "epoch": 0.6284012815281375, + "grad_norm": 1301.2437744140625, + "learning_rate": 1.5015383855245054e-05, + "loss": 86.382, + "step": 155540 + }, + { + "epoch": 0.6284416827935051, + "grad_norm": 592.7103881835938, + "learning_rate": 1.5012679497692923e-05, + "loss": 69.4075, + "step": 155550 + }, + { + "epoch": 0.6284820840588727, + "grad_norm": 933.7501220703125, + "learning_rate": 1.5009975237371163e-05, + "loss": 115.2061, + "step": 155560 + }, + { + "epoch": 0.6285224853242404, + "grad_norm": 635.9288940429688, + "learning_rate": 1.5007271074332517e-05, + "loss": 61.0482, + "step": 155570 + }, + { + "epoch": 0.628562886589608, + "grad_norm": 904.969970703125, + "learning_rate": 1.5004567008629692e-05, + "loss": 68.651, + "step": 155580 + }, + { + "epoch": 0.6286032878549757, + "grad_norm": 729.0704956054688, + "learning_rate": 1.5001863040315394e-05, + "loss": 88.6226, + "step": 155590 + }, + { + "epoch": 0.6286436891203433, + "grad_norm": 560.8312377929688, + "learning_rate": 1.499915916944236e-05, + "loss": 66.6436, + "step": 155600 + }, + { + "epoch": 0.6286840903857108, + "grad_norm": 619.1416625976562, + "learning_rate": 1.4996455396063285e-05, + "loss": 68.8571, + "step": 155610 + }, + { + "epoch": 0.6287244916510785, + "grad_norm": 1436.04150390625, + "learning_rate": 1.4993751720230893e-05, + "loss": 73.2686, + "step": 155620 + }, + { + "epoch": 0.6287648929164461, + "grad_norm": 358.6614990234375, + "learning_rate": 1.4991048141997893e-05, + "loss": 79.7837, + "step": 155630 + }, + { + "epoch": 0.6288052941818137, + "grad_norm": 268.5382080078125, + "learning_rate": 1.4988344661416975e-05, + "loss": 42.3873, + "step": 155640 + }, + { + "epoch": 0.6288456954471814, + "grad_norm": 471.78240966796875, + "learning_rate": 1.498564127854087e-05, + "loss": 82.5957, + "step": 155650 + }, + { + "epoch": 0.628886096712549, + "grad_norm": 1131.9830322265625, + "learning_rate": 1.4982937993422266e-05, + "loss": 111.5805, + "step": 155660 + }, + { + "epoch": 0.6289264979779167, + "grad_norm": 705.074462890625, + "learning_rate": 1.4980234806113862e-05, + "loss": 55.7616, + "step": 155670 + }, + { + "epoch": 0.6289668992432843, + "grad_norm": 1361.7696533203125, + "learning_rate": 1.4977531716668376e-05, + "loss": 107.9154, + "step": 155680 + }, + { + "epoch": 0.629007300508652, + "grad_norm": 571.7637329101562, + "learning_rate": 1.4974828725138487e-05, + "loss": 58.3696, + "step": 155690 + }, + { + "epoch": 0.6290477017740196, + "grad_norm": 1047.1182861328125, + "learning_rate": 1.4972125831576905e-05, + "loss": 66.7409, + "step": 155700 + }, + { + "epoch": 0.6290881030393872, + "grad_norm": 1566.75732421875, + "learning_rate": 1.496942303603632e-05, + "loss": 77.2444, + "step": 155710 + }, + { + "epoch": 0.6291285043047549, + "grad_norm": 755.58935546875, + "learning_rate": 1.4966720338569411e-05, + "loss": 47.2742, + "step": 155720 + }, + { + "epoch": 0.6291689055701225, + "grad_norm": 767.9099731445312, + "learning_rate": 1.4964017739228891e-05, + "loss": 59.112, + "step": 155730 + }, + { + "epoch": 0.62920930683549, + "grad_norm": 
635.1253051757812, + "learning_rate": 1.4961315238067435e-05, + "loss": 40.4369, + "step": 155740 + }, + { + "epoch": 0.6292497081008577, + "grad_norm": 577.240234375, + "learning_rate": 1.4958612835137727e-05, + "loss": 64.5564, + "step": 155750 + }, + { + "epoch": 0.6292901093662253, + "grad_norm": 449.0574645996094, + "learning_rate": 1.4955910530492465e-05, + "loss": 57.3205, + "step": 155760 + }, + { + "epoch": 0.629330510631593, + "grad_norm": 784.01611328125, + "learning_rate": 1.495320832418432e-05, + "loss": 84.8703, + "step": 155770 + }, + { + "epoch": 0.6293709118969606, + "grad_norm": 560.8143920898438, + "learning_rate": 1.495050621626598e-05, + "loss": 72.3739, + "step": 155780 + }, + { + "epoch": 0.6294113131623282, + "grad_norm": 244.49127197265625, + "learning_rate": 1.4947804206790121e-05, + "loss": 139.3818, + "step": 155790 + }, + { + "epoch": 0.6294517144276959, + "grad_norm": 667.9622192382812, + "learning_rate": 1.4945102295809415e-05, + "loss": 95.4993, + "step": 155800 + }, + { + "epoch": 0.6294921156930635, + "grad_norm": 822.63623046875, + "learning_rate": 1.4942400483376547e-05, + "loss": 63.3942, + "step": 155810 + }, + { + "epoch": 0.6295325169584312, + "grad_norm": 431.8647155761719, + "learning_rate": 1.4939698769544186e-05, + "loss": 96.8765, + "step": 155820 + }, + { + "epoch": 0.6295729182237988, + "grad_norm": 995.6657104492188, + "learning_rate": 1.4936997154364997e-05, + "loss": 65.4366, + "step": 155830 + }, + { + "epoch": 0.6296133194891664, + "grad_norm": 708.4447631835938, + "learning_rate": 1.4934295637891661e-05, + "loss": 73.506, + "step": 155840 + }, + { + "epoch": 0.6296537207545341, + "grad_norm": 974.8126831054688, + "learning_rate": 1.4931594220176834e-05, + "loss": 72.1784, + "step": 155850 + }, + { + "epoch": 0.6296941220199017, + "grad_norm": 508.1643981933594, + "learning_rate": 1.4928892901273193e-05, + "loss": 74.1204, + "step": 155860 + }, + { + "epoch": 0.6297345232852692, + "grad_norm": 1305.826904296875, + "learning_rate": 1.4926191681233393e-05, + "loss": 71.2335, + "step": 155870 + }, + { + "epoch": 0.6297749245506369, + "grad_norm": 739.2384033203125, + "learning_rate": 1.4923490560110097e-05, + "loss": 47.1704, + "step": 155880 + }, + { + "epoch": 0.6298153258160045, + "grad_norm": 910.4365234375, + "learning_rate": 1.4920789537955969e-05, + "loss": 89.8754, + "step": 155890 + }, + { + "epoch": 0.6298557270813722, + "grad_norm": 596.9612426757812, + "learning_rate": 1.4918088614823665e-05, + "loss": 80.4188, + "step": 155900 + }, + { + "epoch": 0.6298961283467398, + "grad_norm": 967.322998046875, + "learning_rate": 1.4915387790765832e-05, + "loss": 93.906, + "step": 155910 + }, + { + "epoch": 0.6299365296121074, + "grad_norm": 747.8112182617188, + "learning_rate": 1.491268706583514e-05, + "loss": 44.168, + "step": 155920 + }, + { + "epoch": 0.6299769308774751, + "grad_norm": 990.2349853515625, + "learning_rate": 1.490998644008422e-05, + "loss": 79.5299, + "step": 155930 + }, + { + "epoch": 0.6300173321428427, + "grad_norm": 3174.209716796875, + "learning_rate": 1.490728591356575e-05, + "loss": 74.1403, + "step": 155940 + }, + { + "epoch": 0.6300577334082104, + "grad_norm": 696.6058959960938, + "learning_rate": 1.4904585486332361e-05, + "loss": 42.9566, + "step": 155950 + }, + { + "epoch": 0.630098134673578, + "grad_norm": 2003.001220703125, + "learning_rate": 1.4901885158436696e-05, + "loss": 99.9955, + "step": 155960 + }, + { + "epoch": 0.6301385359389456, + "grad_norm": 957.818359375, + "learning_rate": 
1.4899184929931409e-05, + "loss": 69.4298, + "step": 155970 + }, + { + "epoch": 0.6301789372043133, + "grad_norm": 651.7313232421875, + "learning_rate": 1.4896484800869138e-05, + "loss": 65.7367, + "step": 155980 + }, + { + "epoch": 0.6302193384696809, + "grad_norm": 115.39328002929688, + "learning_rate": 1.4893784771302512e-05, + "loss": 55.0122, + "step": 155990 + }, + { + "epoch": 0.6302597397350485, + "grad_norm": 662.175537109375, + "learning_rate": 1.4891084841284194e-05, + "loss": 38.2106, + "step": 156000 + }, + { + "epoch": 0.6303001410004161, + "grad_norm": 202.2489776611328, + "learning_rate": 1.4888385010866798e-05, + "loss": 38.2496, + "step": 156010 + }, + { + "epoch": 0.6303405422657837, + "grad_norm": 287.0430603027344, + "learning_rate": 1.4885685280102973e-05, + "loss": 74.6243, + "step": 156020 + }, + { + "epoch": 0.6303809435311514, + "grad_norm": 695.2216186523438, + "learning_rate": 1.4882985649045347e-05, + "loss": 54.1563, + "step": 156030 + }, + { + "epoch": 0.630421344796519, + "grad_norm": 259.36993408203125, + "learning_rate": 1.488028611774655e-05, + "loss": 104.5212, + "step": 156040 + }, + { + "epoch": 0.6304617460618867, + "grad_norm": 332.0163879394531, + "learning_rate": 1.487758668625921e-05, + "loss": 45.2679, + "step": 156050 + }, + { + "epoch": 0.6305021473272543, + "grad_norm": 331.1156921386719, + "learning_rate": 1.487488735463596e-05, + "loss": 62.539, + "step": 156060 + }, + { + "epoch": 0.6305425485926219, + "grad_norm": 1584.1014404296875, + "learning_rate": 1.4872188122929407e-05, + "loss": 79.1919, + "step": 156070 + }, + { + "epoch": 0.6305829498579896, + "grad_norm": 2678.967529296875, + "learning_rate": 1.4869488991192196e-05, + "loss": 78.7911, + "step": 156080 + }, + { + "epoch": 0.6306233511233572, + "grad_norm": 950.40966796875, + "learning_rate": 1.486678995947693e-05, + "loss": 70.0033, + "step": 156090 + }, + { + "epoch": 0.6306637523887249, + "grad_norm": 1074.0162353515625, + "learning_rate": 1.4864091027836245e-05, + "loss": 98.7038, + "step": 156100 + }, + { + "epoch": 0.6307041536540925, + "grad_norm": 742.64013671875, + "learning_rate": 1.4861392196322753e-05, + "loss": 62.0764, + "step": 156110 + }, + { + "epoch": 0.6307445549194601, + "grad_norm": 1057.21826171875, + "learning_rate": 1.4858693464989058e-05, + "loss": 68.6465, + "step": 156120 + }, + { + "epoch": 0.6307849561848277, + "grad_norm": 791.9356689453125, + "learning_rate": 1.4855994833887785e-05, + "loss": 96.3174, + "step": 156130 + }, + { + "epoch": 0.6308253574501953, + "grad_norm": 367.4755554199219, + "learning_rate": 1.4853296303071534e-05, + "loss": 53.9934, + "step": 156140 + }, + { + "epoch": 0.6308657587155629, + "grad_norm": 625.8234252929688, + "learning_rate": 1.4850597872592927e-05, + "loss": 113.8424, + "step": 156150 + }, + { + "epoch": 0.6309061599809306, + "grad_norm": 1051.656005859375, + "learning_rate": 1.4847899542504567e-05, + "loss": 82.9777, + "step": 156160 + }, + { + "epoch": 0.6309465612462982, + "grad_norm": 764.6788330078125, + "learning_rate": 1.484520131285905e-05, + "loss": 67.4149, + "step": 156170 + }, + { + "epoch": 0.6309869625116659, + "grad_norm": 716.7052612304688, + "learning_rate": 1.4842503183708995e-05, + "loss": 65.8896, + "step": 156180 + }, + { + "epoch": 0.6310273637770335, + "grad_norm": 729.618408203125, + "learning_rate": 1.4839805155106996e-05, + "loss": 56.5401, + "step": 156190 + }, + { + "epoch": 0.6310677650424011, + "grad_norm": 1098.292724609375, + "learning_rate": 1.4837107227105649e-05, + "loss": 107.2299, 
+ "step": 156200 + }, + { + "epoch": 0.6311081663077688, + "grad_norm": 468.4741516113281, + "learning_rate": 1.4834409399757555e-05, + "loss": 90.3437, + "step": 156210 + }, + { + "epoch": 0.6311485675731364, + "grad_norm": 1079.0841064453125, + "learning_rate": 1.4831711673115305e-05, + "loss": 77.7002, + "step": 156220 + }, + { + "epoch": 0.6311889688385041, + "grad_norm": 599.2054443359375, + "learning_rate": 1.4829014047231507e-05, + "loss": 64.1494, + "step": 156230 + }, + { + "epoch": 0.6312293701038717, + "grad_norm": 783.5579223632812, + "learning_rate": 1.4826316522158738e-05, + "loss": 70.6774, + "step": 156240 + }, + { + "epoch": 0.6312697713692392, + "grad_norm": 423.9833068847656, + "learning_rate": 1.4823619097949584e-05, + "loss": 64.9681, + "step": 156250 + }, + { + "epoch": 0.6313101726346069, + "grad_norm": 441.3977966308594, + "learning_rate": 1.4820921774656652e-05, + "loss": 59.3226, + "step": 156260 + }, + { + "epoch": 0.6313505738999745, + "grad_norm": 321.7123718261719, + "learning_rate": 1.4818224552332515e-05, + "loss": 84.9818, + "step": 156270 + }, + { + "epoch": 0.6313909751653421, + "grad_norm": 358.4299621582031, + "learning_rate": 1.4815527431029745e-05, + "loss": 67.5006, + "step": 156280 + }, + { + "epoch": 0.6314313764307098, + "grad_norm": 789.7781982421875, + "learning_rate": 1.481283041080095e-05, + "loss": 57.2903, + "step": 156290 + }, + { + "epoch": 0.6314717776960774, + "grad_norm": 1264.6729736328125, + "learning_rate": 1.4810133491698693e-05, + "loss": 111.8428, + "step": 156300 + }, + { + "epoch": 0.6315121789614451, + "grad_norm": 611.44970703125, + "learning_rate": 1.480743667377556e-05, + "loss": 58.7902, + "step": 156310 + }, + { + "epoch": 0.6315525802268127, + "grad_norm": 718.92578125, + "learning_rate": 1.4804739957084117e-05, + "loss": 64.9081, + "step": 156320 + }, + { + "epoch": 0.6315929814921803, + "grad_norm": 547.7733764648438, + "learning_rate": 1.4802043341676942e-05, + "loss": 118.673, + "step": 156330 + }, + { + "epoch": 0.631633382757548, + "grad_norm": 1031.191162109375, + "learning_rate": 1.479934682760661e-05, + "loss": 85.3568, + "step": 156340 + }, + { + "epoch": 0.6316737840229156, + "grad_norm": 435.9314270019531, + "learning_rate": 1.4796650414925693e-05, + "loss": 50.3895, + "step": 156350 + }, + { + "epoch": 0.6317141852882833, + "grad_norm": 731.6911010742188, + "learning_rate": 1.4793954103686747e-05, + "loss": 43.2721, + "step": 156360 + }, + { + "epoch": 0.6317545865536509, + "grad_norm": 703.953125, + "learning_rate": 1.4791257893942353e-05, + "loss": 66.6028, + "step": 156370 + }, + { + "epoch": 0.6317949878190184, + "grad_norm": 3290.709228515625, + "learning_rate": 1.4788561785745063e-05, + "loss": 69.2092, + "step": 156380 + }, + { + "epoch": 0.6318353890843861, + "grad_norm": 386.9081726074219, + "learning_rate": 1.478586577914745e-05, + "loss": 79.6905, + "step": 156390 + }, + { + "epoch": 0.6318757903497537, + "grad_norm": 780.0101928710938, + "learning_rate": 1.4783169874202067e-05, + "loss": 64.1192, + "step": 156400 + }, + { + "epoch": 0.6319161916151214, + "grad_norm": 903.5742797851562, + "learning_rate": 1.4780474070961467e-05, + "loss": 57.3937, + "step": 156410 + }, + { + "epoch": 0.631956592880489, + "grad_norm": 1170.1363525390625, + "learning_rate": 1.4777778369478217e-05, + "loss": 112.8963, + "step": 156420 + }, + { + "epoch": 0.6319969941458566, + "grad_norm": 450.0503234863281, + "learning_rate": 1.477508276980487e-05, + "loss": 46.6392, + "step": 156430 + }, + { + "epoch": 
0.6320373954112243, + "grad_norm": 581.8621826171875, + "learning_rate": 1.4772387271993962e-05, + "loss": 74.4566, + "step": 156440 + }, + { + "epoch": 0.6320777966765919, + "grad_norm": 377.7392578125, + "learning_rate": 1.4769691876098065e-05, + "loss": 58.7216, + "step": 156450 + }, + { + "epoch": 0.6321181979419596, + "grad_norm": 351.4676818847656, + "learning_rate": 1.4766996582169714e-05, + "loss": 67.0194, + "step": 156460 + }, + { + "epoch": 0.6321585992073272, + "grad_norm": 609.9014282226562, + "learning_rate": 1.4764301390261463e-05, + "loss": 71.6414, + "step": 156470 + }, + { + "epoch": 0.6321990004726948, + "grad_norm": 742.1620483398438, + "learning_rate": 1.4761606300425855e-05, + "loss": 120.0248, + "step": 156480 + }, + { + "epoch": 0.6322394017380625, + "grad_norm": 609.908935546875, + "learning_rate": 1.4758911312715417e-05, + "loss": 59.4415, + "step": 156490 + }, + { + "epoch": 0.6322798030034301, + "grad_norm": 337.6656494140625, + "learning_rate": 1.475621642718271e-05, + "loss": 35.668, + "step": 156500 + }, + { + "epoch": 0.6323202042687976, + "grad_norm": 427.8983459472656, + "learning_rate": 1.4753521643880265e-05, + "loss": 73.4693, + "step": 156510 + }, + { + "epoch": 0.6323606055341653, + "grad_norm": 470.9547119140625, + "learning_rate": 1.4750826962860605e-05, + "loss": 69.8945, + "step": 156520 + }, + { + "epoch": 0.6324010067995329, + "grad_norm": 528.8994750976562, + "learning_rate": 1.4748132384176285e-05, + "loss": 81.1193, + "step": 156530 + }, + { + "epoch": 0.6324414080649006, + "grad_norm": 408.62493896484375, + "learning_rate": 1.4745437907879827e-05, + "loss": 51.2917, + "step": 156540 + }, + { + "epoch": 0.6324818093302682, + "grad_norm": 1640.861328125, + "learning_rate": 1.474274353402376e-05, + "loss": 75.8443, + "step": 156550 + }, + { + "epoch": 0.6325222105956358, + "grad_norm": 1123.0179443359375, + "learning_rate": 1.4740049262660618e-05, + "loss": 82.3711, + "step": 156560 + }, + { + "epoch": 0.6325626118610035, + "grad_norm": 603.68798828125, + "learning_rate": 1.4737355093842922e-05, + "loss": 59.2123, + "step": 156570 + }, + { + "epoch": 0.6326030131263711, + "grad_norm": 775.8084716796875, + "learning_rate": 1.4734661027623198e-05, + "loss": 68.0615, + "step": 156580 + }, + { + "epoch": 0.6326434143917388, + "grad_norm": 429.7221374511719, + "learning_rate": 1.4731967064053972e-05, + "loss": 56.4126, + "step": 156590 + }, + { + "epoch": 0.6326838156571064, + "grad_norm": 1061.1976318359375, + "learning_rate": 1.472927320318775e-05, + "loss": 86.9221, + "step": 156600 + }, + { + "epoch": 0.632724216922474, + "grad_norm": 702.2677612304688, + "learning_rate": 1.4726579445077069e-05, + "loss": 63.301, + "step": 156610 + }, + { + "epoch": 0.6327646181878417, + "grad_norm": 1058.05859375, + "learning_rate": 1.472388578977443e-05, + "loss": 87.0958, + "step": 156620 + }, + { + "epoch": 0.6328050194532093, + "grad_norm": 815.1450805664062, + "learning_rate": 1.4721192237332357e-05, + "loss": 71.0366, + "step": 156630 + }, + { + "epoch": 0.6328454207185769, + "grad_norm": 716.0730590820312, + "learning_rate": 1.4718498787803363e-05, + "loss": 68.2651, + "step": 156640 + }, + { + "epoch": 0.6328858219839445, + "grad_norm": 910.5143432617188, + "learning_rate": 1.4715805441239951e-05, + "loss": 103.9641, + "step": 156650 + }, + { + "epoch": 0.6329262232493121, + "grad_norm": 496.4553527832031, + "learning_rate": 1.4713112197694636e-05, + "loss": 60.8319, + "step": 156660 + }, + { + "epoch": 0.6329666245146798, + "grad_norm": 
797.3623657226562, + "learning_rate": 1.471041905721991e-05, + "loss": 75.1521, + "step": 156670 + }, + { + "epoch": 0.6330070257800474, + "grad_norm": 353.162353515625, + "learning_rate": 1.47077260198683e-05, + "loss": 72.4749, + "step": 156680 + }, + { + "epoch": 0.6330474270454151, + "grad_norm": 742.200927734375, + "learning_rate": 1.4705033085692293e-05, + "loss": 116.1406, + "step": 156690 + }, + { + "epoch": 0.6330878283107827, + "grad_norm": 863.1614379882812, + "learning_rate": 1.4702340254744382e-05, + "loss": 109.1378, + "step": 156700 + }, + { + "epoch": 0.6331282295761503, + "grad_norm": 180.9199676513672, + "learning_rate": 1.4699647527077088e-05, + "loss": 77.1441, + "step": 156710 + }, + { + "epoch": 0.633168630841518, + "grad_norm": 10356.1630859375, + "learning_rate": 1.469695490274289e-05, + "loss": 133.3134, + "step": 156720 + }, + { + "epoch": 0.6332090321068856, + "grad_norm": 570.1561279296875, + "learning_rate": 1.4694262381794287e-05, + "loss": 69.04, + "step": 156730 + }, + { + "epoch": 0.6332494333722533, + "grad_norm": 465.9795227050781, + "learning_rate": 1.4691569964283771e-05, + "loss": 86.098, + "step": 156740 + }, + { + "epoch": 0.6332898346376209, + "grad_norm": 1059.5194091796875, + "learning_rate": 1.4688877650263824e-05, + "loss": 100.6445, + "step": 156750 + }, + { + "epoch": 0.6333302359029885, + "grad_norm": 794.830810546875, + "learning_rate": 1.4686185439786949e-05, + "loss": 51.5526, + "step": 156760 + }, + { + "epoch": 0.6333706371683561, + "grad_norm": 479.64190673828125, + "learning_rate": 1.4683493332905627e-05, + "loss": 61.1679, + "step": 156770 + }, + { + "epoch": 0.6334110384337237, + "grad_norm": 1243.44873046875, + "learning_rate": 1.468080132967233e-05, + "loss": 88.327, + "step": 156780 + }, + { + "epoch": 0.6334514396990913, + "grad_norm": 492.45452880859375, + "learning_rate": 1.4678109430139555e-05, + "loss": 56.6101, + "step": 156790 + }, + { + "epoch": 0.633491840964459, + "grad_norm": 949.059814453125, + "learning_rate": 1.467541763435978e-05, + "loss": 61.8721, + "step": 156800 + }, + { + "epoch": 0.6335322422298266, + "grad_norm": 323.5378723144531, + "learning_rate": 1.4672725942385475e-05, + "loss": 71.1401, + "step": 156810 + }, + { + "epoch": 0.6335726434951943, + "grad_norm": 528.9710693359375, + "learning_rate": 1.4670034354269124e-05, + "loss": 77.8624, + "step": 156820 + }, + { + "epoch": 0.6336130447605619, + "grad_norm": 570.38232421875, + "learning_rate": 1.466734287006319e-05, + "loss": 89.1126, + "step": 156830 + }, + { + "epoch": 0.6336534460259295, + "grad_norm": 580.6535034179688, + "learning_rate": 1.466465148982016e-05, + "loss": 79.2254, + "step": 156840 + }, + { + "epoch": 0.6336938472912972, + "grad_norm": 1188.779296875, + "learning_rate": 1.4661960213592494e-05, + "loss": 100.7557, + "step": 156850 + }, + { + "epoch": 0.6337342485566648, + "grad_norm": 448.426513671875, + "learning_rate": 1.4659269041432656e-05, + "loss": 63.3088, + "step": 156860 + }, + { + "epoch": 0.6337746498220325, + "grad_norm": 268.8529052734375, + "learning_rate": 1.4656577973393124e-05, + "loss": 53.4374, + "step": 156870 + }, + { + "epoch": 0.6338150510874001, + "grad_norm": 695.4364624023438, + "learning_rate": 1.4653887009526356e-05, + "loss": 50.387, + "step": 156880 + }, + { + "epoch": 0.6338554523527676, + "grad_norm": 406.3636779785156, + "learning_rate": 1.4651196149884805e-05, + "loss": 64.1406, + "step": 156890 + }, + { + "epoch": 0.6338958536181353, + "grad_norm": 458.9206237792969, + "learning_rate": 
1.464850539452095e-05, + "loss": 56.3864, + "step": 156900 + }, + { + "epoch": 0.6339362548835029, + "grad_norm": 436.1049499511719, + "learning_rate": 1.464581474348723e-05, + "loss": 86.8897, + "step": 156910 + }, + { + "epoch": 0.6339766561488706, + "grad_norm": 514.5556030273438, + "learning_rate": 1.4643124196836111e-05, + "loss": 55.1289, + "step": 156920 + }, + { + "epoch": 0.6340170574142382, + "grad_norm": 1047.844970703125, + "learning_rate": 1.4640433754620047e-05, + "loss": 64.4486, + "step": 156930 + }, + { + "epoch": 0.6340574586796058, + "grad_norm": 745.4342041015625, + "learning_rate": 1.4637743416891476e-05, + "loss": 86.3127, + "step": 156940 + }, + { + "epoch": 0.6340978599449735, + "grad_norm": 347.2980651855469, + "learning_rate": 1.4635053183702867e-05, + "loss": 77.9751, + "step": 156950 + }, + { + "epoch": 0.6341382612103411, + "grad_norm": 560.5182495117188, + "learning_rate": 1.4632363055106657e-05, + "loss": 67.6185, + "step": 156960 + }, + { + "epoch": 0.6341786624757088, + "grad_norm": 642.9034423828125, + "learning_rate": 1.4629673031155287e-05, + "loss": 82.0875, + "step": 156970 + }, + { + "epoch": 0.6342190637410764, + "grad_norm": 737.0567626953125, + "learning_rate": 1.4626983111901212e-05, + "loss": 60.6421, + "step": 156980 + }, + { + "epoch": 0.634259465006444, + "grad_norm": 503.2254943847656, + "learning_rate": 1.4624293297396863e-05, + "loss": 53.9989, + "step": 156990 + }, + { + "epoch": 0.6342998662718117, + "grad_norm": 929.0985107421875, + "learning_rate": 1.4621603587694688e-05, + "loss": 81.5643, + "step": 157000 + }, + { + "epoch": 0.6343402675371793, + "grad_norm": 1039.268310546875, + "learning_rate": 1.4618913982847122e-05, + "loss": 106.1455, + "step": 157010 + }, + { + "epoch": 0.6343806688025468, + "grad_norm": 1109.618896484375, + "learning_rate": 1.461622448290659e-05, + "loss": 74.0704, + "step": 157020 + }, + { + "epoch": 0.6344210700679145, + "grad_norm": 1222.85302734375, + "learning_rate": 1.461353508792554e-05, + "loss": 68.5379, + "step": 157030 + }, + { + "epoch": 0.6344614713332821, + "grad_norm": 923.7366333007812, + "learning_rate": 1.4610845797956396e-05, + "loss": 96.3386, + "step": 157040 + }, + { + "epoch": 0.6345018725986498, + "grad_norm": 469.212646484375, + "learning_rate": 1.4608156613051581e-05, + "loss": 64.1575, + "step": 157050 + }, + { + "epoch": 0.6345422738640174, + "grad_norm": 892.2661743164062, + "learning_rate": 1.4605467533263538e-05, + "loss": 58.5821, + "step": 157060 + }, + { + "epoch": 0.634582675129385, + "grad_norm": 750.8076782226562, + "learning_rate": 1.4602778558644676e-05, + "loss": 35.3287, + "step": 157070 + }, + { + "epoch": 0.6346230763947527, + "grad_norm": 639.487548828125, + "learning_rate": 1.460008968924743e-05, + "loss": 81.7224, + "step": 157080 + }, + { + "epoch": 0.6346634776601203, + "grad_norm": 820.1612548828125, + "learning_rate": 1.4597400925124217e-05, + "loss": 90.2525, + "step": 157090 + }, + { + "epoch": 0.634703878925488, + "grad_norm": 1031.6068115234375, + "learning_rate": 1.4594712266327444e-05, + "loss": 109.8813, + "step": 157100 + }, + { + "epoch": 0.6347442801908556, + "grad_norm": 490.1838073730469, + "learning_rate": 1.4592023712909549e-05, + "loss": 52.9072, + "step": 157110 + }, + { + "epoch": 0.6347846814562232, + "grad_norm": 720.6361083984375, + "learning_rate": 1.4589335264922926e-05, + "loss": 84.5676, + "step": 157120 + }, + { + "epoch": 0.6348250827215909, + "grad_norm": 512.2216796875, + "learning_rate": 1.4586646922420006e-05, + "loss": 86.2262, + 
"step": 157130 + }, + { + "epoch": 0.6348654839869585, + "grad_norm": 665.2570190429688, + "learning_rate": 1.4583958685453188e-05, + "loss": 66.5989, + "step": 157140 + }, + { + "epoch": 0.634905885252326, + "grad_norm": 431.8143310546875, + "learning_rate": 1.4581270554074885e-05, + "loss": 53.6626, + "step": 157150 + }, + { + "epoch": 0.6349462865176937, + "grad_norm": 526.677001953125, + "learning_rate": 1.4578582528337504e-05, + "loss": 81.7625, + "step": 157160 + }, + { + "epoch": 0.6349866877830613, + "grad_norm": 542.757568359375, + "learning_rate": 1.4575894608293442e-05, + "loss": 62.9694, + "step": 157170 + }, + { + "epoch": 0.635027089048429, + "grad_norm": 604.6796875, + "learning_rate": 1.457320679399511e-05, + "loss": 88.6989, + "step": 157180 + }, + { + "epoch": 0.6350674903137966, + "grad_norm": 933.3673706054688, + "learning_rate": 1.4570519085494907e-05, + "loss": 71.2033, + "step": 157190 + }, + { + "epoch": 0.6351078915791643, + "grad_norm": 890.9435424804688, + "learning_rate": 1.4567831482845222e-05, + "loss": 75.8605, + "step": 157200 + }, + { + "epoch": 0.6351482928445319, + "grad_norm": 1520.1016845703125, + "learning_rate": 1.4565143986098469e-05, + "loss": 78.9619, + "step": 157210 + }, + { + "epoch": 0.6351886941098995, + "grad_norm": 1202.798828125, + "learning_rate": 1.456245659530703e-05, + "loss": 64.856, + "step": 157220 + }, + { + "epoch": 0.6352290953752672, + "grad_norm": 1053.35986328125, + "learning_rate": 1.4559769310523292e-05, + "loss": 88.932, + "step": 157230 + }, + { + "epoch": 0.6352694966406348, + "grad_norm": 802.9597778320312, + "learning_rate": 1.4557082131799662e-05, + "loss": 70.2635, + "step": 157240 + }, + { + "epoch": 0.6353098979060025, + "grad_norm": 1194.244873046875, + "learning_rate": 1.4554395059188515e-05, + "loss": 64.448, + "step": 157250 + }, + { + "epoch": 0.6353502991713701, + "grad_norm": 964.4576416015625, + "learning_rate": 1.455170809274224e-05, + "loss": 80.0916, + "step": 157260 + }, + { + "epoch": 0.6353907004367377, + "grad_norm": 393.19793701171875, + "learning_rate": 1.454902123251322e-05, + "loss": 44.2767, + "step": 157270 + }, + { + "epoch": 0.6354311017021053, + "grad_norm": 924.3762817382812, + "learning_rate": 1.4546334478553832e-05, + "loss": 77.2413, + "step": 157280 + }, + { + "epoch": 0.6354715029674729, + "grad_norm": 861.3959350585938, + "learning_rate": 1.4543647830916468e-05, + "loss": 85.811, + "step": 157290 + }, + { + "epoch": 0.6355119042328405, + "grad_norm": 591.285888671875, + "learning_rate": 1.45409612896535e-05, + "loss": 86.8249, + "step": 157300 + }, + { + "epoch": 0.6355523054982082, + "grad_norm": 651.3357543945312, + "learning_rate": 1.4538274854817292e-05, + "loss": 87.0976, + "step": 157310 + }, + { + "epoch": 0.6355927067635758, + "grad_norm": 541.8470458984375, + "learning_rate": 1.4535588526460237e-05, + "loss": 71.3921, + "step": 157320 + }, + { + "epoch": 0.6356331080289435, + "grad_norm": 582.4735107421875, + "learning_rate": 1.4532902304634698e-05, + "loss": 82.155, + "step": 157330 + }, + { + "epoch": 0.6356735092943111, + "grad_norm": 729.019287109375, + "learning_rate": 1.4530216189393036e-05, + "loss": 65.5385, + "step": 157340 + }, + { + "epoch": 0.6357139105596787, + "grad_norm": 661.1536254882812, + "learning_rate": 1.4527530180787634e-05, + "loss": 55.4674, + "step": 157350 + }, + { + "epoch": 0.6357543118250464, + "grad_norm": 1089.6871337890625, + "learning_rate": 1.4524844278870838e-05, + "loss": 98.5554, + "step": 157360 + }, + { + "epoch": 0.635794713090414, + 
"grad_norm": 275.330078125, + "learning_rate": 1.4522158483695029e-05, + "loss": 95.6343, + "step": 157370 + }, + { + "epoch": 0.6358351143557817, + "grad_norm": 597.3402099609375, + "learning_rate": 1.4519472795312562e-05, + "loss": 85.1221, + "step": 157380 + }, + { + "epoch": 0.6358755156211493, + "grad_norm": 218.19989013671875, + "learning_rate": 1.4516787213775785e-05, + "loss": 65.3399, + "step": 157390 + }, + { + "epoch": 0.6359159168865169, + "grad_norm": 1709.747802734375, + "learning_rate": 1.4514101739137072e-05, + "loss": 81.3998, + "step": 157400 + }, + { + "epoch": 0.6359563181518845, + "grad_norm": 683.5751342773438, + "learning_rate": 1.451141637144877e-05, + "loss": 98.2589, + "step": 157410 + }, + { + "epoch": 0.6359967194172521, + "grad_norm": 661.8906860351562, + "learning_rate": 1.4508731110763228e-05, + "loss": 33.6306, + "step": 157420 + }, + { + "epoch": 0.6360371206826197, + "grad_norm": 486.34796142578125, + "learning_rate": 1.4506045957132803e-05, + "loss": 58.0004, + "step": 157430 + }, + { + "epoch": 0.6360775219479874, + "grad_norm": 489.0040588378906, + "learning_rate": 1.4503360910609834e-05, + "loss": 69.7471, + "step": 157440 + }, + { + "epoch": 0.636117923213355, + "grad_norm": 855.4965209960938, + "learning_rate": 1.4500675971246683e-05, + "loss": 68.2322, + "step": 157450 + }, + { + "epoch": 0.6361583244787227, + "grad_norm": 1582.02685546875, + "learning_rate": 1.4497991139095684e-05, + "loss": 79.4163, + "step": 157460 + }, + { + "epoch": 0.6361987257440903, + "grad_norm": 720.5360717773438, + "learning_rate": 1.4495306414209172e-05, + "loss": 71.7085, + "step": 157470 + }, + { + "epoch": 0.636239127009458, + "grad_norm": 773.9551391601562, + "learning_rate": 1.4492621796639506e-05, + "loss": 57.6559, + "step": 157480 + }, + { + "epoch": 0.6362795282748256, + "grad_norm": 530.8468017578125, + "learning_rate": 1.448993728643901e-05, + "loss": 94.9408, + "step": 157490 + }, + { + "epoch": 0.6363199295401932, + "grad_norm": 236.35256958007812, + "learning_rate": 1.4487252883660019e-05, + "loss": 91.8643, + "step": 157500 + }, + { + "epoch": 0.6363603308055609, + "grad_norm": 1246.5479736328125, + "learning_rate": 1.4484568588354875e-05, + "loss": 90.8632, + "step": 157510 + }, + { + "epoch": 0.6364007320709285, + "grad_norm": 586.0814208984375, + "learning_rate": 1.4481884400575901e-05, + "loss": 53.9368, + "step": 157520 + }, + { + "epoch": 0.636441133336296, + "grad_norm": 994.2615966796875, + "learning_rate": 1.447920032037544e-05, + "loss": 64.9106, + "step": 157530 + }, + { + "epoch": 0.6364815346016637, + "grad_norm": 1032.315185546875, + "learning_rate": 1.4476516347805809e-05, + "loss": 69.9752, + "step": 157540 + }, + { + "epoch": 0.6365219358670313, + "grad_norm": 579.4603881835938, + "learning_rate": 1.4473832482919327e-05, + "loss": 47.6458, + "step": 157550 + }, + { + "epoch": 0.636562337132399, + "grad_norm": 1260.1043701171875, + "learning_rate": 1.4471148725768337e-05, + "loss": 71.5388, + "step": 157560 + }, + { + "epoch": 0.6366027383977666, + "grad_norm": 597.6293334960938, + "learning_rate": 1.4468465076405145e-05, + "loss": 107.1628, + "step": 157570 + }, + { + "epoch": 0.6366431396631342, + "grad_norm": 995.9818725585938, + "learning_rate": 1.4465781534882068e-05, + "loss": 64.815, + "step": 157580 + }, + { + "epoch": 0.6366835409285019, + "grad_norm": 433.11859130859375, + "learning_rate": 1.4463098101251435e-05, + "loss": 65.349, + "step": 157590 + }, + { + "epoch": 0.6367239421938695, + "grad_norm": 834.4934692382812, + 
"learning_rate": 1.4460414775565555e-05, + "loss": 103.0415, + "step": 157600 + }, + { + "epoch": 0.6367643434592372, + "grad_norm": 543.08740234375, + "learning_rate": 1.4457731557876741e-05, + "loss": 80.7173, + "step": 157610 + }, + { + "epoch": 0.6368047447246048, + "grad_norm": 522.2800903320312, + "learning_rate": 1.4455048448237307e-05, + "loss": 80.4036, + "step": 157620 + }, + { + "epoch": 0.6368451459899724, + "grad_norm": 535.4306640625, + "learning_rate": 1.4452365446699549e-05, + "loss": 74.294, + "step": 157630 + }, + { + "epoch": 0.6368855472553401, + "grad_norm": 729.0249633789062, + "learning_rate": 1.444968255331579e-05, + "loss": 75.8315, + "step": 157640 + }, + { + "epoch": 0.6369259485207077, + "grad_norm": 1265.9583740234375, + "learning_rate": 1.444699976813832e-05, + "loss": 77.9237, + "step": 157650 + }, + { + "epoch": 0.6369663497860752, + "grad_norm": 213.55645751953125, + "learning_rate": 1.4444317091219455e-05, + "loss": 53.6138, + "step": 157660 + }, + { + "epoch": 0.6370067510514429, + "grad_norm": 493.53033447265625, + "learning_rate": 1.4441634522611492e-05, + "loss": 53.3028, + "step": 157670 + }, + { + "epoch": 0.6370471523168105, + "grad_norm": 758.3211669921875, + "learning_rate": 1.4438952062366718e-05, + "loss": 60.6129, + "step": 157680 + }, + { + "epoch": 0.6370875535821782, + "grad_norm": 854.4917602539062, + "learning_rate": 1.4436269710537443e-05, + "loss": 69.8444, + "step": 157690 + }, + { + "epoch": 0.6371279548475458, + "grad_norm": 1254.205810546875, + "learning_rate": 1.4433587467175951e-05, + "loss": 52.3552, + "step": 157700 + }, + { + "epoch": 0.6371683561129134, + "grad_norm": 566.9596557617188, + "learning_rate": 1.4430905332334532e-05, + "loss": 87.8356, + "step": 157710 + }, + { + "epoch": 0.6372087573782811, + "grad_norm": 792.7800903320312, + "learning_rate": 1.4428223306065487e-05, + "loss": 92.7207, + "step": 157720 + }, + { + "epoch": 0.6372491586436487, + "grad_norm": 464.4983825683594, + "learning_rate": 1.442554138842109e-05, + "loss": 103.9855, + "step": 157730 + }, + { + "epoch": 0.6372895599090164, + "grad_norm": 750.5919189453125, + "learning_rate": 1.4422859579453643e-05, + "loss": 52.4427, + "step": 157740 + }, + { + "epoch": 0.637329961174384, + "grad_norm": 368.09222412109375, + "learning_rate": 1.4420177879215419e-05, + "loss": 56.9538, + "step": 157750 + }, + { + "epoch": 0.6373703624397516, + "grad_norm": 628.1519165039062, + "learning_rate": 1.4417496287758694e-05, + "loss": 77.0661, + "step": 157760 + }, + { + "epoch": 0.6374107637051193, + "grad_norm": 742.4114379882812, + "learning_rate": 1.441481480513576e-05, + "loss": 73.5435, + "step": 157770 + }, + { + "epoch": 0.6374511649704869, + "grad_norm": 557.7789916992188, + "learning_rate": 1.4412133431398885e-05, + "loss": 87.7744, + "step": 157780 + }, + { + "epoch": 0.6374915662358545, + "grad_norm": 951.8084716796875, + "learning_rate": 1.4409452166600337e-05, + "loss": 58.197, + "step": 157790 + }, + { + "epoch": 0.6375319675012221, + "grad_norm": 472.5886535644531, + "learning_rate": 1.4406771010792406e-05, + "loss": 59.1636, + "step": 157800 + }, + { + "epoch": 0.6375723687665897, + "grad_norm": 537.2726440429688, + "learning_rate": 1.4404089964027345e-05, + "loss": 56.2926, + "step": 157810 + }, + { + "epoch": 0.6376127700319574, + "grad_norm": 329.13568115234375, + "learning_rate": 1.440140902635744e-05, + "loss": 66.5217, + "step": 157820 + }, + { + "epoch": 0.637653171297325, + "grad_norm": 468.8335266113281, + "learning_rate": 1.4398728197834948e-05, + 
"loss": 59.8951, + "step": 157830 + }, + { + "epoch": 0.6376935725626927, + "grad_norm": 1330.7689208984375, + "learning_rate": 1.4396047478512132e-05, + "loss": 79.2235, + "step": 157840 + }, + { + "epoch": 0.6377339738280603, + "grad_norm": 539.327392578125, + "learning_rate": 1.4393366868441251e-05, + "loss": 54.6758, + "step": 157850 + }, + { + "epoch": 0.6377743750934279, + "grad_norm": 638.288330078125, + "learning_rate": 1.4390686367674577e-05, + "loss": 49.5907, + "step": 157860 + }, + { + "epoch": 0.6378147763587956, + "grad_norm": 694.1898193359375, + "learning_rate": 1.4388005976264356e-05, + "loss": 104.5221, + "step": 157870 + }, + { + "epoch": 0.6378551776241632, + "grad_norm": 494.9458923339844, + "learning_rate": 1.4385325694262848e-05, + "loss": 77.436, + "step": 157880 + }, + { + "epoch": 0.6378955788895309, + "grad_norm": 872.5474853515625, + "learning_rate": 1.4382645521722302e-05, + "loss": 70.7464, + "step": 157890 + }, + { + "epoch": 0.6379359801548985, + "grad_norm": 784.3088989257812, + "learning_rate": 1.4379965458694982e-05, + "loss": 71.2127, + "step": 157900 + }, + { + "epoch": 0.6379763814202661, + "grad_norm": 1001.6019287109375, + "learning_rate": 1.4377285505233128e-05, + "loss": 80.7699, + "step": 157910 + }, + { + "epoch": 0.6380167826856337, + "grad_norm": 667.6175537109375, + "learning_rate": 1.4374605661388977e-05, + "loss": 87.338, + "step": 157920 + }, + { + "epoch": 0.6380571839510013, + "grad_norm": 967.3297729492188, + "learning_rate": 1.4371925927214796e-05, + "loss": 63.7068, + "step": 157930 + }, + { + "epoch": 0.638097585216369, + "grad_norm": 995.3309936523438, + "learning_rate": 1.4369246302762813e-05, + "loss": 83.0252, + "step": 157940 + }, + { + "epoch": 0.6381379864817366, + "grad_norm": 282.02520751953125, + "learning_rate": 1.4366566788085269e-05, + "loss": 86.6882, + "step": 157950 + }, + { + "epoch": 0.6381783877471042, + "grad_norm": 990.4788208007812, + "learning_rate": 1.4363887383234413e-05, + "loss": 81.6363, + "step": 157960 + }, + { + "epoch": 0.6382187890124719, + "grad_norm": 524.1925048828125, + "learning_rate": 1.4361208088262464e-05, + "loss": 42.343, + "step": 157970 + }, + { + "epoch": 0.6382591902778395, + "grad_norm": 1620.3603515625, + "learning_rate": 1.4358528903221675e-05, + "loss": 93.9676, + "step": 157980 + }, + { + "epoch": 0.6382995915432071, + "grad_norm": 806.0289916992188, + "learning_rate": 1.435584982816427e-05, + "loss": 66.1013, + "step": 157990 + }, + { + "epoch": 0.6383399928085748, + "grad_norm": 560.2540893554688, + "learning_rate": 1.435317086314247e-05, + "loss": 64.6348, + "step": 158000 + }, + { + "epoch": 0.6383803940739424, + "grad_norm": 739.876220703125, + "learning_rate": 1.4350492008208519e-05, + "loss": 75.0499, + "step": 158010 + }, + { + "epoch": 0.6384207953393101, + "grad_norm": 443.3117980957031, + "learning_rate": 1.4347813263414633e-05, + "loss": 113.1924, + "step": 158020 + }, + { + "epoch": 0.6384611966046777, + "grad_norm": 792.05224609375, + "learning_rate": 1.4345134628813036e-05, + "loss": 96.0, + "step": 158030 + }, + { + "epoch": 0.6385015978700452, + "grad_norm": 385.81341552734375, + "learning_rate": 1.4342456104455956e-05, + "loss": 54.8296, + "step": 158040 + }, + { + "epoch": 0.6385419991354129, + "grad_norm": 960.816162109375, + "learning_rate": 1.4339777690395596e-05, + "loss": 64.6085, + "step": 158050 + }, + { + "epoch": 0.6385824004007805, + "grad_norm": 1438.65185546875, + "learning_rate": 1.4337099386684192e-05, + "loss": 83.7957, + "step": 158060 + }, + { + 
"epoch": 0.6386228016661482, + "grad_norm": 977.3687744140625, + "learning_rate": 1.4334421193373951e-05, + "loss": 88.1819, + "step": 158070 + }, + { + "epoch": 0.6386632029315158, + "grad_norm": 509.67364501953125, + "learning_rate": 1.4331743110517077e-05, + "loss": 55.1937, + "step": 158080 + }, + { + "epoch": 0.6387036041968834, + "grad_norm": 1026.377685546875, + "learning_rate": 1.4329065138165801e-05, + "loss": 60.7157, + "step": 158090 + }, + { + "epoch": 0.6387440054622511, + "grad_norm": 539.6065063476562, + "learning_rate": 1.4326387276372317e-05, + "loss": 64.1637, + "step": 158100 + }, + { + "epoch": 0.6387844067276187, + "grad_norm": 762.8085327148438, + "learning_rate": 1.4323709525188831e-05, + "loss": 54.295, + "step": 158110 + }, + { + "epoch": 0.6388248079929864, + "grad_norm": 556.6881713867188, + "learning_rate": 1.4321031884667553e-05, + "loss": 85.0359, + "step": 158120 + }, + { + "epoch": 0.638865209258354, + "grad_norm": 925.5823364257812, + "learning_rate": 1.4318354354860675e-05, + "loss": 82.1241, + "step": 158130 + }, + { + "epoch": 0.6389056105237216, + "grad_norm": 730.1845703125, + "learning_rate": 1.4315676935820411e-05, + "loss": 73.2049, + "step": 158140 + }, + { + "epoch": 0.6389460117890893, + "grad_norm": 336.8897705078125, + "learning_rate": 1.4312999627598954e-05, + "loss": 66.6011, + "step": 158150 + }, + { + "epoch": 0.6389864130544569, + "grad_norm": 488.36383056640625, + "learning_rate": 1.4310322430248486e-05, + "loss": 97.4745, + "step": 158160 + }, + { + "epoch": 0.6390268143198244, + "grad_norm": 880.4995727539062, + "learning_rate": 1.4307645343821222e-05, + "loss": 75.4151, + "step": 158170 + }, + { + "epoch": 0.6390672155851921, + "grad_norm": 637.3079223632812, + "learning_rate": 1.4304968368369337e-05, + "loss": 63.243, + "step": 158180 + }, + { + "epoch": 0.6391076168505597, + "grad_norm": 1152.995361328125, + "learning_rate": 1.430229150394503e-05, + "loss": 97.8883, + "step": 158190 + }, + { + "epoch": 0.6391480181159274, + "grad_norm": 363.4104919433594, + "learning_rate": 1.4299614750600478e-05, + "loss": 61.9424, + "step": 158200 + }, + { + "epoch": 0.639188419381295, + "grad_norm": 1398.2176513671875, + "learning_rate": 1.4296938108387875e-05, + "loss": 104.0981, + "step": 158210 + }, + { + "epoch": 0.6392288206466626, + "grad_norm": 545.096923828125, + "learning_rate": 1.4294261577359403e-05, + "loss": 60.2509, + "step": 158220 + }, + { + "epoch": 0.6392692219120303, + "grad_norm": 367.0444641113281, + "learning_rate": 1.4291585157567237e-05, + "loss": 75.348, + "step": 158230 + }, + { + "epoch": 0.6393096231773979, + "grad_norm": 376.51318359375, + "learning_rate": 1.428890884906355e-05, + "loss": 41.767, + "step": 158240 + }, + { + "epoch": 0.6393500244427656, + "grad_norm": 980.0025024414062, + "learning_rate": 1.4286232651900532e-05, + "loss": 52.2508, + "step": 158250 + }, + { + "epoch": 0.6393904257081332, + "grad_norm": 1111.698486328125, + "learning_rate": 1.4283556566130343e-05, + "loss": 50.1424, + "step": 158260 + }, + { + "epoch": 0.6394308269735008, + "grad_norm": 970.2080688476562, + "learning_rate": 1.4280880591805169e-05, + "loss": 50.4989, + "step": 158270 + }, + { + "epoch": 0.6394712282388685, + "grad_norm": 788.9093627929688, + "learning_rate": 1.4278204728977174e-05, + "loss": 71.0306, + "step": 158280 + }, + { + "epoch": 0.6395116295042361, + "grad_norm": 695.6580810546875, + "learning_rate": 1.4275528977698519e-05, + "loss": 57.9636, + "step": 158290 + }, + { + "epoch": 0.6395520307696037, + "grad_norm": 
728.3812255859375, + "learning_rate": 1.427285333802138e-05, + "loss": 39.1097, + "step": 158300 + }, + { + "epoch": 0.6395924320349713, + "grad_norm": 1789.5968017578125, + "learning_rate": 1.4270177809997909e-05, + "loss": 96.6259, + "step": 158310 + }, + { + "epoch": 0.6396328333003389, + "grad_norm": 358.1278381347656, + "learning_rate": 1.4267502393680266e-05, + "loss": 95.6053, + "step": 158320 + }, + { + "epoch": 0.6396732345657066, + "grad_norm": 643.32568359375, + "learning_rate": 1.4264827089120622e-05, + "loss": 79.6485, + "step": 158330 + }, + { + "epoch": 0.6397136358310742, + "grad_norm": 746.805419921875, + "learning_rate": 1.4262151896371119e-05, + "loss": 93.9912, + "step": 158340 + }, + { + "epoch": 0.6397540370964419, + "grad_norm": 759.8524780273438, + "learning_rate": 1.4259476815483928e-05, + "loss": 88.8695, + "step": 158350 + }, + { + "epoch": 0.6397944383618095, + "grad_norm": 625.4047241210938, + "learning_rate": 1.4256801846511192e-05, + "loss": 58.9117, + "step": 158360 + }, + { + "epoch": 0.6398348396271771, + "grad_norm": 837.1571044921875, + "learning_rate": 1.4254126989505057e-05, + "loss": 72.8122, + "step": 158370 + }, + { + "epoch": 0.6398752408925448, + "grad_norm": 767.5053100585938, + "learning_rate": 1.4251452244517676e-05, + "loss": 54.3256, + "step": 158380 + }, + { + "epoch": 0.6399156421579124, + "grad_norm": 830.0521240234375, + "learning_rate": 1.4248777611601195e-05, + "loss": 67.525, + "step": 158390 + }, + { + "epoch": 0.63995604342328, + "grad_norm": 672.0067138671875, + "learning_rate": 1.4246103090807748e-05, + "loss": 53.3813, + "step": 158400 + }, + { + "epoch": 0.6399964446886477, + "grad_norm": 1009.8511962890625, + "learning_rate": 1.424342868218949e-05, + "loss": 99.2757, + "step": 158410 + }, + { + "epoch": 0.6400368459540153, + "grad_norm": 1025.1011962890625, + "learning_rate": 1.4240754385798546e-05, + "loss": 81.4731, + "step": 158420 + }, + { + "epoch": 0.6400772472193829, + "grad_norm": 752.557861328125, + "learning_rate": 1.4238080201687068e-05, + "loss": 68.6028, + "step": 158430 + }, + { + "epoch": 0.6401176484847505, + "grad_norm": 630.4906005859375, + "learning_rate": 1.4235406129907184e-05, + "loss": 96.0891, + "step": 158440 + }, + { + "epoch": 0.6401580497501181, + "grad_norm": 607.9795532226562, + "learning_rate": 1.4232732170511021e-05, + "loss": 96.6694, + "step": 158450 + }, + { + "epoch": 0.6401984510154858, + "grad_norm": 590.3291625976562, + "learning_rate": 1.4230058323550715e-05, + "loss": 50.2052, + "step": 158460 + }, + { + "epoch": 0.6402388522808534, + "grad_norm": 661.8011474609375, + "learning_rate": 1.4227384589078392e-05, + "loss": 95.1437, + "step": 158470 + }, + { + "epoch": 0.6402792535462211, + "grad_norm": 386.04193115234375, + "learning_rate": 1.4224710967146175e-05, + "loss": 75.0451, + "step": 158480 + }, + { + "epoch": 0.6403196548115887, + "grad_norm": 1101.1568603515625, + "learning_rate": 1.4222037457806199e-05, + "loss": 93.7373, + "step": 158490 + }, + { + "epoch": 0.6403600560769563, + "grad_norm": 557.176513671875, + "learning_rate": 1.4219364061110565e-05, + "loss": 41.8394, + "step": 158500 + }, + { + "epoch": 0.640400457342324, + "grad_norm": 710.1544189453125, + "learning_rate": 1.4216690777111418e-05, + "loss": 52.4866, + "step": 158510 + }, + { + "epoch": 0.6404408586076916, + "grad_norm": 1096.615966796875, + "learning_rate": 1.4214017605860857e-05, + "loss": 95.3147, + "step": 158520 + }, + { + "epoch": 0.6404812598730593, + "grad_norm": 1259.9197998046875, + "learning_rate": 
1.4211344547411001e-05, + "loss": 91.3548, + "step": 158530 + }, + { + "epoch": 0.6405216611384269, + "grad_norm": 812.0034790039062, + "learning_rate": 1.4208671601813962e-05, + "loss": 78.731, + "step": 158540 + }, + { + "epoch": 0.6405620624037945, + "grad_norm": 566.9772338867188, + "learning_rate": 1.4205998769121854e-05, + "loss": 66.6063, + "step": 158550 + }, + { + "epoch": 0.6406024636691621, + "grad_norm": 514.01513671875, + "learning_rate": 1.420332604938678e-05, + "loss": 59.2101, + "step": 158560 + }, + { + "epoch": 0.6406428649345297, + "grad_norm": 709.3492431640625, + "learning_rate": 1.4200653442660857e-05, + "loss": 37.1961, + "step": 158570 + }, + { + "epoch": 0.6406832661998973, + "grad_norm": 1068.6197509765625, + "learning_rate": 1.4197980948996169e-05, + "loss": 86.25, + "step": 158580 + }, + { + "epoch": 0.640723667465265, + "grad_norm": 497.070068359375, + "learning_rate": 1.4195308568444837e-05, + "loss": 51.7838, + "step": 158590 + }, + { + "epoch": 0.6407640687306326, + "grad_norm": 161.16307067871094, + "learning_rate": 1.4192636301058952e-05, + "loss": 57.3548, + "step": 158600 + }, + { + "epoch": 0.6408044699960003, + "grad_norm": 563.1381225585938, + "learning_rate": 1.4189964146890607e-05, + "loss": 57.2191, + "step": 158610 + }, + { + "epoch": 0.6408448712613679, + "grad_norm": 1484.111572265625, + "learning_rate": 1.418729210599191e-05, + "loss": 72.0189, + "step": 158620 + }, + { + "epoch": 0.6408852725267355, + "grad_norm": 761.8253784179688, + "learning_rate": 1.4184620178414945e-05, + "loss": 78.0199, + "step": 158630 + }, + { + "epoch": 0.6409256737921032, + "grad_norm": 451.72955322265625, + "learning_rate": 1.4181948364211797e-05, + "loss": 79.238, + "step": 158640 + }, + { + "epoch": 0.6409660750574708, + "grad_norm": 1005.7196655273438, + "learning_rate": 1.4179276663434569e-05, + "loss": 73.6376, + "step": 158650 + }, + { + "epoch": 0.6410064763228385, + "grad_norm": 519.866455078125, + "learning_rate": 1.4176605076135327e-05, + "loss": 41.4464, + "step": 158660 + }, + { + "epoch": 0.6410468775882061, + "grad_norm": 721.568359375, + "learning_rate": 1.4173933602366181e-05, + "loss": 97.6638, + "step": 158670 + }, + { + "epoch": 0.6410872788535736, + "grad_norm": 1030.747314453125, + "learning_rate": 1.4171262242179195e-05, + "loss": 75.8771, + "step": 158680 + }, + { + "epoch": 0.6411276801189413, + "grad_norm": 284.35546875, + "learning_rate": 1.4168590995626446e-05, + "loss": 70.5334, + "step": 158690 + }, + { + "epoch": 0.6411680813843089, + "grad_norm": 1036.1617431640625, + "learning_rate": 1.4165919862760023e-05, + "loss": 79.4834, + "step": 158700 + }, + { + "epoch": 0.6412084826496766, + "grad_norm": 358.1935119628906, + "learning_rate": 1.4163248843631996e-05, + "loss": 80.3425, + "step": 158710 + }, + { + "epoch": 0.6412488839150442, + "grad_norm": 401.36712646484375, + "learning_rate": 1.4160577938294437e-05, + "loss": 74.3121, + "step": 158720 + }, + { + "epoch": 0.6412892851804118, + "grad_norm": 654.5018920898438, + "learning_rate": 1.4157907146799422e-05, + "loss": 43.8232, + "step": 158730 + }, + { + "epoch": 0.6413296864457795, + "grad_norm": 589.8673706054688, + "learning_rate": 1.4155236469199003e-05, + "loss": 50.1828, + "step": 158740 + }, + { + "epoch": 0.6413700877111471, + "grad_norm": 500.234130859375, + "learning_rate": 1.4152565905545268e-05, + "loss": 71.7269, + "step": 158750 + }, + { + "epoch": 0.6414104889765148, + "grad_norm": 1582.934326171875, + "learning_rate": 1.4149895455890272e-05, + "loss": 71.8547, + 
"step": 158760 + }, + { + "epoch": 0.6414508902418824, + "grad_norm": 1197.3184814453125, + "learning_rate": 1.4147225120286065e-05, + "loss": 70.1337, + "step": 158770 + }, + { + "epoch": 0.64149129150725, + "grad_norm": 725.0204467773438, + "learning_rate": 1.4144554898784728e-05, + "loss": 63.985, + "step": 158780 + }, + { + "epoch": 0.6415316927726177, + "grad_norm": 550.6195678710938, + "learning_rate": 1.4141884791438303e-05, + "loss": 99.8017, + "step": 158790 + }, + { + "epoch": 0.6415720940379853, + "grad_norm": 1614.4764404296875, + "learning_rate": 1.4139214798298854e-05, + "loss": 69.9537, + "step": 158800 + }, + { + "epoch": 0.6416124953033528, + "grad_norm": 1512.65087890625, + "learning_rate": 1.4136544919418431e-05, + "loss": 80.8431, + "step": 158810 + }, + { + "epoch": 0.6416528965687205, + "grad_norm": 885.486572265625, + "learning_rate": 1.4133875154849075e-05, + "loss": 88.3057, + "step": 158820 + }, + { + "epoch": 0.6416932978340881, + "grad_norm": 369.710693359375, + "learning_rate": 1.4131205504642851e-05, + "loss": 66.5256, + "step": 158830 + }, + { + "epoch": 0.6417336990994558, + "grad_norm": 1058.43212890625, + "learning_rate": 1.4128535968851797e-05, + "loss": 75.593, + "step": 158840 + }, + { + "epoch": 0.6417741003648234, + "grad_norm": 455.640380859375, + "learning_rate": 1.4125866547527948e-05, + "loss": 66.7149, + "step": 158850 + }, + { + "epoch": 0.641814501630191, + "grad_norm": 944.70263671875, + "learning_rate": 1.4123197240723366e-05, + "loss": 76.7905, + "step": 158860 + }, + { + "epoch": 0.6418549028955587, + "grad_norm": 277.9966735839844, + "learning_rate": 1.4120528048490073e-05, + "loss": 77.0868, + "step": 158870 + }, + { + "epoch": 0.6418953041609263, + "grad_norm": 653.7036743164062, + "learning_rate": 1.4117858970880112e-05, + "loss": 69.3823, + "step": 158880 + }, + { + "epoch": 0.641935705426294, + "grad_norm": 994.02294921875, + "learning_rate": 1.4115190007945522e-05, + "loss": 70.5876, + "step": 158890 + }, + { + "epoch": 0.6419761066916616, + "grad_norm": 590.0132446289062, + "learning_rate": 1.411252115973833e-05, + "loss": 64.7001, + "step": 158900 + }, + { + "epoch": 0.6420165079570292, + "grad_norm": 428.26568603515625, + "learning_rate": 1.4109852426310573e-05, + "loss": 85.6802, + "step": 158910 + }, + { + "epoch": 0.6420569092223969, + "grad_norm": 696.8196411132812, + "learning_rate": 1.4107183807714275e-05, + "loss": 55.9113, + "step": 158920 + }, + { + "epoch": 0.6420973104877645, + "grad_norm": 305.9283752441406, + "learning_rate": 1.4104515304001454e-05, + "loss": 70.5088, + "step": 158930 + }, + { + "epoch": 0.6421377117531321, + "grad_norm": 1005.2109985351562, + "learning_rate": 1.4101846915224151e-05, + "loss": 88.4495, + "step": 158940 + }, + { + "epoch": 0.6421781130184997, + "grad_norm": 892.3837890625, + "learning_rate": 1.4099178641434372e-05, + "loss": 54.648, + "step": 158950 + }, + { + "epoch": 0.6422185142838673, + "grad_norm": 373.943603515625, + "learning_rate": 1.409651048268415e-05, + "loss": 62.0631, + "step": 158960 + }, + { + "epoch": 0.642258915549235, + "grad_norm": 503.530517578125, + "learning_rate": 1.4093842439025496e-05, + "loss": 60.1397, + "step": 158970 + }, + { + "epoch": 0.6422993168146026, + "grad_norm": 201.46559143066406, + "learning_rate": 1.4091174510510422e-05, + "loss": 41.0362, + "step": 158980 + }, + { + "epoch": 0.6423397180799703, + "grad_norm": 547.8228149414062, + "learning_rate": 1.4088506697190944e-05, + "loss": 75.5028, + "step": 158990 + }, + { + "epoch": 0.6423801193453379, 
+ "grad_norm": 1159.9688720703125, + "learning_rate": 1.4085838999119075e-05, + "loss": 80.8678, + "step": 159000 + }, + { + "epoch": 0.6424205206107055, + "grad_norm": 718.0634765625, + "learning_rate": 1.4083171416346807e-05, + "loss": 58.9792, + "step": 159010 + }, + { + "epoch": 0.6424609218760732, + "grad_norm": 949.416259765625, + "learning_rate": 1.4080503948926167e-05, + "loss": 58.9786, + "step": 159020 + }, + { + "epoch": 0.6425013231414408, + "grad_norm": 739.4859619140625, + "learning_rate": 1.4077836596909145e-05, + "loss": 71.6076, + "step": 159030 + }, + { + "epoch": 0.6425417244068085, + "grad_norm": 468.5218811035156, + "learning_rate": 1.4075169360347754e-05, + "loss": 80.0134, + "step": 159040 + }, + { + "epoch": 0.6425821256721761, + "grad_norm": 277.00469970703125, + "learning_rate": 1.4072502239293985e-05, + "loss": 67.1047, + "step": 159050 + }, + { + "epoch": 0.6426225269375437, + "grad_norm": 1034.90771484375, + "learning_rate": 1.4069835233799838e-05, + "loss": 70.9111, + "step": 159060 + }, + { + "epoch": 0.6426629282029113, + "grad_norm": 745.0709838867188, + "learning_rate": 1.4067168343917305e-05, + "loss": 62.7054, + "step": 159070 + }, + { + "epoch": 0.6427033294682789, + "grad_norm": 955.1119995117188, + "learning_rate": 1.4064501569698382e-05, + "loss": 84.2557, + "step": 159080 + }, + { + "epoch": 0.6427437307336465, + "grad_norm": 724.2291870117188, + "learning_rate": 1.4061834911195047e-05, + "loss": 55.9836, + "step": 159090 + }, + { + "epoch": 0.6427841319990142, + "grad_norm": 1262.0684814453125, + "learning_rate": 1.4059168368459307e-05, + "loss": 66.8398, + "step": 159100 + }, + { + "epoch": 0.6428245332643818, + "grad_norm": 1394.91796875, + "learning_rate": 1.4056501941543128e-05, + "loss": 91.7338, + "step": 159110 + }, + { + "epoch": 0.6428649345297495, + "grad_norm": 1073.1806640625, + "learning_rate": 1.4053835630498514e-05, + "loss": 86.0652, + "step": 159120 + }, + { + "epoch": 0.6429053357951171, + "grad_norm": 618.6692504882812, + "learning_rate": 1.4051169435377436e-05, + "loss": 47.3674, + "step": 159130 + }, + { + "epoch": 0.6429457370604847, + "grad_norm": 892.6998291015625, + "learning_rate": 1.4048503356231867e-05, + "loss": 94.1901, + "step": 159140 + }, + { + "epoch": 0.6429861383258524, + "grad_norm": 915.1314086914062, + "learning_rate": 1.4045837393113793e-05, + "loss": 71.0617, + "step": 159150 + }, + { + "epoch": 0.64302653959122, + "grad_norm": 855.2825317382812, + "learning_rate": 1.404317154607518e-05, + "loss": 73.2698, + "step": 159160 + }, + { + "epoch": 0.6430669408565877, + "grad_norm": 365.9866027832031, + "learning_rate": 1.4040505815168004e-05, + "loss": 55.2205, + "step": 159170 + }, + { + "epoch": 0.6431073421219553, + "grad_norm": 1045.7646484375, + "learning_rate": 1.4037840200444244e-05, + "loss": 92.7473, + "step": 159180 + }, + { + "epoch": 0.643147743387323, + "grad_norm": 557.5571899414062, + "learning_rate": 1.4035174701955846e-05, + "loss": 81.1893, + "step": 159190 + }, + { + "epoch": 0.6431881446526905, + "grad_norm": 1178.652099609375, + "learning_rate": 1.4032509319754796e-05, + "loss": 64.9968, + "step": 159200 + }, + { + "epoch": 0.6432285459180581, + "grad_norm": 450.103759765625, + "learning_rate": 1.4029844053893052e-05, + "loss": 61.4268, + "step": 159210 + }, + { + "epoch": 0.6432689471834258, + "grad_norm": 1034.968994140625, + "learning_rate": 1.402717890442256e-05, + "loss": 73.3597, + "step": 159220 + }, + { + "epoch": 0.6433093484487934, + "grad_norm": 1175.1871337890625, + 
"learning_rate": 1.4024513871395304e-05, + "loss": 106.482, + "step": 159230 + }, + { + "epoch": 0.643349749714161, + "grad_norm": 236.5809326171875, + "learning_rate": 1.4021848954863218e-05, + "loss": 83.7382, + "step": 159240 + }, + { + "epoch": 0.6433901509795287, + "grad_norm": 417.0306396484375, + "learning_rate": 1.401918415487827e-05, + "loss": 63.0626, + "step": 159250 + }, + { + "epoch": 0.6434305522448963, + "grad_norm": 2200.512451171875, + "learning_rate": 1.4016519471492407e-05, + "loss": 110.9155, + "step": 159260 + }, + { + "epoch": 0.643470953510264, + "grad_norm": 1064.7779541015625, + "learning_rate": 1.4013854904757564e-05, + "loss": 74.4792, + "step": 159270 + }, + { + "epoch": 0.6435113547756316, + "grad_norm": 730.1138916015625, + "learning_rate": 1.4011190454725716e-05, + "loss": 46.0632, + "step": 159280 + }, + { + "epoch": 0.6435517560409992, + "grad_norm": 596.7861328125, + "learning_rate": 1.400852612144879e-05, + "loss": 54.2323, + "step": 159290 + }, + { + "epoch": 0.6435921573063669, + "grad_norm": 288.23785400390625, + "learning_rate": 1.4005861904978724e-05, + "loss": 49.285, + "step": 159300 + }, + { + "epoch": 0.6436325585717345, + "grad_norm": 724.33447265625, + "learning_rate": 1.4003197805367475e-05, + "loss": 69.8375, + "step": 159310 + }, + { + "epoch": 0.643672959837102, + "grad_norm": 830.7191772460938, + "learning_rate": 1.4000533822666968e-05, + "loss": 56.6147, + "step": 159320 + }, + { + "epoch": 0.6437133611024697, + "grad_norm": 631.4449462890625, + "learning_rate": 1.3997869956929147e-05, + "loss": 113.8898, + "step": 159330 + }, + { + "epoch": 0.6437537623678373, + "grad_norm": 657.1631469726562, + "learning_rate": 1.3995206208205942e-05, + "loss": 97.2598, + "step": 159340 + }, + { + "epoch": 0.643794163633205, + "grad_norm": 699.18896484375, + "learning_rate": 1.3992542576549273e-05, + "loss": 110.1179, + "step": 159350 + }, + { + "epoch": 0.6438345648985726, + "grad_norm": 781.56591796875, + "learning_rate": 1.398987906201109e-05, + "loss": 66.7821, + "step": 159360 + }, + { + "epoch": 0.6438749661639402, + "grad_norm": 1169.1307373046875, + "learning_rate": 1.3987215664643308e-05, + "loss": 117.1995, + "step": 159370 + }, + { + "epoch": 0.6439153674293079, + "grad_norm": 573.1962280273438, + "learning_rate": 1.3984552384497842e-05, + "loss": 48.1207, + "step": 159380 + }, + { + "epoch": 0.6439557686946755, + "grad_norm": 243.49693298339844, + "learning_rate": 1.3981889221626637e-05, + "loss": 71.7988, + "step": 159390 + }, + { + "epoch": 0.6439961699600432, + "grad_norm": 757.9393920898438, + "learning_rate": 1.3979226176081593e-05, + "loss": 69.9993, + "step": 159400 + }, + { + "epoch": 0.6440365712254108, + "grad_norm": 705.0243530273438, + "learning_rate": 1.397656324791464e-05, + "loss": 67.0936, + "step": 159410 + }, + { + "epoch": 0.6440769724907784, + "grad_norm": 339.592041015625, + "learning_rate": 1.3973900437177684e-05, + "loss": 74.7534, + "step": 159420 + }, + { + "epoch": 0.6441173737561461, + "grad_norm": 625.2017822265625, + "learning_rate": 1.3971237743922636e-05, + "loss": 90.917, + "step": 159430 + }, + { + "epoch": 0.6441577750215137, + "grad_norm": 869.3953857421875, + "learning_rate": 1.396857516820142e-05, + "loss": 51.6075, + "step": 159440 + }, + { + "epoch": 0.6441981762868813, + "grad_norm": 439.0805358886719, + "learning_rate": 1.3965912710065937e-05, + "loss": 34.612, + "step": 159450 + }, + { + "epoch": 0.6442385775522489, + "grad_norm": 876.8145751953125, + "learning_rate": 1.3963250369568082e-05, + 
"loss": 79.3626, + "step": 159460 + }, + { + "epoch": 0.6442789788176165, + "grad_norm": 1104.06494140625, + "learning_rate": 1.396058814675978e-05, + "loss": 75.7225, + "step": 159470 + }, + { + "epoch": 0.6443193800829842, + "grad_norm": 839.7938842773438, + "learning_rate": 1.3957926041692913e-05, + "loss": 90.4139, + "step": 159480 + }, + { + "epoch": 0.6443597813483518, + "grad_norm": 438.5372314453125, + "learning_rate": 1.3955264054419395e-05, + "loss": 64.2745, + "step": 159490 + }, + { + "epoch": 0.6444001826137195, + "grad_norm": 334.8061218261719, + "learning_rate": 1.3952602184991113e-05, + "loss": 65.2613, + "step": 159500 + }, + { + "epoch": 0.6444405838790871, + "grad_norm": 716.2437744140625, + "learning_rate": 1.3949940433459962e-05, + "loss": 77.4361, + "step": 159510 + }, + { + "epoch": 0.6444809851444547, + "grad_norm": 339.7471923828125, + "learning_rate": 1.3947278799877846e-05, + "loss": 59.2156, + "step": 159520 + }, + { + "epoch": 0.6445213864098224, + "grad_norm": 619.9138793945312, + "learning_rate": 1.3944617284296643e-05, + "loss": 72.8651, + "step": 159530 + }, + { + "epoch": 0.64456178767519, + "grad_norm": 506.0522155761719, + "learning_rate": 1.3941955886768235e-05, + "loss": 59.8655, + "step": 159540 + }, + { + "epoch": 0.6446021889405577, + "grad_norm": 385.418701171875, + "learning_rate": 1.3939294607344525e-05, + "loss": 61.4483, + "step": 159550 + }, + { + "epoch": 0.6446425902059253, + "grad_norm": 447.6346130371094, + "learning_rate": 1.3936633446077377e-05, + "loss": 56.3727, + "step": 159560 + }, + { + "epoch": 0.6446829914712929, + "grad_norm": 513.8580932617188, + "learning_rate": 1.3933972403018692e-05, + "loss": 93.729, + "step": 159570 + }, + { + "epoch": 0.6447233927366605, + "grad_norm": 451.8069152832031, + "learning_rate": 1.3931311478220338e-05, + "loss": 66.2441, + "step": 159580 + }, + { + "epoch": 0.6447637940020281, + "grad_norm": 238.62367248535156, + "learning_rate": 1.3928650671734186e-05, + "loss": 42.5211, + "step": 159590 + }, + { + "epoch": 0.6448041952673957, + "grad_norm": 1339.922607421875, + "learning_rate": 1.3925989983612118e-05, + "loss": 106.7994, + "step": 159600 + }, + { + "epoch": 0.6448445965327634, + "grad_norm": 607.470703125, + "learning_rate": 1.3923329413906004e-05, + "loss": 65.2393, + "step": 159610 + }, + { + "epoch": 0.644884997798131, + "grad_norm": 352.2939758300781, + "learning_rate": 1.3920668962667701e-05, + "loss": 56.0999, + "step": 159620 + }, + { + "epoch": 0.6449253990634987, + "grad_norm": 607.4183349609375, + "learning_rate": 1.3918008629949097e-05, + "loss": 83.2437, + "step": 159630 + }, + { + "epoch": 0.6449658003288663, + "grad_norm": 511.9974365234375, + "learning_rate": 1.3915348415802036e-05, + "loss": 67.6543, + "step": 159640 + }, + { + "epoch": 0.6450062015942339, + "grad_norm": 916.005615234375, + "learning_rate": 1.3912688320278403e-05, + "loss": 68.8099, + "step": 159650 + }, + { + "epoch": 0.6450466028596016, + "grad_norm": 578.1353149414062, + "learning_rate": 1.391002834343004e-05, + "loss": 58.6745, + "step": 159660 + }, + { + "epoch": 0.6450870041249692, + "grad_norm": 855.14599609375, + "learning_rate": 1.3907368485308807e-05, + "loss": 67.3765, + "step": 159670 + }, + { + "epoch": 0.6451274053903369, + "grad_norm": 323.8087158203125, + "learning_rate": 1.3904708745966566e-05, + "loss": 63.1329, + "step": 159680 + }, + { + "epoch": 0.6451678066557045, + "grad_norm": 738.2545166015625, + "learning_rate": 1.3902049125455169e-05, + "loss": 55.4668, + "step": 159690 + }, + { + 
"epoch": 0.6452082079210721, + "grad_norm": 645.4552001953125, + "learning_rate": 1.3899389623826451e-05, + "loss": 74.3453, + "step": 159700 + }, + { + "epoch": 0.6452486091864397, + "grad_norm": 840.5313110351562, + "learning_rate": 1.3896730241132286e-05, + "loss": 80.3557, + "step": 159710 + }, + { + "epoch": 0.6452890104518073, + "grad_norm": 635.4869384765625, + "learning_rate": 1.3894070977424497e-05, + "loss": 84.5596, + "step": 159720 + }, + { + "epoch": 0.645329411717175, + "grad_norm": 1010.9671630859375, + "learning_rate": 1.3891411832754944e-05, + "loss": 96.3035, + "step": 159730 + }, + { + "epoch": 0.6453698129825426, + "grad_norm": 542.51318359375, + "learning_rate": 1.3888752807175463e-05, + "loss": 82.7186, + "step": 159740 + }, + { + "epoch": 0.6454102142479102, + "grad_norm": 728.20166015625, + "learning_rate": 1.3886093900737889e-05, + "loss": 55.0079, + "step": 159750 + }, + { + "epoch": 0.6454506155132779, + "grad_norm": 560.1746826171875, + "learning_rate": 1.3883435113494066e-05, + "loss": 85.1765, + "step": 159760 + }, + { + "epoch": 0.6454910167786455, + "grad_norm": 626.2679443359375, + "learning_rate": 1.3880776445495817e-05, + "loss": 55.381, + "step": 159770 + }, + { + "epoch": 0.6455314180440131, + "grad_norm": 653.9595947265625, + "learning_rate": 1.387811789679499e-05, + "loss": 94.8802, + "step": 159780 + }, + { + "epoch": 0.6455718193093808, + "grad_norm": 603.1830444335938, + "learning_rate": 1.3875459467443405e-05, + "loss": 65.2427, + "step": 159790 + }, + { + "epoch": 0.6456122205747484, + "grad_norm": 1275.2486572265625, + "learning_rate": 1.3872801157492881e-05, + "loss": 96.0798, + "step": 159800 + }, + { + "epoch": 0.6456526218401161, + "grad_norm": 654.7244873046875, + "learning_rate": 1.3870142966995265e-05, + "loss": 68.5686, + "step": 159810 + }, + { + "epoch": 0.6456930231054837, + "grad_norm": 1112.2113037109375, + "learning_rate": 1.3867484896002365e-05, + "loss": 88.4273, + "step": 159820 + }, + { + "epoch": 0.6457334243708513, + "grad_norm": 640.9278564453125, + "learning_rate": 1.3864826944566004e-05, + "loss": 56.9886, + "step": 159830 + }, + { + "epoch": 0.6457738256362189, + "grad_norm": 979.39404296875, + "learning_rate": 1.3862169112737996e-05, + "loss": 77.3206, + "step": 159840 + }, + { + "epoch": 0.6458142269015865, + "grad_norm": 1046.5887451171875, + "learning_rate": 1.3859511400570163e-05, + "loss": 56.2963, + "step": 159850 + }, + { + "epoch": 0.6458546281669542, + "grad_norm": 882.7001342773438, + "learning_rate": 1.3856853808114319e-05, + "loss": 74.7857, + "step": 159860 + }, + { + "epoch": 0.6458950294323218, + "grad_norm": 457.1357421875, + "learning_rate": 1.3854196335422275e-05, + "loss": 54.1933, + "step": 159870 + }, + { + "epoch": 0.6459354306976894, + "grad_norm": 438.4779052734375, + "learning_rate": 1.3851538982545827e-05, + "loss": 54.3638, + "step": 159880 + }, + { + "epoch": 0.6459758319630571, + "grad_norm": 759.9801025390625, + "learning_rate": 1.3848881749536802e-05, + "loss": 68.8277, + "step": 159890 + }, + { + "epoch": 0.6460162332284247, + "grad_norm": 494.8306884765625, + "learning_rate": 1.3846224636446994e-05, + "loss": 61.7859, + "step": 159900 + }, + { + "epoch": 0.6460566344937924, + "grad_norm": 1167.8675537109375, + "learning_rate": 1.3843567643328195e-05, + "loss": 96.4279, + "step": 159910 + }, + { + "epoch": 0.64609703575916, + "grad_norm": 742.918701171875, + "learning_rate": 1.3840910770232223e-05, + "loss": 148.8222, + "step": 159920 + }, + { + "epoch": 0.6461374370245276, + "grad_norm": 
766.9825439453125, + "learning_rate": 1.3838254017210864e-05, + "loss": 58.7796, + "step": 159930 + }, + { + "epoch": 0.6461778382898953, + "grad_norm": 313.5904846191406, + "learning_rate": 1.3835597384315918e-05, + "loss": 49.7936, + "step": 159940 + }, + { + "epoch": 0.6462182395552629, + "grad_norm": 463.3041076660156, + "learning_rate": 1.3832940871599175e-05, + "loss": 65.4458, + "step": 159950 + }, + { + "epoch": 0.6462586408206304, + "grad_norm": 924.6212158203125, + "learning_rate": 1.3830284479112416e-05, + "loss": 83.593, + "step": 159960 + }, + { + "epoch": 0.6462990420859981, + "grad_norm": 410.8275451660156, + "learning_rate": 1.3827628206907446e-05, + "loss": 62.2624, + "step": 159970 + }, + { + "epoch": 0.6463394433513657, + "grad_norm": 687.3089599609375, + "learning_rate": 1.3824972055036043e-05, + "loss": 75.0122, + "step": 159980 + }, + { + "epoch": 0.6463798446167334, + "grad_norm": 424.3802185058594, + "learning_rate": 1.382231602354998e-05, + "loss": 74.4688, + "step": 159990 + }, + { + "epoch": 0.646420245882101, + "grad_norm": 802.7678833007812, + "learning_rate": 1.3819660112501054e-05, + "loss": 45.2236, + "step": 160000 + }, + { + "epoch": 0.6464606471474686, + "grad_norm": 547.2116088867188, + "learning_rate": 1.3817004321941032e-05, + "loss": 69.2956, + "step": 160010 + }, + { + "epoch": 0.6465010484128363, + "grad_norm": 759.5550537109375, + "learning_rate": 1.3814348651921698e-05, + "loss": 79.1806, + "step": 160020 + }, + { + "epoch": 0.6465414496782039, + "grad_norm": 954.0886840820312, + "learning_rate": 1.3811693102494823e-05, + "loss": 77.0236, + "step": 160030 + }, + { + "epoch": 0.6465818509435716, + "grad_norm": 645.9011840820312, + "learning_rate": 1.3809037673712168e-05, + "loss": 58.7272, + "step": 160040 + }, + { + "epoch": 0.6466222522089392, + "grad_norm": 775.5703125, + "learning_rate": 1.3806382365625519e-05, + "loss": 61.7363, + "step": 160050 + }, + { + "epoch": 0.6466626534743068, + "grad_norm": 922.2274169921875, + "learning_rate": 1.3803727178286634e-05, + "loss": 49.813, + "step": 160060 + }, + { + "epoch": 0.6467030547396745, + "grad_norm": 540.667236328125, + "learning_rate": 1.3801072111747269e-05, + "loss": 53.7087, + "step": 160070 + }, + { + "epoch": 0.6467434560050421, + "grad_norm": 426.2093505859375, + "learning_rate": 1.3798417166059203e-05, + "loss": 73.6936, + "step": 160080 + }, + { + "epoch": 0.6467838572704097, + "grad_norm": 2345.4267578125, + "learning_rate": 1.3795762341274186e-05, + "loss": 87.4049, + "step": 160090 + }, + { + "epoch": 0.6468242585357773, + "grad_norm": 398.5237731933594, + "learning_rate": 1.3793107637443978e-05, + "loss": 60.7388, + "step": 160100 + }, + { + "epoch": 0.6468646598011449, + "grad_norm": 971.08642578125, + "learning_rate": 1.3790453054620333e-05, + "loss": 75.5744, + "step": 160110 + }, + { + "epoch": 0.6469050610665126, + "grad_norm": 599.1119384765625, + "learning_rate": 1.3787798592854994e-05, + "loss": 81.2106, + "step": 160120 + }, + { + "epoch": 0.6469454623318802, + "grad_norm": 588.934814453125, + "learning_rate": 1.3785144252199727e-05, + "loss": 90.5467, + "step": 160130 + }, + { + "epoch": 0.6469858635972479, + "grad_norm": 1046.8115234375, + "learning_rate": 1.3782490032706273e-05, + "loss": 63.5263, + "step": 160140 + }, + { + "epoch": 0.6470262648626155, + "grad_norm": 432.7254333496094, + "learning_rate": 1.3779835934426368e-05, + "loss": 53.2061, + "step": 160150 + }, + { + "epoch": 0.6470666661279831, + "grad_norm": 699.08349609375, + "learning_rate": 
1.3777181957411775e-05, + "loss": 60.3271, + "step": 160160 + }, + { + "epoch": 0.6471070673933508, + "grad_norm": 955.0413208007812, + "learning_rate": 1.3774528101714215e-05, + "loss": 70.7889, + "step": 160170 + }, + { + "epoch": 0.6471474686587184, + "grad_norm": 462.00616455078125, + "learning_rate": 1.3771874367385437e-05, + "loss": 74.2226, + "step": 160180 + }, + { + "epoch": 0.6471878699240861, + "grad_norm": 1274.90283203125, + "learning_rate": 1.3769220754477178e-05, + "loss": 104.1167, + "step": 160190 + }, + { + "epoch": 0.6472282711894537, + "grad_norm": 903.522216796875, + "learning_rate": 1.3766567263041163e-05, + "loss": 62.7749, + "step": 160200 + }, + { + "epoch": 0.6472686724548213, + "grad_norm": 648.7487182617188, + "learning_rate": 1.3763913893129135e-05, + "loss": 43.8241, + "step": 160210 + }, + { + "epoch": 0.6473090737201889, + "grad_norm": 1287.9500732421875, + "learning_rate": 1.3761260644792807e-05, + "loss": 97.626, + "step": 160220 + }, + { + "epoch": 0.6473494749855565, + "grad_norm": 373.8180236816406, + "learning_rate": 1.3758607518083924e-05, + "loss": 56.2497, + "step": 160230 + }, + { + "epoch": 0.6473898762509241, + "grad_norm": 511.0581359863281, + "learning_rate": 1.37559545130542e-05, + "loss": 56.5279, + "step": 160240 + }, + { + "epoch": 0.6474302775162918, + "grad_norm": 932.7310180664062, + "learning_rate": 1.375330162975535e-05, + "loss": 70.3802, + "step": 160250 + }, + { + "epoch": 0.6474706787816594, + "grad_norm": 716.8372192382812, + "learning_rate": 1.3750648868239105e-05, + "loss": 74.1838, + "step": 160260 + }, + { + "epoch": 0.6475110800470271, + "grad_norm": 564.082275390625, + "learning_rate": 1.374799622855718e-05, + "loss": 73.21, + "step": 160270 + }, + { + "epoch": 0.6475514813123947, + "grad_norm": 776.65966796875, + "learning_rate": 1.3745343710761283e-05, + "loss": 72.1794, + "step": 160280 + }, + { + "epoch": 0.6475918825777623, + "grad_norm": 211.73529052734375, + "learning_rate": 1.3742691314903138e-05, + "loss": 42.4964, + "step": 160290 + }, + { + "epoch": 0.64763228384313, + "grad_norm": 715.339111328125, + "learning_rate": 1.3740039041034434e-05, + "loss": 88.9262, + "step": 160300 + }, + { + "epoch": 0.6476726851084976, + "grad_norm": 752.4271850585938, + "learning_rate": 1.3737386889206902e-05, + "loss": 60.3951, + "step": 160310 + }, + { + "epoch": 0.6477130863738653, + "grad_norm": 770.5107421875, + "learning_rate": 1.3734734859472238e-05, + "loss": 81.2563, + "step": 160320 + }, + { + "epoch": 0.6477534876392329, + "grad_norm": 693.3829345703125, + "learning_rate": 1.3732082951882136e-05, + "loss": 96.9872, + "step": 160330 + }, + { + "epoch": 0.6477938889046005, + "grad_norm": 661.836669921875, + "learning_rate": 1.372943116648831e-05, + "loss": 85.2413, + "step": 160340 + }, + { + "epoch": 0.6478342901699681, + "grad_norm": 722.52685546875, + "learning_rate": 1.372677950334245e-05, + "loss": 86.2864, + "step": 160350 + }, + { + "epoch": 0.6478746914353357, + "grad_norm": 847.0784301757812, + "learning_rate": 1.3724127962496252e-05, + "loss": 89.6241, + "step": 160360 + }, + { + "epoch": 0.6479150927007034, + "grad_norm": 438.812255859375, + "learning_rate": 1.3721476544001413e-05, + "loss": 53.0998, + "step": 160370 + }, + { + "epoch": 0.647955493966071, + "grad_norm": 467.0127868652344, + "learning_rate": 1.3718825247909615e-05, + "loss": 75.5601, + "step": 160380 + }, + { + "epoch": 0.6479958952314386, + "grad_norm": 897.6209106445312, + "learning_rate": 1.3716174074272561e-05, + "loss": 84.5261, + "step": 
160390 + }, + { + "epoch": 0.6480362964968063, + "grad_norm": 885.0318603515625, + "learning_rate": 1.3713523023141926e-05, + "loss": 78.75, + "step": 160400 + }, + { + "epoch": 0.6480766977621739, + "grad_norm": 516.3770141601562, + "learning_rate": 1.3710872094569387e-05, + "loss": 70.433, + "step": 160410 + }, + { + "epoch": 0.6481170990275416, + "grad_norm": 419.6017761230469, + "learning_rate": 1.3708221288606647e-05, + "loss": 74.1045, + "step": 160420 + }, + { + "epoch": 0.6481575002929092, + "grad_norm": 779.0675659179688, + "learning_rate": 1.370557060530537e-05, + "loss": 97.1904, + "step": 160430 + }, + { + "epoch": 0.6481979015582768, + "grad_norm": 1088.6451416015625, + "learning_rate": 1.370292004471723e-05, + "loss": 96.7889, + "step": 160440 + }, + { + "epoch": 0.6482383028236445, + "grad_norm": 750.1451416015625, + "learning_rate": 1.370026960689391e-05, + "loss": 61.3904, + "step": 160450 + }, + { + "epoch": 0.6482787040890121, + "grad_norm": 644.9822387695312, + "learning_rate": 1.369761929188707e-05, + "loss": 65.7152, + "step": 160460 + }, + { + "epoch": 0.6483191053543798, + "grad_norm": 1084.1964111328125, + "learning_rate": 1.3694969099748394e-05, + "loss": 87.6655, + "step": 160470 + }, + { + "epoch": 0.6483595066197473, + "grad_norm": 797.7989501953125, + "learning_rate": 1.3692319030529539e-05, + "loss": 74.964, + "step": 160480 + }, + { + "epoch": 0.6483999078851149, + "grad_norm": 982.2517700195312, + "learning_rate": 1.3689669084282167e-05, + "loss": 74.3063, + "step": 160490 + }, + { + "epoch": 0.6484403091504826, + "grad_norm": 605.3255004882812, + "learning_rate": 1.3687019261057955e-05, + "loss": 47.8839, + "step": 160500 + }, + { + "epoch": 0.6484807104158502, + "grad_norm": 435.4517822265625, + "learning_rate": 1.3684369560908547e-05, + "loss": 58.2129, + "step": 160510 + }, + { + "epoch": 0.6485211116812178, + "grad_norm": 1156.7591552734375, + "learning_rate": 1.3681719983885603e-05, + "loss": 62.6308, + "step": 160520 + }, + { + "epoch": 0.6485615129465855, + "grad_norm": 526.5201416015625, + "learning_rate": 1.3679070530040786e-05, + "loss": 68.5471, + "step": 160530 + }, + { + "epoch": 0.6486019142119531, + "grad_norm": 801.8320922851562, + "learning_rate": 1.3676421199425738e-05, + "loss": 77.8758, + "step": 160540 + }, + { + "epoch": 0.6486423154773208, + "grad_norm": 458.3919372558594, + "learning_rate": 1.3673771992092122e-05, + "loss": 49.5038, + "step": 160550 + }, + { + "epoch": 0.6486827167426884, + "grad_norm": 697.7495727539062, + "learning_rate": 1.3671122908091577e-05, + "loss": 93.9122, + "step": 160560 + }, + { + "epoch": 0.648723118008056, + "grad_norm": 1332.7796630859375, + "learning_rate": 1.3668473947475738e-05, + "loss": 77.6737, + "step": 160570 + }, + { + "epoch": 0.6487635192734237, + "grad_norm": 479.3167724609375, + "learning_rate": 1.3665825110296272e-05, + "loss": 55.3662, + "step": 160580 + }, + { + "epoch": 0.6488039205387913, + "grad_norm": 664.0673217773438, + "learning_rate": 1.3663176396604803e-05, + "loss": 55.2879, + "step": 160590 + }, + { + "epoch": 0.6488443218041589, + "grad_norm": 587.7325439453125, + "learning_rate": 1.3660527806452965e-05, + "loss": 81.0573, + "step": 160600 + }, + { + "epoch": 0.6488847230695265, + "grad_norm": 578.8350219726562, + "learning_rate": 1.365787933989241e-05, + "loss": 55.8804, + "step": 160610 + }, + { + "epoch": 0.6489251243348941, + "grad_norm": 384.7897644042969, + "learning_rate": 1.3655230996974758e-05, + "loss": 78.1416, + "step": 160620 + }, + { + "epoch": 
0.6489655256002618, + "grad_norm": 1332.18798828125, + "learning_rate": 1.365258277775165e-05, + "loss": 95.7777, + "step": 160630 + }, + { + "epoch": 0.6490059268656294, + "grad_norm": 733.2000122070312, + "learning_rate": 1.3649934682274706e-05, + "loss": 77.3906, + "step": 160640 + }, + { + "epoch": 0.649046328130997, + "grad_norm": 503.46728515625, + "learning_rate": 1.3647286710595547e-05, + "loss": 69.7331, + "step": 160650 + }, + { + "epoch": 0.6490867293963647, + "grad_norm": 712.6195068359375, + "learning_rate": 1.3644638862765814e-05, + "loss": 77.539, + "step": 160660 + }, + { + "epoch": 0.6491271306617323, + "grad_norm": 778.7798461914062, + "learning_rate": 1.3641991138837116e-05, + "loss": 94.9975, + "step": 160670 + }, + { + "epoch": 0.6491675319271, + "grad_norm": 367.2314147949219, + "learning_rate": 1.3639343538861068e-05, + "loss": 54.8503, + "step": 160680 + }, + { + "epoch": 0.6492079331924676, + "grad_norm": 535.1397094726562, + "learning_rate": 1.3636696062889301e-05, + "loss": 77.0529, + "step": 160690 + }, + { + "epoch": 0.6492483344578353, + "grad_norm": 775.1535034179688, + "learning_rate": 1.3634048710973413e-05, + "loss": 49.8361, + "step": 160700 + }, + { + "epoch": 0.6492887357232029, + "grad_norm": 758.524169921875, + "learning_rate": 1.3631401483165027e-05, + "loss": 60.9669, + "step": 160710 + }, + { + "epoch": 0.6493291369885705, + "grad_norm": 552.0361938476562, + "learning_rate": 1.3628754379515751e-05, + "loss": 36.741, + "step": 160720 + }, + { + "epoch": 0.6493695382539381, + "grad_norm": 994.1198120117188, + "learning_rate": 1.362610740007718e-05, + "loss": 75.5942, + "step": 160730 + }, + { + "epoch": 0.6494099395193057, + "grad_norm": 376.8166198730469, + "learning_rate": 1.3623460544900935e-05, + "loss": 99.5738, + "step": 160740 + }, + { + "epoch": 0.6494503407846733, + "grad_norm": 786.2276000976562, + "learning_rate": 1.36208138140386e-05, + "loss": 74.4602, + "step": 160750 + }, + { + "epoch": 0.649490742050041, + "grad_norm": 584.73095703125, + "learning_rate": 1.3618167207541792e-05, + "loss": 93.6861, + "step": 160760 + }, + { + "epoch": 0.6495311433154086, + "grad_norm": 1273.15380859375, + "learning_rate": 1.36155207254621e-05, + "loss": 82.141, + "step": 160770 + }, + { + "epoch": 0.6495715445807763, + "grad_norm": 496.6265869140625, + "learning_rate": 1.3612874367851114e-05, + "loss": 97.3545, + "step": 160780 + }, + { + "epoch": 0.6496119458461439, + "grad_norm": 395.5869140625, + "learning_rate": 1.3610228134760435e-05, + "loss": 111.5826, + "step": 160790 + }, + { + "epoch": 0.6496523471115115, + "grad_norm": 1039.7791748046875, + "learning_rate": 1.3607582026241644e-05, + "loss": 98.7043, + "step": 160800 + }, + { + "epoch": 0.6496927483768792, + "grad_norm": 714.786865234375, + "learning_rate": 1.3604936042346332e-05, + "loss": 61.7114, + "step": 160810 + }, + { + "epoch": 0.6497331496422468, + "grad_norm": 583.4796752929688, + "learning_rate": 1.360229018312609e-05, + "loss": 56.0971, + "step": 160820 + }, + { + "epoch": 0.6497735509076145, + "grad_norm": 1312.52880859375, + "learning_rate": 1.3599644448632486e-05, + "loss": 58.5244, + "step": 160830 + }, + { + "epoch": 0.6498139521729821, + "grad_norm": 589.6276245117188, + "learning_rate": 1.3596998838917115e-05, + "loss": 82.765, + "step": 160840 + }, + { + "epoch": 0.6498543534383497, + "grad_norm": 821.6893310546875, + "learning_rate": 1.359435335403155e-05, + "loss": 71.6941, + "step": 160850 + }, + { + "epoch": 0.6498947547037173, + "grad_norm": 577.155029296875, + 
"learning_rate": 1.3591707994027355e-05, + "loss": 84.5722, + "step": 160860 + }, + { + "epoch": 0.6499351559690849, + "grad_norm": 444.7301940917969, + "learning_rate": 1.358906275895612e-05, + "loss": 78.9322, + "step": 160870 + }, + { + "epoch": 0.6499755572344526, + "grad_norm": 1840.0020751953125, + "learning_rate": 1.358641764886941e-05, + "loss": 114.2937, + "step": 160880 + }, + { + "epoch": 0.6500159584998202, + "grad_norm": 274.7701721191406, + "learning_rate": 1.3583772663818781e-05, + "loss": 67.494, + "step": 160890 + }, + { + "epoch": 0.6500563597651878, + "grad_norm": 792.23583984375, + "learning_rate": 1.3581127803855814e-05, + "loss": 58.221, + "step": 160900 + }, + { + "epoch": 0.6500967610305555, + "grad_norm": 691.0342407226562, + "learning_rate": 1.3578483069032058e-05, + "loss": 86.0385, + "step": 160910 + }, + { + "epoch": 0.6501371622959231, + "grad_norm": 404.88970947265625, + "learning_rate": 1.3575838459399087e-05, + "loss": 58.4155, + "step": 160920 + }, + { + "epoch": 0.6501775635612908, + "grad_norm": 770.9337158203125, + "learning_rate": 1.3573193975008451e-05, + "loss": 85.0797, + "step": 160930 + }, + { + "epoch": 0.6502179648266584, + "grad_norm": 1255.2366943359375, + "learning_rate": 1.35705496159117e-05, + "loss": 74.5445, + "step": 160940 + }, + { + "epoch": 0.650258366092026, + "grad_norm": 924.8212280273438, + "learning_rate": 1.3567905382160406e-05, + "loss": 76.9298, + "step": 160950 + }, + { + "epoch": 0.6502987673573937, + "grad_norm": 620.1349487304688, + "learning_rate": 1.3565261273806108e-05, + "loss": 57.4106, + "step": 160960 + }, + { + "epoch": 0.6503391686227613, + "grad_norm": 641.9114379882812, + "learning_rate": 1.3562617290900346e-05, + "loss": 91.1644, + "step": 160970 + }, + { + "epoch": 0.650379569888129, + "grad_norm": 436.22265625, + "learning_rate": 1.355997343349468e-05, + "loss": 106.2327, + "step": 160980 + }, + { + "epoch": 0.6504199711534965, + "grad_norm": 590.812255859375, + "learning_rate": 1.355732970164064e-05, + "loss": 71.7804, + "step": 160990 + }, + { + "epoch": 0.6504603724188641, + "grad_norm": 600.588134765625, + "learning_rate": 1.3554686095389782e-05, + "loss": 65.6049, + "step": 161000 + }, + { + "epoch": 0.6505007736842318, + "grad_norm": 709.0258178710938, + "learning_rate": 1.3552042614793637e-05, + "loss": 43.4696, + "step": 161010 + }, + { + "epoch": 0.6505411749495994, + "grad_norm": 752.660888671875, + "learning_rate": 1.354939925990373e-05, + "loss": 63.2267, + "step": 161020 + }, + { + "epoch": 0.650581576214967, + "grad_norm": 747.1776733398438, + "learning_rate": 1.3546756030771614e-05, + "loss": 69.8672, + "step": 161030 + }, + { + "epoch": 0.6506219774803347, + "grad_norm": 1205.7171630859375, + "learning_rate": 1.3544112927448816e-05, + "loss": 96.8168, + "step": 161040 + }, + { + "epoch": 0.6506623787457023, + "grad_norm": 685.8651733398438, + "learning_rate": 1.3541469949986854e-05, + "loss": 101.6601, + "step": 161050 + }, + { + "epoch": 0.65070278001107, + "grad_norm": 2847.016357421875, + "learning_rate": 1.3538827098437264e-05, + "loss": 84.0328, + "step": 161060 + }, + { + "epoch": 0.6507431812764376, + "grad_norm": 1069.424072265625, + "learning_rate": 1.3536184372851558e-05, + "loss": 64.924, + "step": 161070 + }, + { + "epoch": 0.6507835825418052, + "grad_norm": 785.033935546875, + "learning_rate": 1.3533541773281274e-05, + "loss": 76.4062, + "step": 161080 + }, + { + "epoch": 0.6508239838071729, + "grad_norm": 536.116943359375, + "learning_rate": 1.353089929977792e-05, + "loss": 
67.8483, + "step": 161090 + }, + { + "epoch": 0.6508643850725405, + "grad_norm": 1682.790283203125, + "learning_rate": 1.352825695239301e-05, + "loss": 110.1849, + "step": 161100 + }, + { + "epoch": 0.650904786337908, + "grad_norm": 285.4573059082031, + "learning_rate": 1.3525614731178069e-05, + "loss": 54.1471, + "step": 161110 + }, + { + "epoch": 0.6509451876032757, + "grad_norm": 989.8858032226562, + "learning_rate": 1.3522972636184601e-05, + "loss": 69.4348, + "step": 161120 + }, + { + "epoch": 0.6509855888686433, + "grad_norm": 502.4445495605469, + "learning_rate": 1.3520330667464114e-05, + "loss": 112.5589, + "step": 161130 + }, + { + "epoch": 0.651025990134011, + "grad_norm": 1335.2984619140625, + "learning_rate": 1.3517688825068116e-05, + "loss": 73.5718, + "step": 161140 + }, + { + "epoch": 0.6510663913993786, + "grad_norm": 353.62213134765625, + "learning_rate": 1.3515047109048111e-05, + "loss": 64.3545, + "step": 161150 + }, + { + "epoch": 0.6511067926647462, + "grad_norm": 550.1016235351562, + "learning_rate": 1.3512405519455608e-05, + "loss": 54.599, + "step": 161160 + }, + { + "epoch": 0.6511471939301139, + "grad_norm": 856.041015625, + "learning_rate": 1.3509764056342096e-05, + "loss": 64.7074, + "step": 161170 + }, + { + "epoch": 0.6511875951954815, + "grad_norm": 667.9754638671875, + "learning_rate": 1.3507122719759067e-05, + "loss": 50.8567, + "step": 161180 + }, + { + "epoch": 0.6512279964608492, + "grad_norm": 646.44677734375, + "learning_rate": 1.3504481509758033e-05, + "loss": 72.5102, + "step": 161190 + }, + { + "epoch": 0.6512683977262168, + "grad_norm": 614.1642456054688, + "learning_rate": 1.3501840426390476e-05, + "loss": 66.226, + "step": 161200 + }, + { + "epoch": 0.6513087989915844, + "grad_norm": 686.0556640625, + "learning_rate": 1.3499199469707877e-05, + "loss": 86.8722, + "step": 161210 + }, + { + "epoch": 0.6513492002569521, + "grad_norm": 723.0970458984375, + "learning_rate": 1.3496558639761741e-05, + "loss": 69.3851, + "step": 161220 + }, + { + "epoch": 0.6513896015223197, + "grad_norm": 632.8616333007812, + "learning_rate": 1.3493917936603535e-05, + "loss": 69.0914, + "step": 161230 + }, + { + "epoch": 0.6514300027876873, + "grad_norm": 275.064453125, + "learning_rate": 1.3491277360284754e-05, + "loss": 42.687, + "step": 161240 + }, + { + "epoch": 0.6514704040530549, + "grad_norm": 966.8950805664062, + "learning_rate": 1.3488636910856871e-05, + "loss": 72.1204, + "step": 161250 + }, + { + "epoch": 0.6515108053184225, + "grad_norm": 394.5533752441406, + "learning_rate": 1.3485996588371357e-05, + "loss": 71.7363, + "step": 161260 + }, + { + "epoch": 0.6515512065837902, + "grad_norm": 400.4728088378906, + "learning_rate": 1.3483356392879704e-05, + "loss": 60.2767, + "step": 161270 + }, + { + "epoch": 0.6515916078491578, + "grad_norm": 477.8827209472656, + "learning_rate": 1.348071632443336e-05, + "loss": 93.8329, + "step": 161280 + }, + { + "epoch": 0.6516320091145255, + "grad_norm": 511.89825439453125, + "learning_rate": 1.3478076383083818e-05, + "loss": 66.7087, + "step": 161290 + }, + { + "epoch": 0.6516724103798931, + "grad_norm": 523.6215209960938, + "learning_rate": 1.3475436568882533e-05, + "loss": 77.129, + "step": 161300 + }, + { + "epoch": 0.6517128116452607, + "grad_norm": 464.6569519042969, + "learning_rate": 1.347279688188097e-05, + "loss": 60.1459, + "step": 161310 + }, + { + "epoch": 0.6517532129106284, + "grad_norm": 712.9063720703125, + "learning_rate": 1.3470157322130595e-05, + "loss": 60.7592, + "step": 161320 + }, + { + "epoch": 
0.651793614175996, + "grad_norm": 592.58056640625, + "learning_rate": 1.3467517889682861e-05, + "loss": 71.6511, + "step": 161330 + }, + { + "epoch": 0.6518340154413637, + "grad_norm": 432.8743591308594, + "learning_rate": 1.3464878584589225e-05, + "loss": 43.2813, + "step": 161340 + }, + { + "epoch": 0.6518744167067313, + "grad_norm": 656.1505737304688, + "learning_rate": 1.3462239406901155e-05, + "loss": 68.3061, + "step": 161350 + }, + { + "epoch": 0.6519148179720989, + "grad_norm": 850.76611328125, + "learning_rate": 1.3459600356670084e-05, + "loss": 71.7847, + "step": 161360 + }, + { + "epoch": 0.6519552192374665, + "grad_norm": 872.5621948242188, + "learning_rate": 1.3456961433947478e-05, + "loss": 118.6288, + "step": 161370 + }, + { + "epoch": 0.6519956205028341, + "grad_norm": 470.193359375, + "learning_rate": 1.3454322638784775e-05, + "loss": 75.1044, + "step": 161380 + }, + { + "epoch": 0.6520360217682017, + "grad_norm": 624.7136840820312, + "learning_rate": 1.3451683971233424e-05, + "loss": 59.6014, + "step": 161390 + }, + { + "epoch": 0.6520764230335694, + "grad_norm": 468.70849609375, + "learning_rate": 1.3449045431344865e-05, + "loss": 58.0786, + "step": 161400 + }, + { + "epoch": 0.652116824298937, + "grad_norm": 1100.5264892578125, + "learning_rate": 1.344640701917054e-05, + "loss": 100.2504, + "step": 161410 + }, + { + "epoch": 0.6521572255643047, + "grad_norm": 922.7738647460938, + "learning_rate": 1.3443768734761875e-05, + "loss": 70.6258, + "step": 161420 + }, + { + "epoch": 0.6521976268296723, + "grad_norm": 754.5645751953125, + "learning_rate": 1.3441130578170322e-05, + "loss": 112.7248, + "step": 161430 + }, + { + "epoch": 0.65223802809504, + "grad_norm": 211.24819946289062, + "learning_rate": 1.34384925494473e-05, + "loss": 64.4188, + "step": 161440 + }, + { + "epoch": 0.6522784293604076, + "grad_norm": 271.5140380859375, + "learning_rate": 1.3435854648644248e-05, + "loss": 76.4791, + "step": 161450 + }, + { + "epoch": 0.6523188306257752, + "grad_norm": 562.59375, + "learning_rate": 1.3433216875812592e-05, + "loss": 88.0456, + "step": 161460 + }, + { + "epoch": 0.6523592318911429, + "grad_norm": 661.5233154296875, + "learning_rate": 1.343057923100375e-05, + "loss": 81.5796, + "step": 161470 + }, + { + "epoch": 0.6523996331565105, + "grad_norm": 752.134033203125, + "learning_rate": 1.3427941714269148e-05, + "loss": 110.8604, + "step": 161480 + }, + { + "epoch": 0.6524400344218781, + "grad_norm": 436.72174072265625, + "learning_rate": 1.3425304325660205e-05, + "loss": 66.3401, + "step": 161490 + }, + { + "epoch": 0.6524804356872457, + "grad_norm": 695.7260131835938, + "learning_rate": 1.3422667065228336e-05, + "loss": 76.9202, + "step": 161500 + }, + { + "epoch": 0.6525208369526133, + "grad_norm": 455.6388244628906, + "learning_rate": 1.3420029933024967e-05, + "loss": 65.3791, + "step": 161510 + }, + { + "epoch": 0.652561238217981, + "grad_norm": 673.3358154296875, + "learning_rate": 1.3417392929101491e-05, + "loss": 80.2538, + "step": 161520 + }, + { + "epoch": 0.6526016394833486, + "grad_norm": 1552.604736328125, + "learning_rate": 1.3414756053509338e-05, + "loss": 105.0456, + "step": 161530 + }, + { + "epoch": 0.6526420407487162, + "grad_norm": 394.3142395019531, + "learning_rate": 1.3412119306299908e-05, + "loss": 52.1294, + "step": 161540 + }, + { + "epoch": 0.6526824420140839, + "grad_norm": 259.7141418457031, + "learning_rate": 1.3409482687524592e-05, + "loss": 55.0289, + "step": 161550 + }, + { + "epoch": 0.6527228432794515, + "grad_norm": 654.1292114257812, + 
"learning_rate": 1.3406846197234814e-05, + "loss": 76.5715, + "step": 161560 + }, + { + "epoch": 0.6527632445448192, + "grad_norm": 1038.5487060546875, + "learning_rate": 1.3404209835481963e-05, + "loss": 109.9557, + "step": 161570 + }, + { + "epoch": 0.6528036458101868, + "grad_norm": 296.17657470703125, + "learning_rate": 1.3401573602317433e-05, + "loss": 79.3898, + "step": 161580 + }, + { + "epoch": 0.6528440470755544, + "grad_norm": 1478.6693115234375, + "learning_rate": 1.3398937497792628e-05, + "loss": 63.504, + "step": 161590 + }, + { + "epoch": 0.6528844483409221, + "grad_norm": 504.5005187988281, + "learning_rate": 1.3396301521958926e-05, + "loss": 132.2019, + "step": 161600 + }, + { + "epoch": 0.6529248496062897, + "grad_norm": 1156.9683837890625, + "learning_rate": 1.3393665674867734e-05, + "loss": 61.1861, + "step": 161610 + }, + { + "epoch": 0.6529652508716574, + "grad_norm": 425.641357421875, + "learning_rate": 1.3391029956570432e-05, + "loss": 71.1377, + "step": 161620 + }, + { + "epoch": 0.6530056521370249, + "grad_norm": 519.8108520507812, + "learning_rate": 1.3388394367118393e-05, + "loss": 51.4026, + "step": 161630 + }, + { + "epoch": 0.6530460534023925, + "grad_norm": 457.7241516113281, + "learning_rate": 1.3385758906563017e-05, + "loss": 72.2662, + "step": 161640 + }, + { + "epoch": 0.6530864546677602, + "grad_norm": 907.3209838867188, + "learning_rate": 1.338312357495568e-05, + "loss": 57.9254, + "step": 161650 + }, + { + "epoch": 0.6531268559331278, + "grad_norm": 1647.94775390625, + "learning_rate": 1.3380488372347751e-05, + "loss": 73.0616, + "step": 161660 + }, + { + "epoch": 0.6531672571984954, + "grad_norm": 661.9964599609375, + "learning_rate": 1.337785329879061e-05, + "loss": 52.582, + "step": 161670 + }, + { + "epoch": 0.6532076584638631, + "grad_norm": 1040.6243896484375, + "learning_rate": 1.3375218354335621e-05, + "loss": 49.5019, + "step": 161680 + }, + { + "epoch": 0.6532480597292307, + "grad_norm": 937.0400390625, + "learning_rate": 1.3372583539034171e-05, + "loss": 73.0211, + "step": 161690 + }, + { + "epoch": 0.6532884609945984, + "grad_norm": 582.1188354492188, + "learning_rate": 1.3369948852937617e-05, + "loss": 50.2254, + "step": 161700 + }, + { + "epoch": 0.653328862259966, + "grad_norm": 598.3118286132812, + "learning_rate": 1.3367314296097314e-05, + "loss": 63.526, + "step": 161710 + }, + { + "epoch": 0.6533692635253336, + "grad_norm": 295.6277160644531, + "learning_rate": 1.336467986856464e-05, + "loss": 55.5136, + "step": 161720 + }, + { + "epoch": 0.6534096647907013, + "grad_norm": 766.13818359375, + "learning_rate": 1.3362045570390953e-05, + "loss": 79.1598, + "step": 161730 + }, + { + "epoch": 0.6534500660560689, + "grad_norm": 1282.21240234375, + "learning_rate": 1.3359411401627599e-05, + "loss": 90.7797, + "step": 161740 + }, + { + "epoch": 0.6534904673214365, + "grad_norm": 889.6817016601562, + "learning_rate": 1.335677736232594e-05, + "loss": 113.7475, + "step": 161750 + }, + { + "epoch": 0.6535308685868041, + "grad_norm": 891.2369995117188, + "learning_rate": 1.335414345253732e-05, + "loss": 55.8518, + "step": 161760 + }, + { + "epoch": 0.6535712698521717, + "grad_norm": 900.3671264648438, + "learning_rate": 1.3351509672313104e-05, + "loss": 83.89, + "step": 161770 + }, + { + "epoch": 0.6536116711175394, + "grad_norm": 892.0913696289062, + "learning_rate": 1.3348876021704626e-05, + "loss": 92.1599, + "step": 161780 + }, + { + "epoch": 0.653652072382907, + "grad_norm": 1214.791748046875, + "learning_rate": 1.3346242500763232e-05, + 
"loss": 70.7672, + "step": 161790 + }, + { + "epoch": 0.6536924736482747, + "grad_norm": 527.7857666015625, + "learning_rate": 1.334360910954027e-05, + "loss": 93.657, + "step": 161800 + }, + { + "epoch": 0.6537328749136423, + "grad_norm": 629.2025756835938, + "learning_rate": 1.3340975848087073e-05, + "loss": 57.7886, + "step": 161810 + }, + { + "epoch": 0.6537732761790099, + "grad_norm": 1139.6903076171875, + "learning_rate": 1.3338342716454982e-05, + "loss": 72.6553, + "step": 161820 + }, + { + "epoch": 0.6538136774443776, + "grad_norm": 584.1442260742188, + "learning_rate": 1.3335709714695328e-05, + "loss": 70.6085, + "step": 161830 + }, + { + "epoch": 0.6538540787097452, + "grad_norm": 1007.7686157226562, + "learning_rate": 1.3333076842859442e-05, + "loss": 52.4561, + "step": 161840 + }, + { + "epoch": 0.6538944799751129, + "grad_norm": 542.9125366210938, + "learning_rate": 1.3330444100998661e-05, + "loss": 52.9735, + "step": 161850 + }, + { + "epoch": 0.6539348812404805, + "grad_norm": 909.5845947265625, + "learning_rate": 1.3327811489164307e-05, + "loss": 61.6618, + "step": 161860 + }, + { + "epoch": 0.6539752825058481, + "grad_norm": 545.774658203125, + "learning_rate": 1.3325179007407691e-05, + "loss": 80.9739, + "step": 161870 + }, + { + "epoch": 0.6540156837712157, + "grad_norm": 987.7091064453125, + "learning_rate": 1.3322546655780157e-05, + "loss": 58.6951, + "step": 161880 + }, + { + "epoch": 0.6540560850365833, + "grad_norm": 676.7633056640625, + "learning_rate": 1.3319914434333007e-05, + "loss": 60.6353, + "step": 161890 + }, + { + "epoch": 0.6540964863019509, + "grad_norm": 1026.47705078125, + "learning_rate": 1.331728234311757e-05, + "loss": 56.0162, + "step": 161900 + }, + { + "epoch": 0.6541368875673186, + "grad_norm": 1010.1213989257812, + "learning_rate": 1.3314650382185157e-05, + "loss": 77.6137, + "step": 161910 + }, + { + "epoch": 0.6541772888326862, + "grad_norm": 346.5876770019531, + "learning_rate": 1.3312018551587069e-05, + "loss": 71.8248, + "step": 161920 + }, + { + "epoch": 0.6542176900980539, + "grad_norm": 615.5674438476562, + "learning_rate": 1.330938685137463e-05, + "loss": 57.9244, + "step": 161930 + }, + { + "epoch": 0.6542580913634215, + "grad_norm": 1105.2025146484375, + "learning_rate": 1.3306755281599135e-05, + "loss": 68.3786, + "step": 161940 + }, + { + "epoch": 0.6542984926287891, + "grad_norm": 649.3919067382812, + "learning_rate": 1.3304123842311887e-05, + "loss": 66.6242, + "step": 161950 + }, + { + "epoch": 0.6543388938941568, + "grad_norm": 872.7116088867188, + "learning_rate": 1.33014925335642e-05, + "loss": 74.8301, + "step": 161960 + }, + { + "epoch": 0.6543792951595244, + "grad_norm": 972.5166625976562, + "learning_rate": 1.3298861355407355e-05, + "loss": 82.7535, + "step": 161970 + }, + { + "epoch": 0.6544196964248921, + "grad_norm": 917.50537109375, + "learning_rate": 1.3296230307892665e-05, + "loss": 65.3815, + "step": 161980 + }, + { + "epoch": 0.6544600976902597, + "grad_norm": 759.1207885742188, + "learning_rate": 1.3293599391071422e-05, + "loss": 86.6079, + "step": 161990 + }, + { + "epoch": 0.6545004989556273, + "grad_norm": 947.5846557617188, + "learning_rate": 1.3290968604994903e-05, + "loss": 67.1139, + "step": 162000 + }, + { + "epoch": 0.6545409002209949, + "grad_norm": 791.1720581054688, + "learning_rate": 1.3288337949714407e-05, + "loss": 79.7381, + "step": 162010 + }, + { + "epoch": 0.6545813014863625, + "grad_norm": 543.3690185546875, + "learning_rate": 1.3285707425281222e-05, + "loss": 63.2529, + "step": 162020 + }, + 
{ + "epoch": 0.6546217027517302, + "grad_norm": 364.7917785644531, + "learning_rate": 1.3283077031746619e-05, + "loss": 121.3299, + "step": 162030 + }, + { + "epoch": 0.6546621040170978, + "grad_norm": 883.3060913085938, + "learning_rate": 1.3280446769161895e-05, + "loss": 72.3911, + "step": 162040 + }, + { + "epoch": 0.6547025052824654, + "grad_norm": 566.2040405273438, + "learning_rate": 1.3277816637578312e-05, + "loss": 45.3708, + "step": 162050 + }, + { + "epoch": 0.6547429065478331, + "grad_norm": 856.8551635742188, + "learning_rate": 1.3275186637047162e-05, + "loss": 55.6507, + "step": 162060 + }, + { + "epoch": 0.6547833078132007, + "grad_norm": 634.8489990234375, + "learning_rate": 1.327255676761971e-05, + "loss": 68.6878, + "step": 162070 + }, + { + "epoch": 0.6548237090785684, + "grad_norm": 530.1317138671875, + "learning_rate": 1.3269927029347227e-05, + "loss": 66.6493, + "step": 162080 + }, + { + "epoch": 0.654864110343936, + "grad_norm": 960.0006103515625, + "learning_rate": 1.3267297422280982e-05, + "loss": 55.0057, + "step": 162090 + }, + { + "epoch": 0.6549045116093036, + "grad_norm": 763.1298828125, + "learning_rate": 1.326466794647224e-05, + "loss": 49.1444, + "step": 162100 + }, + { + "epoch": 0.6549449128746713, + "grad_norm": 620.3780517578125, + "learning_rate": 1.326203860197226e-05, + "loss": 30.2903, + "step": 162110 + }, + { + "epoch": 0.6549853141400389, + "grad_norm": 755.6101684570312, + "learning_rate": 1.325940938883231e-05, + "loss": 66.8344, + "step": 162120 + }, + { + "epoch": 0.6550257154054066, + "grad_norm": 359.6354675292969, + "learning_rate": 1.3256780307103638e-05, + "loss": 49.4897, + "step": 162130 + }, + { + "epoch": 0.6550661166707741, + "grad_norm": 451.46929931640625, + "learning_rate": 1.3254151356837515e-05, + "loss": 82.0691, + "step": 162140 + }, + { + "epoch": 0.6551065179361417, + "grad_norm": 813.3724365234375, + "learning_rate": 1.325152253808518e-05, + "loss": 78.7522, + "step": 162150 + }, + { + "epoch": 0.6551469192015094, + "grad_norm": 649.9912719726562, + "learning_rate": 1.3248893850897888e-05, + "loss": 98.0552, + "step": 162160 + }, + { + "epoch": 0.655187320466877, + "grad_norm": 511.0652160644531, + "learning_rate": 1.3246265295326883e-05, + "loss": 51.0318, + "step": 162170 + }, + { + "epoch": 0.6552277217322446, + "grad_norm": 1944.7783203125, + "learning_rate": 1.3243636871423418e-05, + "loss": 101.7423, + "step": 162180 + }, + { + "epoch": 0.6552681229976123, + "grad_norm": 861.6324462890625, + "learning_rate": 1.3241008579238728e-05, + "loss": 121.0775, + "step": 162190 + }, + { + "epoch": 0.6553085242629799, + "grad_norm": 841.708740234375, + "learning_rate": 1.3238380418824059e-05, + "loss": 61.7578, + "step": 162200 + }, + { + "epoch": 0.6553489255283476, + "grad_norm": 1190.0865478515625, + "learning_rate": 1.3235752390230637e-05, + "loss": 104.4876, + "step": 162210 + }, + { + "epoch": 0.6553893267937152, + "grad_norm": 957.6503295898438, + "learning_rate": 1.323312449350971e-05, + "loss": 83.8859, + "step": 162220 + }, + { + "epoch": 0.6554297280590828, + "grad_norm": 497.73724365234375, + "learning_rate": 1.3230496728712508e-05, + "loss": 69.8298, + "step": 162230 + }, + { + "epoch": 0.6554701293244505, + "grad_norm": 566.8706665039062, + "learning_rate": 1.3227869095890246e-05, + "loss": 101.9528, + "step": 162240 + }, + { + "epoch": 0.6555105305898181, + "grad_norm": 569.1433715820312, + "learning_rate": 1.3225241595094173e-05, + "loss": 42.2475, + "step": 162250 + }, + { + "epoch": 0.6555509318551858, + 
"grad_norm": 569.1734008789062, + "learning_rate": 1.3222614226375506e-05, + "loss": 53.5244, + "step": 162260 + }, + { + "epoch": 0.6555913331205533, + "grad_norm": 490.70172119140625, + "learning_rate": 1.3219986989785453e-05, + "loss": 64.866, + "step": 162270 + }, + { + "epoch": 0.6556317343859209, + "grad_norm": 582.62744140625, + "learning_rate": 1.3217359885375252e-05, + "loss": 68.3456, + "step": 162280 + }, + { + "epoch": 0.6556721356512886, + "grad_norm": 1308.380859375, + "learning_rate": 1.3214732913196105e-05, + "loss": 101.8817, + "step": 162290 + }, + { + "epoch": 0.6557125369166562, + "grad_norm": 836.5472412109375, + "learning_rate": 1.321210607329924e-05, + "loss": 88.6232, + "step": 162300 + }, + { + "epoch": 0.6557529381820238, + "grad_norm": 1205.0787353515625, + "learning_rate": 1.3209479365735862e-05, + "loss": 81.9118, + "step": 162310 + }, + { + "epoch": 0.6557933394473915, + "grad_norm": 728.8317260742188, + "learning_rate": 1.320685279055717e-05, + "loss": 59.5813, + "step": 162320 + }, + { + "epoch": 0.6558337407127591, + "grad_norm": 385.7361755371094, + "learning_rate": 1.320422634781439e-05, + "loss": 123.2012, + "step": 162330 + }, + { + "epoch": 0.6558741419781268, + "grad_norm": 2698.7880859375, + "learning_rate": 1.3201600037558712e-05, + "loss": 93.7562, + "step": 162340 + }, + { + "epoch": 0.6559145432434944, + "grad_norm": 969.4913330078125, + "learning_rate": 1.3198973859841347e-05, + "loss": 58.9619, + "step": 162350 + }, + { + "epoch": 0.655954944508862, + "grad_norm": 949.0149536132812, + "learning_rate": 1.3196347814713483e-05, + "loss": 121.9926, + "step": 162360 + }, + { + "epoch": 0.6559953457742297, + "grad_norm": 1150.511474609375, + "learning_rate": 1.3193721902226317e-05, + "loss": 78.754, + "step": 162370 + }, + { + "epoch": 0.6560357470395973, + "grad_norm": 464.0296630859375, + "learning_rate": 1.3191096122431052e-05, + "loss": 42.8923, + "step": 162380 + }, + { + "epoch": 0.6560761483049649, + "grad_norm": 623.8461303710938, + "learning_rate": 1.3188470475378878e-05, + "loss": 50.7004, + "step": 162390 + }, + { + "epoch": 0.6561165495703325, + "grad_norm": 2313.32763671875, + "learning_rate": 1.3185844961120969e-05, + "loss": 80.8254, + "step": 162400 + }, + { + "epoch": 0.6561569508357001, + "grad_norm": 794.862548828125, + "learning_rate": 1.3183219579708526e-05, + "loss": 71.6139, + "step": 162410 + }, + { + "epoch": 0.6561973521010678, + "grad_norm": 605.0182495117188, + "learning_rate": 1.3180594331192724e-05, + "loss": 48.644, + "step": 162420 + }, + { + "epoch": 0.6562377533664354, + "grad_norm": 1608.7205810546875, + "learning_rate": 1.3177969215624753e-05, + "loss": 93.1049, + "step": 162430 + }, + { + "epoch": 0.6562781546318031, + "grad_norm": 1073.2041015625, + "learning_rate": 1.317534423305578e-05, + "loss": 79.8644, + "step": 162440 + }, + { + "epoch": 0.6563185558971707, + "grad_norm": 852.2994384765625, + "learning_rate": 1.317271938353698e-05, + "loss": 105.1243, + "step": 162450 + }, + { + "epoch": 0.6563589571625383, + "grad_norm": 905.4722290039062, + "learning_rate": 1.3170094667119537e-05, + "loss": 85.6243, + "step": 162460 + }, + { + "epoch": 0.656399358427906, + "grad_norm": 573.2252807617188, + "learning_rate": 1.3167470083854617e-05, + "loss": 74.2065, + "step": 162470 + }, + { + "epoch": 0.6564397596932736, + "grad_norm": 877.1802978515625, + "learning_rate": 1.3164845633793375e-05, + "loss": 69.383, + "step": 162480 + }, + { + "epoch": 0.6564801609586413, + "grad_norm": 556.0078735351562, + "learning_rate": 
1.3162221316986995e-05, + "loss": 50.119, + "step": 162490 + }, + { + "epoch": 0.6565205622240089, + "grad_norm": 801.4729614257812, + "learning_rate": 1.3159597133486628e-05, + "loss": 77.5159, + "step": 162500 + }, + { + "epoch": 0.6565609634893765, + "grad_norm": 322.7227783203125, + "learning_rate": 1.3156973083343433e-05, + "loss": 99.668, + "step": 162510 + }, + { + "epoch": 0.6566013647547441, + "grad_norm": 591.3479614257812, + "learning_rate": 1.3154349166608578e-05, + "loss": 57.394, + "step": 162520 + }, + { + "epoch": 0.6566417660201117, + "grad_norm": 1480.6282958984375, + "learning_rate": 1.3151725383333207e-05, + "loss": 87.9757, + "step": 162530 + }, + { + "epoch": 0.6566821672854793, + "grad_norm": 330.1382141113281, + "learning_rate": 1.3149101733568476e-05, + "loss": 44.3307, + "step": 162540 + }, + { + "epoch": 0.656722568550847, + "grad_norm": 632.9432983398438, + "learning_rate": 1.3146478217365537e-05, + "loss": 73.2335, + "step": 162550 + }, + { + "epoch": 0.6567629698162146, + "grad_norm": 679.910400390625, + "learning_rate": 1.3143854834775524e-05, + "loss": 69.2606, + "step": 162560 + }, + { + "epoch": 0.6568033710815823, + "grad_norm": 916.4014892578125, + "learning_rate": 1.3141231585849598e-05, + "loss": 77.6366, + "step": 162570 + }, + { + "epoch": 0.6568437723469499, + "grad_norm": 1252.4212646484375, + "learning_rate": 1.3138608470638887e-05, + "loss": 81.1889, + "step": 162580 + }, + { + "epoch": 0.6568841736123175, + "grad_norm": 802.8052978515625, + "learning_rate": 1.3135985489194543e-05, + "loss": 53.8909, + "step": 162590 + }, + { + "epoch": 0.6569245748776852, + "grad_norm": 655.087890625, + "learning_rate": 1.3133362641567697e-05, + "loss": 77.6268, + "step": 162600 + }, + { + "epoch": 0.6569649761430528, + "grad_norm": 845.80078125, + "learning_rate": 1.3130739927809476e-05, + "loss": 69.8691, + "step": 162610 + }, + { + "epoch": 0.6570053774084205, + "grad_norm": 816.5472412109375, + "learning_rate": 1.3128117347971023e-05, + "loss": 104.1305, + "step": 162620 + }, + { + "epoch": 0.6570457786737881, + "grad_norm": 1293.4429931640625, + "learning_rate": 1.312549490210346e-05, + "loss": 114.273, + "step": 162630 + }, + { + "epoch": 0.6570861799391557, + "grad_norm": 528.7364501953125, + "learning_rate": 1.3122872590257902e-05, + "loss": 72.0258, + "step": 162640 + }, + { + "epoch": 0.6571265812045233, + "grad_norm": 565.7294921875, + "learning_rate": 1.3120250412485493e-05, + "loss": 61.1255, + "step": 162650 + }, + { + "epoch": 0.6571669824698909, + "grad_norm": 1237.9794921875, + "learning_rate": 1.3117628368837339e-05, + "loss": 56.6205, + "step": 162660 + }, + { + "epoch": 0.6572073837352586, + "grad_norm": 954.3428955078125, + "learning_rate": 1.3115006459364567e-05, + "loss": 55.5886, + "step": 162670 + }, + { + "epoch": 0.6572477850006262, + "grad_norm": 845.1995849609375, + "learning_rate": 1.311238468411829e-05, + "loss": 69.7064, + "step": 162680 + }, + { + "epoch": 0.6572881862659938, + "grad_norm": 51.99916458129883, + "learning_rate": 1.3109763043149617e-05, + "loss": 113.04, + "step": 162690 + }, + { + "epoch": 0.6573285875313615, + "grad_norm": 819.9505615234375, + "learning_rate": 1.3107141536509662e-05, + "loss": 71.8302, + "step": 162700 + }, + { + "epoch": 0.6573689887967291, + "grad_norm": 724.6686401367188, + "learning_rate": 1.3104520164249534e-05, + "loss": 101.6376, + "step": 162710 + }, + { + "epoch": 0.6574093900620968, + "grad_norm": 657.60205078125, + "learning_rate": 1.3101898926420326e-05, + "loss": 76.3235, + "step": 
162720 + }, + { + "epoch": 0.6574497913274644, + "grad_norm": 879.225341796875, + "learning_rate": 1.309927782307316e-05, + "loss": 62.3352, + "step": 162730 + }, + { + "epoch": 0.657490192592832, + "grad_norm": 774.8482055664062, + "learning_rate": 1.3096656854259115e-05, + "loss": 78.1338, + "step": 162740 + }, + { + "epoch": 0.6575305938581997, + "grad_norm": 855.8924560546875, + "learning_rate": 1.3094036020029307e-05, + "loss": 84.6432, + "step": 162750 + }, + { + "epoch": 0.6575709951235673, + "grad_norm": 240.22894287109375, + "learning_rate": 1.3091415320434826e-05, + "loss": 71.9522, + "step": 162760 + }, + { + "epoch": 0.657611396388935, + "grad_norm": 637.4486694335938, + "learning_rate": 1.3088794755526753e-05, + "loss": 73.3256, + "step": 162770 + }, + { + "epoch": 0.6576517976543025, + "grad_norm": 985.8680419921875, + "learning_rate": 1.3086174325356188e-05, + "loss": 63.965, + "step": 162780 + }, + { + "epoch": 0.6576921989196701, + "grad_norm": 448.2850036621094, + "learning_rate": 1.3083554029974206e-05, + "loss": 88.6702, + "step": 162790 + }, + { + "epoch": 0.6577326001850378, + "grad_norm": 836.6506958007812, + "learning_rate": 1.3080933869431906e-05, + "loss": 72.5014, + "step": 162800 + }, + { + "epoch": 0.6577730014504054, + "grad_norm": 647.3243408203125, + "learning_rate": 1.3078313843780363e-05, + "loss": 89.7044, + "step": 162810 + }, + { + "epoch": 0.657813402715773, + "grad_norm": 502.1864929199219, + "learning_rate": 1.3075693953070649e-05, + "loss": 71.1867, + "step": 162820 + }, + { + "epoch": 0.6578538039811407, + "grad_norm": 209.5566864013672, + "learning_rate": 1.3073074197353855e-05, + "loss": 60.0068, + "step": 162830 + }, + { + "epoch": 0.6578942052465083, + "grad_norm": 540.8405151367188, + "learning_rate": 1.3070454576681044e-05, + "loss": 63.7319, + "step": 162840 + }, + { + "epoch": 0.657934606511876, + "grad_norm": 860.0282592773438, + "learning_rate": 1.306783509110328e-05, + "loss": 69.9546, + "step": 162850 + }, + { + "epoch": 0.6579750077772436, + "grad_norm": 465.2800598144531, + "learning_rate": 1.3065215740671648e-05, + "loss": 59.6154, + "step": 162860 + }, + { + "epoch": 0.6580154090426112, + "grad_norm": 684.0255737304688, + "learning_rate": 1.3062596525437204e-05, + "loss": 79.0835, + "step": 162870 + }, + { + "epoch": 0.6580558103079789, + "grad_norm": 879.6417846679688, + "learning_rate": 1.3059977445451013e-05, + "loss": 61.1362, + "step": 162880 + }, + { + "epoch": 0.6580962115733465, + "grad_norm": 473.4673767089844, + "learning_rate": 1.3057358500764135e-05, + "loss": 54.2607, + "step": 162890 + }, + { + "epoch": 0.6581366128387142, + "grad_norm": 335.14837646484375, + "learning_rate": 1.305473969142762e-05, + "loss": 101.9063, + "step": 162900 + }, + { + "epoch": 0.6581770141040817, + "grad_norm": 836.22607421875, + "learning_rate": 1.3052121017492539e-05, + "loss": 92.5224, + "step": 162910 + }, + { + "epoch": 0.6582174153694493, + "grad_norm": 2491.544677734375, + "learning_rate": 1.3049502479009935e-05, + "loss": 65.7323, + "step": 162920 + }, + { + "epoch": 0.658257816634817, + "grad_norm": 588.1403198242188, + "learning_rate": 1.3046884076030853e-05, + "loss": 77.9564, + "step": 162930 + }, + { + "epoch": 0.6582982179001846, + "grad_norm": 575.6414184570312, + "learning_rate": 1.304426580860635e-05, + "loss": 57.5127, + "step": 162940 + }, + { + "epoch": 0.6583386191655523, + "grad_norm": 757.3257446289062, + "learning_rate": 1.3041647676787464e-05, + "loss": 61.1591, + "step": 162950 + }, + { + "epoch": 
0.6583790204309199, + "grad_norm": 287.9200439453125, + "learning_rate": 1.3039029680625242e-05, + "loss": 78.1648, + "step": 162960 + }, + { + "epoch": 0.6584194216962875, + "grad_norm": 956.102783203125, + "learning_rate": 1.303641182017072e-05, + "loss": 74.9101, + "step": 162970 + }, + { + "epoch": 0.6584598229616552, + "grad_norm": 814.851806640625, + "learning_rate": 1.3033794095474927e-05, + "loss": 77.447, + "step": 162980 + }, + { + "epoch": 0.6585002242270228, + "grad_norm": 730.9616088867188, + "learning_rate": 1.3031176506588912e-05, + "loss": 34.6117, + "step": 162990 + }, + { + "epoch": 0.6585406254923905, + "grad_norm": 1323.069091796875, + "learning_rate": 1.3028559053563701e-05, + "loss": 91.8896, + "step": 163000 + }, + { + "epoch": 0.6585810267577581, + "grad_norm": 1096.9459228515625, + "learning_rate": 1.3025941736450311e-05, + "loss": 90.9173, + "step": 163010 + }, + { + "epoch": 0.6586214280231257, + "grad_norm": 658.8229370117188, + "learning_rate": 1.3023324555299786e-05, + "loss": 53.2634, + "step": 163020 + }, + { + "epoch": 0.6586618292884933, + "grad_norm": 605.7014770507812, + "learning_rate": 1.3020707510163136e-05, + "loss": 71.0151, + "step": 163030 + }, + { + "epoch": 0.6587022305538609, + "grad_norm": 1108.0189208984375, + "learning_rate": 1.301809060109139e-05, + "loss": 69.549, + "step": 163040 + }, + { + "epoch": 0.6587426318192285, + "grad_norm": 539.2125854492188, + "learning_rate": 1.3015473828135563e-05, + "loss": 47.8811, + "step": 163050 + }, + { + "epoch": 0.6587830330845962, + "grad_norm": 552.6699829101562, + "learning_rate": 1.3012857191346662e-05, + "loss": 87.0088, + "step": 163060 + }, + { + "epoch": 0.6588234343499638, + "grad_norm": 718.1719970703125, + "learning_rate": 1.3010240690775713e-05, + "loss": 88.4073, + "step": 163070 + }, + { + "epoch": 0.6588638356153315, + "grad_norm": 1012.406494140625, + "learning_rate": 1.3007624326473723e-05, + "loss": 76.847, + "step": 163080 + }, + { + "epoch": 0.6589042368806991, + "grad_norm": 436.86151123046875, + "learning_rate": 1.3005008098491687e-05, + "loss": 77.3907, + "step": 163090 + }, + { + "epoch": 0.6589446381460667, + "grad_norm": 474.73681640625, + "learning_rate": 1.3002392006880626e-05, + "loss": 73.6357, + "step": 163100 + }, + { + "epoch": 0.6589850394114344, + "grad_norm": 904.4951171875, + "learning_rate": 1.2999776051691532e-05, + "loss": 93.2727, + "step": 163110 + }, + { + "epoch": 0.659025440676802, + "grad_norm": 859.4202880859375, + "learning_rate": 1.2997160232975415e-05, + "loss": 93.6704, + "step": 163120 + }, + { + "epoch": 0.6590658419421697, + "grad_norm": 500.0219421386719, + "learning_rate": 1.299454455078326e-05, + "loss": 47.9223, + "step": 163130 + }, + { + "epoch": 0.6591062432075373, + "grad_norm": 628.7444458007812, + "learning_rate": 1.2991929005166064e-05, + "loss": 49.0926, + "step": 163140 + }, + { + "epoch": 0.6591466444729049, + "grad_norm": 695.0300903320312, + "learning_rate": 1.2989313596174824e-05, + "loss": 68.7391, + "step": 163150 + }, + { + "epoch": 0.6591870457382725, + "grad_norm": 651.5755615234375, + "learning_rate": 1.2986698323860525e-05, + "loss": 66.4738, + "step": 163160 + }, + { + "epoch": 0.6592274470036401, + "grad_norm": 677.1963500976562, + "learning_rate": 1.2984083188274145e-05, + "loss": 53.3787, + "step": 163170 + }, + { + "epoch": 0.6592678482690078, + "grad_norm": 506.3239440917969, + "learning_rate": 1.2981468189466684e-05, + "loss": 59.778, + "step": 163180 + }, + { + "epoch": 0.6593082495343754, + "grad_norm": 
531.9502563476562, + "learning_rate": 1.2978853327489102e-05, + "loss": 101.51, + "step": 163190 + }, + { + "epoch": 0.659348650799743, + "grad_norm": 571.9376831054688, + "learning_rate": 1.2976238602392402e-05, + "loss": 80.3153, + "step": 163200 + }, + { + "epoch": 0.6593890520651107, + "grad_norm": 321.15277099609375, + "learning_rate": 1.2973624014227544e-05, + "loss": 43.3252, + "step": 163210 + }, + { + "epoch": 0.6594294533304783, + "grad_norm": 621.4388427734375, + "learning_rate": 1.2971009563045502e-05, + "loss": 103.8959, + "step": 163220 + }, + { + "epoch": 0.659469854595846, + "grad_norm": 695.497314453125, + "learning_rate": 1.296839524889725e-05, + "loss": 61.2112, + "step": 163230 + }, + { + "epoch": 0.6595102558612136, + "grad_norm": 773.4712524414062, + "learning_rate": 1.2965781071833752e-05, + "loss": 69.1971, + "step": 163240 + }, + { + "epoch": 0.6595506571265812, + "grad_norm": 753.624755859375, + "learning_rate": 1.2963167031905965e-05, + "loss": 65.29, + "step": 163250 + }, + { + "epoch": 0.6595910583919489, + "grad_norm": 1140.5521240234375, + "learning_rate": 1.2960553129164868e-05, + "loss": 72.8018, + "step": 163260 + }, + { + "epoch": 0.6596314596573165, + "grad_norm": 875.9502563476562, + "learning_rate": 1.2957939363661405e-05, + "loss": 85.2134, + "step": 163270 + }, + { + "epoch": 0.6596718609226842, + "grad_norm": 388.39495849609375, + "learning_rate": 1.2955325735446547e-05, + "loss": 79.9508, + "step": 163280 + }, + { + "epoch": 0.6597122621880517, + "grad_norm": 587.2880859375, + "learning_rate": 1.295271224457124e-05, + "loss": 52.6669, + "step": 163290 + }, + { + "epoch": 0.6597526634534193, + "grad_norm": 717.3226928710938, + "learning_rate": 1.295009889108643e-05, + "loss": 80.8833, + "step": 163300 + }, + { + "epoch": 0.659793064718787, + "grad_norm": 797.6387329101562, + "learning_rate": 1.2947485675043078e-05, + "loss": 65.9774, + "step": 163310 + }, + { + "epoch": 0.6598334659841546, + "grad_norm": 507.2778015136719, + "learning_rate": 1.2944872596492118e-05, + "loss": 40.6026, + "step": 163320 + }, + { + "epoch": 0.6598738672495222, + "grad_norm": 790.5523681640625, + "learning_rate": 1.2942259655484504e-05, + "loss": 70.6933, + "step": 163330 + }, + { + "epoch": 0.6599142685148899, + "grad_norm": 624.9216918945312, + "learning_rate": 1.2939646852071172e-05, + "loss": 77.2558, + "step": 163340 + }, + { + "epoch": 0.6599546697802575, + "grad_norm": 387.4658508300781, + "learning_rate": 1.293703418630305e-05, + "loss": 42.3223, + "step": 163350 + }, + { + "epoch": 0.6599950710456252, + "grad_norm": 1332.978515625, + "learning_rate": 1.2934421658231094e-05, + "loss": 78.3253, + "step": 163360 + }, + { + "epoch": 0.6600354723109928, + "grad_norm": 1188.1224365234375, + "learning_rate": 1.2931809267906226e-05, + "loss": 96.9274, + "step": 163370 + }, + { + "epoch": 0.6600758735763604, + "grad_norm": 1817.00439453125, + "learning_rate": 1.2929197015379367e-05, + "loss": 70.8916, + "step": 163380 + }, + { + "epoch": 0.6601162748417281, + "grad_norm": 537.6634521484375, + "learning_rate": 1.2926584900701458e-05, + "loss": 59.4758, + "step": 163390 + }, + { + "epoch": 0.6601566761070957, + "grad_norm": 524.4244995117188, + "learning_rate": 1.2923972923923412e-05, + "loss": 63.8237, + "step": 163400 + }, + { + "epoch": 0.6601970773724634, + "grad_norm": 1965.5623779296875, + "learning_rate": 1.2921361085096162e-05, + "loss": 133.4546, + "step": 163410 + }, + { + "epoch": 0.6602374786378309, + "grad_norm": 1140.5338134765625, + "learning_rate": 
1.2918749384270623e-05, + "loss": 70.1521, + "step": 163420 + }, + { + "epoch": 0.6602778799031985, + "grad_norm": 542.899169921875, + "learning_rate": 1.2916137821497704e-05, + "loss": 52.207, + "step": 163430 + }, + { + "epoch": 0.6603182811685662, + "grad_norm": 955.0792236328125, + "learning_rate": 1.2913526396828333e-05, + "loss": 68.1006, + "step": 163440 + }, + { + "epoch": 0.6603586824339338, + "grad_norm": 528.7733154296875, + "learning_rate": 1.2910915110313412e-05, + "loss": 63.9712, + "step": 163450 + }, + { + "epoch": 0.6603990836993014, + "grad_norm": 390.0037536621094, + "learning_rate": 1.2908303962003848e-05, + "loss": 75.6853, + "step": 163460 + }, + { + "epoch": 0.6604394849646691, + "grad_norm": 296.12310791015625, + "learning_rate": 1.2905692951950548e-05, + "loss": 68.837, + "step": 163470 + }, + { + "epoch": 0.6604798862300367, + "grad_norm": 505.7550354003906, + "learning_rate": 1.2903082080204414e-05, + "loss": 65.1653, + "step": 163480 + }, + { + "epoch": 0.6605202874954044, + "grad_norm": 383.55596923828125, + "learning_rate": 1.2900471346816356e-05, + "loss": 49.5155, + "step": 163490 + }, + { + "epoch": 0.660560688760772, + "grad_norm": 532.6437377929688, + "learning_rate": 1.2897860751837264e-05, + "loss": 67.6802, + "step": 163500 + }, + { + "epoch": 0.6606010900261396, + "grad_norm": 490.95013427734375, + "learning_rate": 1.2895250295318025e-05, + "loss": 36.2071, + "step": 163510 + }, + { + "epoch": 0.6606414912915073, + "grad_norm": 680.7877807617188, + "learning_rate": 1.2892639977309546e-05, + "loss": 56.3237, + "step": 163520 + }, + { + "epoch": 0.6606818925568749, + "grad_norm": 573.7606811523438, + "learning_rate": 1.2890029797862711e-05, + "loss": 82.4938, + "step": 163530 + }, + { + "epoch": 0.6607222938222426, + "grad_norm": 394.6151428222656, + "learning_rate": 1.2887419757028397e-05, + "loss": 71.3474, + "step": 163540 + }, + { + "epoch": 0.6607626950876101, + "grad_norm": 939.590576171875, + "learning_rate": 1.2884809854857503e-05, + "loss": 55.3991, + "step": 163550 + }, + { + "epoch": 0.6608030963529777, + "grad_norm": 800.2073364257812, + "learning_rate": 1.2882200091400903e-05, + "loss": 51.1655, + "step": 163560 + }, + { + "epoch": 0.6608434976183454, + "grad_norm": 1052.583984375, + "learning_rate": 1.287959046670948e-05, + "loss": 75.8156, + "step": 163570 + }, + { + "epoch": 0.660883898883713, + "grad_norm": 714.9737548828125, + "learning_rate": 1.2876980980834106e-05, + "loss": 82.8008, + "step": 163580 + }, + { + "epoch": 0.6609243001490807, + "grad_norm": 643.9711303710938, + "learning_rate": 1.2874371633825646e-05, + "loss": 60.9101, + "step": 163590 + }, + { + "epoch": 0.6609647014144483, + "grad_norm": 503.0452880859375, + "learning_rate": 1.2871762425734989e-05, + "loss": 67.7177, + "step": 163600 + }, + { + "epoch": 0.6610051026798159, + "grad_norm": 707.712646484375, + "learning_rate": 1.2869153356612994e-05, + "loss": 64.2133, + "step": 163610 + }, + { + "epoch": 0.6610455039451836, + "grad_norm": 801.5703125, + "learning_rate": 1.2866544426510515e-05, + "loss": 80.4986, + "step": 163620 + }, + { + "epoch": 0.6610859052105512, + "grad_norm": 977.6168823242188, + "learning_rate": 1.2863935635478435e-05, + "loss": 65.0057, + "step": 163630 + }, + { + "epoch": 0.6611263064759189, + "grad_norm": 300.2110595703125, + "learning_rate": 1.2861326983567599e-05, + "loss": 78.3967, + "step": 163640 + }, + { + "epoch": 0.6611667077412865, + "grad_norm": 707.4789428710938, + "learning_rate": 1.2858718470828873e-05, + "loss": 69.1868, + 
"step": 163650 + }, + { + "epoch": 0.6612071090066541, + "grad_norm": 734.19970703125, + "learning_rate": 1.2856110097313106e-05, + "loss": 61.5292, + "step": 163660 + }, + { + "epoch": 0.6612475102720217, + "grad_norm": 1028.734375, + "learning_rate": 1.2853501863071143e-05, + "loss": 54.6329, + "step": 163670 + }, + { + "epoch": 0.6612879115373893, + "grad_norm": 598.5595703125, + "learning_rate": 1.285089376815385e-05, + "loss": 73.8539, + "step": 163680 + }, + { + "epoch": 0.661328312802757, + "grad_norm": 765.9414672851562, + "learning_rate": 1.284828581261206e-05, + "loss": 65.1324, + "step": 163690 + }, + { + "epoch": 0.6613687140681246, + "grad_norm": 673.5892944335938, + "learning_rate": 1.2845677996496615e-05, + "loss": 44.9673, + "step": 163700 + }, + { + "epoch": 0.6614091153334922, + "grad_norm": 645.6421508789062, + "learning_rate": 1.2843070319858369e-05, + "loss": 73.4708, + "step": 163710 + }, + { + "epoch": 0.6614495165988599, + "grad_norm": 761.6123657226562, + "learning_rate": 1.2840462782748147e-05, + "loss": 83.6836, + "step": 163720 + }, + { + "epoch": 0.6614899178642275, + "grad_norm": 486.1203918457031, + "learning_rate": 1.2837855385216793e-05, + "loss": 57.2283, + "step": 163730 + }, + { + "epoch": 0.6615303191295951, + "grad_norm": 471.9978332519531, + "learning_rate": 1.2835248127315137e-05, + "loss": 49.2092, + "step": 163740 + }, + { + "epoch": 0.6615707203949628, + "grad_norm": 689.5774536132812, + "learning_rate": 1.2832641009093995e-05, + "loss": 51.7485, + "step": 163750 + }, + { + "epoch": 0.6616111216603304, + "grad_norm": 335.810302734375, + "learning_rate": 1.2830034030604219e-05, + "loss": 52.4853, + "step": 163760 + }, + { + "epoch": 0.6616515229256981, + "grad_norm": 318.2032775878906, + "learning_rate": 1.282742719189662e-05, + "loss": 52.4812, + "step": 163770 + }, + { + "epoch": 0.6616919241910657, + "grad_norm": 356.6837158203125, + "learning_rate": 1.282482049302201e-05, + "loss": 54.9037, + "step": 163780 + }, + { + "epoch": 0.6617323254564333, + "grad_norm": 746.3135375976562, + "learning_rate": 1.282221393403123e-05, + "loss": 75.2706, + "step": 163790 + }, + { + "epoch": 0.6617727267218009, + "grad_norm": 580.2225952148438, + "learning_rate": 1.2819607514975077e-05, + "loss": 70.2328, + "step": 163800 + }, + { + "epoch": 0.6618131279871685, + "grad_norm": 505.9123229980469, + "learning_rate": 1.2817001235904373e-05, + "loss": 109.1371, + "step": 163810 + }, + { + "epoch": 0.6618535292525362, + "grad_norm": 444.1342468261719, + "learning_rate": 1.2814395096869936e-05, + "loss": 41.8882, + "step": 163820 + }, + { + "epoch": 0.6618939305179038, + "grad_norm": 622.047119140625, + "learning_rate": 1.281178909792256e-05, + "loss": 94.4628, + "step": 163830 + }, + { + "epoch": 0.6619343317832714, + "grad_norm": 894.77197265625, + "learning_rate": 1.2809183239113059e-05, + "loss": 105.4047, + "step": 163840 + }, + { + "epoch": 0.6619747330486391, + "grad_norm": 783.7378540039062, + "learning_rate": 1.2806577520492225e-05, + "loss": 73.8258, + "step": 163850 + }, + { + "epoch": 0.6620151343140067, + "grad_norm": 595.0022583007812, + "learning_rate": 1.2803971942110875e-05, + "loss": 128.6606, + "step": 163860 + }, + { + "epoch": 0.6620555355793744, + "grad_norm": 996.3068237304688, + "learning_rate": 1.2801366504019797e-05, + "loss": 86.8564, + "step": 163870 + }, + { + "epoch": 0.662095936844742, + "grad_norm": 269.5316467285156, + "learning_rate": 1.2798761206269773e-05, + "loss": 60.8242, + "step": 163880 + }, + { + "epoch": 0.6621363381101096, 
+ "grad_norm": 413.9193420410156, + "learning_rate": 1.2796156048911619e-05, + "loss": 45.6619, + "step": 163890 + }, + { + "epoch": 0.6621767393754773, + "grad_norm": 891.3602294921875, + "learning_rate": 1.279355103199611e-05, + "loss": 67.1903, + "step": 163900 + }, + { + "epoch": 0.6622171406408449, + "grad_norm": 767.490234375, + "learning_rate": 1.279094615557403e-05, + "loss": 79.457, + "step": 163910 + }, + { + "epoch": 0.6622575419062126, + "grad_norm": 808.9861450195312, + "learning_rate": 1.278834141969617e-05, + "loss": 58.23, + "step": 163920 + }, + { + "epoch": 0.6622979431715801, + "grad_norm": 364.12420654296875, + "learning_rate": 1.2785736824413299e-05, + "loss": 90.3226, + "step": 163930 + }, + { + "epoch": 0.6623383444369477, + "grad_norm": 459.7276611328125, + "learning_rate": 1.278313236977621e-05, + "loss": 61.2227, + "step": 163940 + }, + { + "epoch": 0.6623787457023154, + "grad_norm": 596.0558471679688, + "learning_rate": 1.2780528055835672e-05, + "loss": 70.2616, + "step": 163950 + }, + { + "epoch": 0.662419146967683, + "grad_norm": 1356.2408447265625, + "learning_rate": 1.277792388264245e-05, + "loss": 77.5955, + "step": 163960 + }, + { + "epoch": 0.6624595482330506, + "grad_norm": 594.5468139648438, + "learning_rate": 1.2775319850247325e-05, + "loss": 68.618, + "step": 163970 + }, + { + "epoch": 0.6624999494984183, + "grad_norm": 556.970947265625, + "learning_rate": 1.2772715958701061e-05, + "loss": 46.8667, + "step": 163980 + }, + { + "epoch": 0.6625403507637859, + "grad_norm": 1196.2359619140625, + "learning_rate": 1.2770112208054415e-05, + "loss": 64.4091, + "step": 163990 + }, + { + "epoch": 0.6625807520291536, + "grad_norm": 496.84588623046875, + "learning_rate": 1.2767508598358158e-05, + "loss": 94.2984, + "step": 164000 + }, + { + "epoch": 0.6626211532945212, + "grad_norm": 785.2151489257812, + "learning_rate": 1.2764905129663037e-05, + "loss": 51.036, + "step": 164010 + }, + { + "epoch": 0.6626615545598888, + "grad_norm": 539.268798828125, + "learning_rate": 1.2762301802019823e-05, + "loss": 55.0654, + "step": 164020 + }, + { + "epoch": 0.6627019558252565, + "grad_norm": 1014.3053588867188, + "learning_rate": 1.2759698615479263e-05, + "loss": 71.1372, + "step": 164030 + }, + { + "epoch": 0.6627423570906241, + "grad_norm": 962.7448120117188, + "learning_rate": 1.2757095570092095e-05, + "loss": 71.1523, + "step": 164040 + }, + { + "epoch": 0.6627827583559918, + "grad_norm": 453.8319396972656, + "learning_rate": 1.2754492665909088e-05, + "loss": 67.1578, + "step": 164050 + }, + { + "epoch": 0.6628231596213593, + "grad_norm": 599.8483276367188, + "learning_rate": 1.2751889902980977e-05, + "loss": 69.4516, + "step": 164060 + }, + { + "epoch": 0.6628635608867269, + "grad_norm": 544.8514404296875, + "learning_rate": 1.2749287281358498e-05, + "loss": 79.5923, + "step": 164070 + }, + { + "epoch": 0.6629039621520946, + "grad_norm": 434.2205810546875, + "learning_rate": 1.2746684801092402e-05, + "loss": 68.9575, + "step": 164080 + }, + { + "epoch": 0.6629443634174622, + "grad_norm": 607.1571655273438, + "learning_rate": 1.274408246223341e-05, + "loss": 75.5449, + "step": 164090 + }, + { + "epoch": 0.6629847646828299, + "grad_norm": 457.59344482421875, + "learning_rate": 1.2741480264832276e-05, + "loss": 69.5563, + "step": 164100 + }, + { + "epoch": 0.6630251659481975, + "grad_norm": 978.7197265625, + "learning_rate": 1.2738878208939722e-05, + "loss": 87.2308, + "step": 164110 + }, + { + "epoch": 0.6630655672135651, + "grad_norm": 1076.828125, + "learning_rate": 
1.2736276294606466e-05, + "loss": 63.251, + "step": 164120 + }, + { + "epoch": 0.6631059684789328, + "grad_norm": 579.8359985351562, + "learning_rate": 1.273367452188325e-05, + "loss": 70.1568, + "step": 164130 + }, + { + "epoch": 0.6631463697443004, + "grad_norm": 754.3392944335938, + "learning_rate": 1.2731072890820792e-05, + "loss": 61.8639, + "step": 164140 + }, + { + "epoch": 0.663186771009668, + "grad_norm": 403.4517517089844, + "learning_rate": 1.2728471401469806e-05, + "loss": 55.2065, + "step": 164150 + }, + { + "epoch": 0.6632271722750357, + "grad_norm": 643.2023315429688, + "learning_rate": 1.2725870053881013e-05, + "loss": 74.3693, + "step": 164160 + }, + { + "epoch": 0.6632675735404033, + "grad_norm": 1075.7005615234375, + "learning_rate": 1.272326884810513e-05, + "loss": 88.6034, + "step": 164170 + }, + { + "epoch": 0.6633079748057709, + "grad_norm": 1031.884765625, + "learning_rate": 1.272066778419287e-05, + "loss": 95.3536, + "step": 164180 + }, + { + "epoch": 0.6633483760711385, + "grad_norm": 768.5598754882812, + "learning_rate": 1.271806686219494e-05, + "loss": 101.686, + "step": 164190 + }, + { + "epoch": 0.6633887773365061, + "grad_norm": 451.9229431152344, + "learning_rate": 1.2715466082162036e-05, + "loss": 58.7626, + "step": 164200 + }, + { + "epoch": 0.6634291786018738, + "grad_norm": 575.6402587890625, + "learning_rate": 1.271286544414488e-05, + "loss": 60.1065, + "step": 164210 + }, + { + "epoch": 0.6634695798672414, + "grad_norm": 947.1917724609375, + "learning_rate": 1.2710264948194162e-05, + "loss": 95.2893, + "step": 164220 + }, + { + "epoch": 0.6635099811326091, + "grad_norm": 414.499755859375, + "learning_rate": 1.2707664594360578e-05, + "loss": 52.031, + "step": 164230 + }, + { + "epoch": 0.6635503823979767, + "grad_norm": 486.61767578125, + "learning_rate": 1.2705064382694832e-05, + "loss": 40.8457, + "step": 164240 + }, + { + "epoch": 0.6635907836633443, + "grad_norm": 512.0439453125, + "learning_rate": 1.2702464313247606e-05, + "loss": 93.1892, + "step": 164250 + }, + { + "epoch": 0.663631184928712, + "grad_norm": 494.8249206542969, + "learning_rate": 1.2699864386069603e-05, + "loss": 83.4956, + "step": 164260 + }, + { + "epoch": 0.6636715861940796, + "grad_norm": 487.55194091796875, + "learning_rate": 1.26972646012115e-05, + "loss": 73.5705, + "step": 164270 + }, + { + "epoch": 0.6637119874594473, + "grad_norm": 773.4144897460938, + "learning_rate": 1.2694664958723974e-05, + "loss": 75.4842, + "step": 164280 + }, + { + "epoch": 0.6637523887248149, + "grad_norm": 450.8471984863281, + "learning_rate": 1.2692065458657727e-05, + "loss": 67.412, + "step": 164290 + }, + { + "epoch": 0.6637927899901825, + "grad_norm": 537.3154907226562, + "learning_rate": 1.2689466101063426e-05, + "loss": 40.1903, + "step": 164300 + }, + { + "epoch": 0.6638331912555501, + "grad_norm": 510.3832092285156, + "learning_rate": 1.2686866885991735e-05, + "loss": 80.2708, + "step": 164310 + }, + { + "epoch": 0.6638735925209177, + "grad_norm": 641.2826538085938, + "learning_rate": 1.2684267813493353e-05, + "loss": 63.0352, + "step": 164320 + }, + { + "epoch": 0.6639139937862854, + "grad_norm": 646.3179321289062, + "learning_rate": 1.268166888361893e-05, + "loss": 63.1281, + "step": 164330 + }, + { + "epoch": 0.663954395051653, + "grad_norm": 930.9502563476562, + "learning_rate": 1.2679070096419141e-05, + "loss": 58.5624, + "step": 164340 + }, + { + "epoch": 0.6639947963170206, + "grad_norm": 626.4747314453125, + "learning_rate": 1.2676471451944651e-05, + "loss": 76.6372, + "step": 
164350 + }, + { + "epoch": 0.6640351975823883, + "grad_norm": 582.95068359375, + "learning_rate": 1.2673872950246113e-05, + "loss": 61.5528, + "step": 164360 + }, + { + "epoch": 0.6640755988477559, + "grad_norm": 712.9442749023438, + "learning_rate": 1.2671274591374203e-05, + "loss": 73.5243, + "step": 164370 + }, + { + "epoch": 0.6641160001131236, + "grad_norm": 781.6553955078125, + "learning_rate": 1.2668676375379558e-05, + "loss": 51.9257, + "step": 164380 + }, + { + "epoch": 0.6641564013784912, + "grad_norm": 332.02362060546875, + "learning_rate": 1.2666078302312852e-05, + "loss": 80.9664, + "step": 164390 + }, + { + "epoch": 0.6641968026438588, + "grad_norm": 460.5972595214844, + "learning_rate": 1.266348037222472e-05, + "loss": 52.8631, + "step": 164400 + }, + { + "epoch": 0.6642372039092265, + "grad_norm": 781.871337890625, + "learning_rate": 1.2660882585165817e-05, + "loss": 59.7023, + "step": 164410 + }, + { + "epoch": 0.6642776051745941, + "grad_norm": 685.9246826171875, + "learning_rate": 1.2658284941186787e-05, + "loss": 75.8905, + "step": 164420 + }, + { + "epoch": 0.6643180064399618, + "grad_norm": 426.0966796875, + "learning_rate": 1.265568744033827e-05, + "loss": 61.1958, + "step": 164430 + }, + { + "epoch": 0.6643584077053293, + "grad_norm": 761.6057739257812, + "learning_rate": 1.2653090082670905e-05, + "loss": 53.0966, + "step": 164440 + }, + { + "epoch": 0.6643988089706969, + "grad_norm": 764.7247314453125, + "learning_rate": 1.2650492868235337e-05, + "loss": 73.7391, + "step": 164450 + }, + { + "epoch": 0.6644392102360646, + "grad_norm": 164.6898651123047, + "learning_rate": 1.2647895797082185e-05, + "loss": 70.4515, + "step": 164460 + }, + { + "epoch": 0.6644796115014322, + "grad_norm": 670.4932250976562, + "learning_rate": 1.2645298869262099e-05, + "loss": 50.5572, + "step": 164470 + }, + { + "epoch": 0.6645200127667998, + "grad_norm": 682.3133544921875, + "learning_rate": 1.2642702084825698e-05, + "loss": 76.8459, + "step": 164480 + }, + { + "epoch": 0.6645604140321675, + "grad_norm": 867.5704345703125, + "learning_rate": 1.2640105443823602e-05, + "loss": 36.3316, + "step": 164490 + }, + { + "epoch": 0.6646008152975351, + "grad_norm": 768.0648803710938, + "learning_rate": 1.2637508946306443e-05, + "loss": 47.7674, + "step": 164500 + }, + { + "epoch": 0.6646412165629028, + "grad_norm": 582.4615478515625, + "learning_rate": 1.2634912592324837e-05, + "loss": 86.7193, + "step": 164510 + }, + { + "epoch": 0.6646816178282704, + "grad_norm": 746.5997314453125, + "learning_rate": 1.2632316381929402e-05, + "loss": 70.4799, + "step": 164520 + }, + { + "epoch": 0.664722019093638, + "grad_norm": 562.1595458984375, + "learning_rate": 1.2629720315170753e-05, + "loss": 55.7654, + "step": 164530 + }, + { + "epoch": 0.6647624203590057, + "grad_norm": 596.5144653320312, + "learning_rate": 1.2627124392099495e-05, + "loss": 72.741, + "step": 164540 + }, + { + "epoch": 0.6648028216243733, + "grad_norm": 1662.4600830078125, + "learning_rate": 1.2624528612766249e-05, + "loss": 95.8436, + "step": 164550 + }, + { + "epoch": 0.664843222889741, + "grad_norm": 840.958251953125, + "learning_rate": 1.2621932977221616e-05, + "loss": 71.8182, + "step": 164560 + }, + { + "epoch": 0.6648836241551085, + "grad_norm": 521.9920654296875, + "learning_rate": 1.261933748551619e-05, + "loss": 86.3905, + "step": 164570 + }, + { + "epoch": 0.6649240254204761, + "grad_norm": 1134.0340576171875, + "learning_rate": 1.2616742137700588e-05, + "loss": 55.7173, + "step": 164580 + }, + { + "epoch": 
0.6649644266858438, + "grad_norm": 443.336669921875, + "learning_rate": 1.2614146933825397e-05, + "loss": 56.0771, + "step": 164590 + }, + { + "epoch": 0.6650048279512114, + "grad_norm": 667.9773559570312, + "learning_rate": 1.2611551873941213e-05, + "loss": 56.4001, + "step": 164600 + }, + { + "epoch": 0.665045229216579, + "grad_norm": 626.1738891601562, + "learning_rate": 1.2608956958098633e-05, + "loss": 58.742, + "step": 164610 + }, + { + "epoch": 0.6650856304819467, + "grad_norm": 603.0907592773438, + "learning_rate": 1.260636218634823e-05, + "loss": 81.2939, + "step": 164620 + }, + { + "epoch": 0.6651260317473143, + "grad_norm": 996.8347778320312, + "learning_rate": 1.2603767558740616e-05, + "loss": 86.6408, + "step": 164630 + }, + { + "epoch": 0.665166433012682, + "grad_norm": 743.0521240234375, + "learning_rate": 1.2601173075326359e-05, + "loss": 67.2222, + "step": 164640 + }, + { + "epoch": 0.6652068342780496, + "grad_norm": 601.7693481445312, + "learning_rate": 1.2598578736156033e-05, + "loss": 65.0944, + "step": 164650 + }, + { + "epoch": 0.6652472355434172, + "grad_norm": 913.5000610351562, + "learning_rate": 1.2595984541280232e-05, + "loss": 81.1366, + "step": 164660 + }, + { + "epoch": 0.6652876368087849, + "grad_norm": 369.56781005859375, + "learning_rate": 1.2593390490749529e-05, + "loss": 72.4916, + "step": 164670 + }, + { + "epoch": 0.6653280380741525, + "grad_norm": 782.8217163085938, + "learning_rate": 1.2590796584614484e-05, + "loss": 75.5894, + "step": 164680 + }, + { + "epoch": 0.6653684393395202, + "grad_norm": 378.9518737792969, + "learning_rate": 1.258820282292568e-05, + "loss": 51.5267, + "step": 164690 + }, + { + "epoch": 0.6654088406048877, + "grad_norm": 834.6336669921875, + "learning_rate": 1.2585609205733666e-05, + "loss": 69.0943, + "step": 164700 + }, + { + "epoch": 0.6654492418702553, + "grad_norm": 544.8213500976562, + "learning_rate": 1.258301573308903e-05, + "loss": 58.4755, + "step": 164710 + }, + { + "epoch": 0.665489643135623, + "grad_norm": 549.1813354492188, + "learning_rate": 1.2580422405042318e-05, + "loss": 67.0954, + "step": 164720 + }, + { + "epoch": 0.6655300444009906, + "grad_norm": 1120.6983642578125, + "learning_rate": 1.2577829221644086e-05, + "loss": 90.3229, + "step": 164730 + }, + { + "epoch": 0.6655704456663583, + "grad_norm": 582.8544311523438, + "learning_rate": 1.2575236182944901e-05, + "loss": 111.768, + "step": 164740 + }, + { + "epoch": 0.6656108469317259, + "grad_norm": 670.63330078125, + "learning_rate": 1.257264328899531e-05, + "loss": 61.1108, + "step": 164750 + }, + { + "epoch": 0.6656512481970935, + "grad_norm": 976.0435791015625, + "learning_rate": 1.2570050539845854e-05, + "loss": 63.3765, + "step": 164760 + }, + { + "epoch": 0.6656916494624612, + "grad_norm": 539.5623168945312, + "learning_rate": 1.2567457935547096e-05, + "loss": 62.4187, + "step": 164770 + }, + { + "epoch": 0.6657320507278288, + "grad_norm": 289.2821044921875, + "learning_rate": 1.2564865476149564e-05, + "loss": 64.9918, + "step": 164780 + }, + { + "epoch": 0.6657724519931965, + "grad_norm": 905.7900390625, + "learning_rate": 1.2562273161703814e-05, + "loss": 79.1574, + "step": 164790 + }, + { + "epoch": 0.6658128532585641, + "grad_norm": 636.4286499023438, + "learning_rate": 1.255968099226038e-05, + "loss": 88.3483, + "step": 164800 + }, + { + "epoch": 0.6658532545239317, + "grad_norm": 448.12396240234375, + "learning_rate": 1.2557088967869784e-05, + "loss": 70.1155, + "step": 164810 + }, + { + "epoch": 0.6658936557892993, + "grad_norm": 
975.1693725585938, + "learning_rate": 1.255449708858258e-05, + "loss": 67.7664, + "step": 164820 + }, + { + "epoch": 0.6659340570546669, + "grad_norm": 680.4557495117188, + "learning_rate": 1.2551905354449288e-05, + "loss": 67.2388, + "step": 164830 + }, + { + "epoch": 0.6659744583200345, + "grad_norm": 1235.439453125, + "learning_rate": 1.254931376552043e-05, + "loss": 75.98, + "step": 164840 + }, + { + "epoch": 0.6660148595854022, + "grad_norm": 1155.7841796875, + "learning_rate": 1.2546722321846541e-05, + "loss": 78.0746, + "step": 164850 + }, + { + "epoch": 0.6660552608507698, + "grad_norm": 526.6534423828125, + "learning_rate": 1.2544131023478134e-05, + "loss": 58.7437, + "step": 164860 + }, + { + "epoch": 0.6660956621161375, + "grad_norm": 760.8956298828125, + "learning_rate": 1.2541539870465738e-05, + "loss": 66.2371, + "step": 164870 + }, + { + "epoch": 0.6661360633815051, + "grad_norm": 854.1358642578125, + "learning_rate": 1.2538948862859859e-05, + "loss": 69.0985, + "step": 164880 + }, + { + "epoch": 0.6661764646468727, + "grad_norm": 719.61962890625, + "learning_rate": 1.2536358000711007e-05, + "loss": 64.4407, + "step": 164890 + }, + { + "epoch": 0.6662168659122404, + "grad_norm": 2105.343017578125, + "learning_rate": 1.2533767284069707e-05, + "loss": 108.6591, + "step": 164900 + }, + { + "epoch": 0.666257267177608, + "grad_norm": 749.1727905273438, + "learning_rate": 1.2531176712986448e-05, + "loss": 67.4566, + "step": 164910 + }, + { + "epoch": 0.6662976684429757, + "grad_norm": 478.8449401855469, + "learning_rate": 1.252858628751175e-05, + "loss": 58.6638, + "step": 164920 + }, + { + "epoch": 0.6663380697083433, + "grad_norm": 536.7737426757812, + "learning_rate": 1.2525996007696113e-05, + "loss": 86.0519, + "step": 164930 + }, + { + "epoch": 0.666378470973711, + "grad_norm": 392.8258056640625, + "learning_rate": 1.2523405873590027e-05, + "loss": 67.2794, + "step": 164940 + }, + { + "epoch": 0.6664188722390785, + "grad_norm": 895.5051879882812, + "learning_rate": 1.2520815885243996e-05, + "loss": 101.0376, + "step": 164950 + }, + { + "epoch": 0.6664592735044461, + "grad_norm": 773.4527587890625, + "learning_rate": 1.251822604270851e-05, + "loss": 69.2208, + "step": 164960 + }, + { + "epoch": 0.6664996747698138, + "grad_norm": 493.3707275390625, + "learning_rate": 1.251563634603405e-05, + "loss": 54.3137, + "step": 164970 + }, + { + "epoch": 0.6665400760351814, + "grad_norm": 771.895751953125, + "learning_rate": 1.2513046795271122e-05, + "loss": 104.66, + "step": 164980 + }, + { + "epoch": 0.666580477300549, + "grad_norm": 591.312744140625, + "learning_rate": 1.2510457390470192e-05, + "loss": 59.3757, + "step": 164990 + }, + { + "epoch": 0.6666208785659167, + "grad_norm": 591.4776000976562, + "learning_rate": 1.250786813168176e-05, + "loss": 73.0267, + "step": 165000 + }, + { + "epoch": 0.6666612798312843, + "grad_norm": 1286.927001953125, + "learning_rate": 1.2505279018956295e-05, + "loss": 78.6216, + "step": 165010 + }, + { + "epoch": 0.666701681096652, + "grad_norm": 683.5189819335938, + "learning_rate": 1.250269005234427e-05, + "loss": 65.0744, + "step": 165020 + }, + { + "epoch": 0.6667420823620196, + "grad_norm": 708.02734375, + "learning_rate": 1.250010123189617e-05, + "loss": 49.7025, + "step": 165030 + }, + { + "epoch": 0.6667824836273872, + "grad_norm": 776.1995849609375, + "learning_rate": 1.2497512557662455e-05, + "loss": 51.4863, + "step": 165040 + }, + { + "epoch": 0.6668228848927549, + "grad_norm": 662.41650390625, + "learning_rate": 1.2494924029693586e-05, + 
"loss": 100.2398, + "step": 165050 + }, + { + "epoch": 0.6668632861581225, + "grad_norm": 762.2227172851562, + "learning_rate": 1.2492335648040046e-05, + "loss": 91.4273, + "step": 165060 + }, + { + "epoch": 0.6669036874234902, + "grad_norm": 447.3927307128906, + "learning_rate": 1.248974741275228e-05, + "loss": 68.1618, + "step": 165070 + }, + { + "epoch": 0.6669440886888577, + "grad_norm": 464.33929443359375, + "learning_rate": 1.2487159323880761e-05, + "loss": 98.1342, + "step": 165080 + }, + { + "epoch": 0.6669844899542253, + "grad_norm": 884.4959106445312, + "learning_rate": 1.2484571381475941e-05, + "loss": 98.4963, + "step": 165090 + }, + { + "epoch": 0.667024891219593, + "grad_norm": 784.82568359375, + "learning_rate": 1.2481983585588266e-05, + "loss": 100.9786, + "step": 165100 + }, + { + "epoch": 0.6670652924849606, + "grad_norm": 679.3125610351562, + "learning_rate": 1.2479395936268198e-05, + "loss": 64.2742, + "step": 165110 + }, + { + "epoch": 0.6671056937503282, + "grad_norm": 626.9381103515625, + "learning_rate": 1.247680843356617e-05, + "loss": 54.6444, + "step": 165120 + }, + { + "epoch": 0.6671460950156959, + "grad_norm": 448.9506530761719, + "learning_rate": 1.2474221077532638e-05, + "loss": 76.6746, + "step": 165130 + }, + { + "epoch": 0.6671864962810635, + "grad_norm": 446.64886474609375, + "learning_rate": 1.2471633868218046e-05, + "loss": 54.3846, + "step": 165140 + }, + { + "epoch": 0.6672268975464312, + "grad_norm": 828.7030029296875, + "learning_rate": 1.2469046805672816e-05, + "loss": 76.5914, + "step": 165150 + }, + { + "epoch": 0.6672672988117988, + "grad_norm": 573.408447265625, + "learning_rate": 1.2466459889947403e-05, + "loss": 59.0702, + "step": 165160 + }, + { + "epoch": 0.6673077000771664, + "grad_norm": 422.81256103515625, + "learning_rate": 1.2463873121092236e-05, + "loss": 90.1824, + "step": 165170 + }, + { + "epoch": 0.6673481013425341, + "grad_norm": 378.48345947265625, + "learning_rate": 1.2461286499157733e-05, + "loss": 59.5806, + "step": 165180 + }, + { + "epoch": 0.6673885026079017, + "grad_norm": 822.7426147460938, + "learning_rate": 1.2458700024194339e-05, + "loss": 56.7616, + "step": 165190 + }, + { + "epoch": 0.6674289038732694, + "grad_norm": 644.861083984375, + "learning_rate": 1.2456113696252471e-05, + "loss": 50.0629, + "step": 165200 + }, + { + "epoch": 0.6674693051386369, + "grad_norm": 867.478271484375, + "learning_rate": 1.2453527515382544e-05, + "loss": 59.028, + "step": 165210 + }, + { + "epoch": 0.6675097064040045, + "grad_norm": 884.64111328125, + "learning_rate": 1.245094148163499e-05, + "loss": 82.158, + "step": 165220 + }, + { + "epoch": 0.6675501076693722, + "grad_norm": 1443.7120361328125, + "learning_rate": 1.2448355595060208e-05, + "loss": 92.5875, + "step": 165230 + }, + { + "epoch": 0.6675905089347398, + "grad_norm": 640.9130249023438, + "learning_rate": 1.2445769855708634e-05, + "loss": 78.7477, + "step": 165240 + }, + { + "epoch": 0.6676309102001075, + "grad_norm": 965.7861938476562, + "learning_rate": 1.244318426363066e-05, + "loss": 71.8691, + "step": 165250 + }, + { + "epoch": 0.6676713114654751, + "grad_norm": 721.397705078125, + "learning_rate": 1.2440598818876697e-05, + "loss": 56.3166, + "step": 165260 + }, + { + "epoch": 0.6677117127308427, + "grad_norm": 1002.73876953125, + "learning_rate": 1.2438013521497156e-05, + "loss": 66.4909, + "step": 165270 + }, + { + "epoch": 0.6677521139962104, + "grad_norm": 471.5978088378906, + "learning_rate": 1.2435428371542435e-05, + "loss": 66.9739, + "step": 165280 + }, + { + 
"epoch": 0.667792515261578, + "grad_norm": 768.2531127929688, + "learning_rate": 1.2432843369062931e-05, + "loss": 76.327, + "step": 165290 + }, + { + "epoch": 0.6678329165269457, + "grad_norm": 506.47857666015625, + "learning_rate": 1.2430258514109043e-05, + "loss": 71.1391, + "step": 165300 + }, + { + "epoch": 0.6678733177923133, + "grad_norm": 900.3447265625, + "learning_rate": 1.2427673806731156e-05, + "loss": 70.9567, + "step": 165310 + }, + { + "epoch": 0.6679137190576809, + "grad_norm": 754.1245727539062, + "learning_rate": 1.2425089246979672e-05, + "loss": 60.5651, + "step": 165320 + }, + { + "epoch": 0.6679541203230486, + "grad_norm": 792.5277099609375, + "learning_rate": 1.2422504834904973e-05, + "loss": 77.8409, + "step": 165330 + }, + { + "epoch": 0.6679945215884161, + "grad_norm": 691.044189453125, + "learning_rate": 1.241992057055744e-05, + "loss": 84.13, + "step": 165340 + }, + { + "epoch": 0.6680349228537837, + "grad_norm": 1199.5198974609375, + "learning_rate": 1.2417336453987461e-05, + "loss": 92.2448, + "step": 165350 + }, + { + "epoch": 0.6680753241191514, + "grad_norm": 787.0972900390625, + "learning_rate": 1.2414752485245413e-05, + "loss": 88.2413, + "step": 165360 + }, + { + "epoch": 0.668115725384519, + "grad_norm": 782.9557495117188, + "learning_rate": 1.2412168664381668e-05, + "loss": 55.4247, + "step": 165370 + }, + { + "epoch": 0.6681561266498867, + "grad_norm": 982.5704956054688, + "learning_rate": 1.2409584991446603e-05, + "loss": 69.5115, + "step": 165380 + }, + { + "epoch": 0.6681965279152543, + "grad_norm": 2754.338623046875, + "learning_rate": 1.240700146649058e-05, + "loss": 74.4554, + "step": 165390 + }, + { + "epoch": 0.6682369291806219, + "grad_norm": 1186.745361328125, + "learning_rate": 1.2404418089563982e-05, + "loss": 71.8933, + "step": 165400 + }, + { + "epoch": 0.6682773304459896, + "grad_norm": 967.9552001953125, + "learning_rate": 1.2401834860717161e-05, + "loss": 85.8938, + "step": 165410 + }, + { + "epoch": 0.6683177317113572, + "grad_norm": 725.123291015625, + "learning_rate": 1.2399251780000474e-05, + "loss": 49.368, + "step": 165420 + }, + { + "epoch": 0.6683581329767249, + "grad_norm": 472.2947998046875, + "learning_rate": 1.2396668847464296e-05, + "loss": 54.1754, + "step": 165430 + }, + { + "epoch": 0.6683985342420925, + "grad_norm": 1490.9659423828125, + "learning_rate": 1.2394086063158968e-05, + "loss": 66.9115, + "step": 165440 + }, + { + "epoch": 0.6684389355074601, + "grad_norm": 1224.425537109375, + "learning_rate": 1.2391503427134852e-05, + "loss": 88.5017, + "step": 165450 + }, + { + "epoch": 0.6684793367728277, + "grad_norm": 500.935791015625, + "learning_rate": 1.2388920939442287e-05, + "loss": 62.1617, + "step": 165460 + }, + { + "epoch": 0.6685197380381953, + "grad_norm": 992.4223022460938, + "learning_rate": 1.2386338600131631e-05, + "loss": 115.9584, + "step": 165470 + }, + { + "epoch": 0.668560139303563, + "grad_norm": 755.9528198242188, + "learning_rate": 1.2383756409253228e-05, + "loss": 57.2509, + "step": 165480 + }, + { + "epoch": 0.6686005405689306, + "grad_norm": 324.3246765136719, + "learning_rate": 1.2381174366857413e-05, + "loss": 60.3939, + "step": 165490 + }, + { + "epoch": 0.6686409418342982, + "grad_norm": 425.8306884765625, + "learning_rate": 1.237859247299452e-05, + "loss": 48.9267, + "step": 165500 + }, + { + "epoch": 0.6686813430996659, + "grad_norm": 781.4254150390625, + "learning_rate": 1.2376010727714896e-05, + "loss": 63.2799, + "step": 165510 + }, + { + "epoch": 0.6687217443650335, + "grad_norm": 
444.3708801269531, + "learning_rate": 1.2373429131068862e-05, + "loss": 74.9928, + "step": 165520 + }, + { + "epoch": 0.6687621456304012, + "grad_norm": 495.51397705078125, + "learning_rate": 1.237084768310676e-05, + "loss": 57.5232, + "step": 165530 + }, + { + "epoch": 0.6688025468957688, + "grad_norm": 328.41973876953125, + "learning_rate": 1.2368266383878908e-05, + "loss": 61.0105, + "step": 165540 + }, + { + "epoch": 0.6688429481611364, + "grad_norm": 948.8155517578125, + "learning_rate": 1.2365685233435634e-05, + "loss": 72.3629, + "step": 165550 + }, + { + "epoch": 0.6688833494265041, + "grad_norm": 998.72021484375, + "learning_rate": 1.2363104231827254e-05, + "loss": 71.6022, + "step": 165560 + }, + { + "epoch": 0.6689237506918717, + "grad_norm": 482.0772705078125, + "learning_rate": 1.2360523379104094e-05, + "loss": 39.4305, + "step": 165570 + }, + { + "epoch": 0.6689641519572394, + "grad_norm": 1934.9229736328125, + "learning_rate": 1.2357942675316452e-05, + "loss": 87.0681, + "step": 165580 + }, + { + "epoch": 0.6690045532226069, + "grad_norm": 1283.6273193359375, + "learning_rate": 1.2355362120514665e-05, + "loss": 90.9739, + "step": 165590 + }, + { + "epoch": 0.6690449544879745, + "grad_norm": 960.4305419921875, + "learning_rate": 1.2352781714749016e-05, + "loss": 57.9805, + "step": 165600 + }, + { + "epoch": 0.6690853557533422, + "grad_norm": 662.7755737304688, + "learning_rate": 1.2350201458069834e-05, + "loss": 61.429, + "step": 165610 + }, + { + "epoch": 0.6691257570187098, + "grad_norm": 494.8988952636719, + "learning_rate": 1.2347621350527415e-05, + "loss": 62.5858, + "step": 165620 + }, + { + "epoch": 0.6691661582840774, + "grad_norm": 643.9046630859375, + "learning_rate": 1.2345041392172052e-05, + "loss": 69.2719, + "step": 165630 + }, + { + "epoch": 0.6692065595494451, + "grad_norm": 728.0170288085938, + "learning_rate": 1.2342461583054051e-05, + "loss": 46.1444, + "step": 165640 + }, + { + "epoch": 0.6692469608148127, + "grad_norm": 830.3018798828125, + "learning_rate": 1.2339881923223706e-05, + "loss": 66.2625, + "step": 165650 + }, + { + "epoch": 0.6692873620801804, + "grad_norm": 492.59197998046875, + "learning_rate": 1.2337302412731298e-05, + "loss": 106.1516, + "step": 165660 + }, + { + "epoch": 0.669327763345548, + "grad_norm": 470.95880126953125, + "learning_rate": 1.2334723051627131e-05, + "loss": 65.0397, + "step": 165670 + }, + { + "epoch": 0.6693681646109156, + "grad_norm": 465.9328308105469, + "learning_rate": 1.2332143839961477e-05, + "loss": 67.7862, + "step": 165680 + }, + { + "epoch": 0.6694085658762833, + "grad_norm": 537.541015625, + "learning_rate": 1.2329564777784637e-05, + "loss": 53.5986, + "step": 165690 + }, + { + "epoch": 0.6694489671416509, + "grad_norm": 907.783935546875, + "learning_rate": 1.2326985865146877e-05, + "loss": 88.1286, + "step": 165700 + }, + { + "epoch": 0.6694893684070186, + "grad_norm": 349.593505859375, + "learning_rate": 1.2324407102098474e-05, + "loss": 66.9911, + "step": 165710 + }, + { + "epoch": 0.6695297696723861, + "grad_norm": 462.83587646484375, + "learning_rate": 1.2321828488689716e-05, + "loss": 82.5695, + "step": 165720 + }, + { + "epoch": 0.6695701709377537, + "grad_norm": 1262.3251953125, + "learning_rate": 1.2319250024970857e-05, + "loss": 76.5861, + "step": 165730 + }, + { + "epoch": 0.6696105722031214, + "grad_norm": 980.765625, + "learning_rate": 1.2316671710992169e-05, + "loss": 70.8996, + "step": 165740 + }, + { + "epoch": 0.669650973468489, + "grad_norm": 776.2144775390625, + "learning_rate": 
1.2314093546803929e-05, + "loss": 93.0396, + "step": 165750 + }, + { + "epoch": 0.6696913747338566, + "grad_norm": 937.0484008789062, + "learning_rate": 1.2311515532456385e-05, + "loss": 55.9614, + "step": 165760 + }, + { + "epoch": 0.6697317759992243, + "grad_norm": 610.2728881835938, + "learning_rate": 1.2308937667999813e-05, + "loss": 67.6723, + "step": 165770 + }, + { + "epoch": 0.6697721772645919, + "grad_norm": 1199.007080078125, + "learning_rate": 1.2306359953484459e-05, + "loss": 95.622, + "step": 165780 + }, + { + "epoch": 0.6698125785299596, + "grad_norm": 904.8882446289062, + "learning_rate": 1.2303782388960572e-05, + "loss": 101.6446, + "step": 165790 + }, + { + "epoch": 0.6698529797953272, + "grad_norm": 731.574951171875, + "learning_rate": 1.2301204974478413e-05, + "loss": 121.5937, + "step": 165800 + }, + { + "epoch": 0.6698933810606948, + "grad_norm": 521.6705322265625, + "learning_rate": 1.2298627710088231e-05, + "loss": 62.1014, + "step": 165810 + }, + { + "epoch": 0.6699337823260625, + "grad_norm": 608.2456665039062, + "learning_rate": 1.229605059584026e-05, + "loss": 80.1789, + "step": 165820 + }, + { + "epoch": 0.6699741835914301, + "grad_norm": 447.94384765625, + "learning_rate": 1.2293473631784756e-05, + "loss": 115.0095, + "step": 165830 + }, + { + "epoch": 0.6700145848567978, + "grad_norm": 545.1532592773438, + "learning_rate": 1.229089681797194e-05, + "loss": 39.6873, + "step": 165840 + }, + { + "epoch": 0.6700549861221653, + "grad_norm": 844.9697875976562, + "learning_rate": 1.228832015445207e-05, + "loss": 60.3624, + "step": 165850 + }, + { + "epoch": 0.6700953873875329, + "grad_norm": 935.3510131835938, + "learning_rate": 1.2285743641275369e-05, + "loss": 62.4639, + "step": 165860 + }, + { + "epoch": 0.6701357886529006, + "grad_norm": 468.7918701171875, + "learning_rate": 1.2283167278492059e-05, + "loss": 56.8072, + "step": 165870 + }, + { + "epoch": 0.6701761899182682, + "grad_norm": 792.640625, + "learning_rate": 1.2280591066152383e-05, + "loss": 58.5852, + "step": 165880 + }, + { + "epoch": 0.6702165911836359, + "grad_norm": 707.6939697265625, + "learning_rate": 1.2278015004306554e-05, + "loss": 78.0063, + "step": 165890 + }, + { + "epoch": 0.6702569924490035, + "grad_norm": 901.7511596679688, + "learning_rate": 1.2275439093004801e-05, + "loss": 107.0062, + "step": 165900 + }, + { + "epoch": 0.6702973937143711, + "grad_norm": 752.1727905273438, + "learning_rate": 1.2272863332297345e-05, + "loss": 71.8458, + "step": 165910 + }, + { + "epoch": 0.6703377949797388, + "grad_norm": 424.38983154296875, + "learning_rate": 1.2270287722234384e-05, + "loss": 102.6954, + "step": 165920 + }, + { + "epoch": 0.6703781962451064, + "grad_norm": 309.2944030761719, + "learning_rate": 1.2267712262866152e-05, + "loss": 59.5774, + "step": 165930 + }, + { + "epoch": 0.6704185975104741, + "grad_norm": 682.3551025390625, + "learning_rate": 1.2265136954242852e-05, + "loss": 63.4396, + "step": 165940 + }, + { + "epoch": 0.6704589987758417, + "grad_norm": 1001.4012451171875, + "learning_rate": 1.2262561796414679e-05, + "loss": 63.0977, + "step": 165950 + }, + { + "epoch": 0.6704994000412093, + "grad_norm": 389.96014404296875, + "learning_rate": 1.2259986789431855e-05, + "loss": 64.4159, + "step": 165960 + }, + { + "epoch": 0.670539801306577, + "grad_norm": 496.08746337890625, + "learning_rate": 1.225741193334457e-05, + "loss": 58.6656, + "step": 165970 + }, + { + "epoch": 0.6705802025719445, + "grad_norm": 1169.660400390625, + "learning_rate": 1.2254837228203029e-05, + "loss": 83.4523, 
+ "step": 165980 + }, + { + "epoch": 0.6706206038373121, + "grad_norm": 978.981201171875, + "learning_rate": 1.2252262674057427e-05, + "loss": 108.8227, + "step": 165990 + }, + { + "epoch": 0.6706610051026798, + "grad_norm": 1052.437255859375, + "learning_rate": 1.2249688270957942e-05, + "loss": 72.5224, + "step": 166000 + }, + { + "epoch": 0.6707014063680474, + "grad_norm": 935.1222534179688, + "learning_rate": 1.2247114018954781e-05, + "loss": 69.448, + "step": 166010 + }, + { + "epoch": 0.6707418076334151, + "grad_norm": 1109.8660888671875, + "learning_rate": 1.2244539918098127e-05, + "loss": 67.8779, + "step": 166020 + }, + { + "epoch": 0.6707822088987827, + "grad_norm": 1748.253173828125, + "learning_rate": 1.2241965968438149e-05, + "loss": 53.5715, + "step": 166030 + }, + { + "epoch": 0.6708226101641503, + "grad_norm": 749.5118408203125, + "learning_rate": 1.223939217002505e-05, + "loss": 68.0842, + "step": 166040 + }, + { + "epoch": 0.670863011429518, + "grad_norm": 315.0796813964844, + "learning_rate": 1.2236818522908989e-05, + "loss": 68.9191, + "step": 166050 + }, + { + "epoch": 0.6709034126948856, + "grad_norm": 514.7701416015625, + "learning_rate": 1.2234245027140154e-05, + "loss": 55.0747, + "step": 166060 + }, + { + "epoch": 0.6709438139602533, + "grad_norm": 692.8334350585938, + "learning_rate": 1.223167168276871e-05, + "loss": 66.3534, + "step": 166070 + }, + { + "epoch": 0.6709842152256209, + "grad_norm": 630.9699096679688, + "learning_rate": 1.2229098489844816e-05, + "loss": 117.4826, + "step": 166080 + }, + { + "epoch": 0.6710246164909885, + "grad_norm": 681.9636840820312, + "learning_rate": 1.2226525448418655e-05, + "loss": 47.8706, + "step": 166090 + }, + { + "epoch": 0.6710650177563561, + "grad_norm": 336.3450012207031, + "learning_rate": 1.2223952558540387e-05, + "loss": 76.0651, + "step": 166100 + }, + { + "epoch": 0.6711054190217237, + "grad_norm": 1147.4539794921875, + "learning_rate": 1.2221379820260156e-05, + "loss": 86.9108, + "step": 166110 + }, + { + "epoch": 0.6711458202870914, + "grad_norm": 612.3355102539062, + "learning_rate": 1.2218807233628138e-05, + "loss": 69.2344, + "step": 166120 + }, + { + "epoch": 0.671186221552459, + "grad_norm": 741.5166015625, + "learning_rate": 1.2216234798694476e-05, + "loss": 55.6385, + "step": 166130 + }, + { + "epoch": 0.6712266228178266, + "grad_norm": 596.8611450195312, + "learning_rate": 1.2213662515509323e-05, + "loss": 47.2258, + "step": 166140 + }, + { + "epoch": 0.6712670240831943, + "grad_norm": 686.26904296875, + "learning_rate": 1.2211090384122833e-05, + "loss": 65.3514, + "step": 166150 + }, + { + "epoch": 0.6713074253485619, + "grad_norm": 611.7229614257812, + "learning_rate": 1.2208518404585141e-05, + "loss": 70.4748, + "step": 166160 + }, + { + "epoch": 0.6713478266139296, + "grad_norm": 611.1117553710938, + "learning_rate": 1.2205946576946399e-05, + "loss": 60.937, + "step": 166170 + }, + { + "epoch": 0.6713882278792972, + "grad_norm": 815.8084106445312, + "learning_rate": 1.2203374901256742e-05, + "loss": 59.3587, + "step": 166180 + }, + { + "epoch": 0.6714286291446648, + "grad_norm": 777.8250732421875, + "learning_rate": 1.2200803377566293e-05, + "loss": 77.0953, + "step": 166190 + }, + { + "epoch": 0.6714690304100325, + "grad_norm": 1141.532470703125, + "learning_rate": 1.2198232005925208e-05, + "loss": 82.2117, + "step": 166200 + }, + { + "epoch": 0.6715094316754001, + "grad_norm": 1374.11181640625, + "learning_rate": 1.2195660786383599e-05, + "loss": 73.995, + "step": 166210 + }, + { + "epoch": 
0.6715498329407678, + "grad_norm": 447.986572265625, + "learning_rate": 1.2193089718991612e-05, + "loss": 78.6983, + "step": 166220 + }, + { + "epoch": 0.6715902342061353, + "grad_norm": 876.3670043945312, + "learning_rate": 1.2190518803799356e-05, + "loss": 54.2557, + "step": 166230 + }, + { + "epoch": 0.6716306354715029, + "grad_norm": 804.3194580078125, + "learning_rate": 1.2187948040856955e-05, + "loss": 61.519, + "step": 166240 + }, + { + "epoch": 0.6716710367368706, + "grad_norm": 401.8904113769531, + "learning_rate": 1.218537743021453e-05, + "loss": 77.2621, + "step": 166250 + }, + { + "epoch": 0.6717114380022382, + "grad_norm": 1034.698974609375, + "learning_rate": 1.2182806971922197e-05, + "loss": 82.1181, + "step": 166260 + }, + { + "epoch": 0.6717518392676058, + "grad_norm": 464.5929260253906, + "learning_rate": 1.218023666603006e-05, + "loss": 52.9468, + "step": 166270 + }, + { + "epoch": 0.6717922405329735, + "grad_norm": 443.2475280761719, + "learning_rate": 1.217766651258824e-05, + "loss": 84.8093, + "step": 166280 + }, + { + "epoch": 0.6718326417983411, + "grad_norm": 648.0079345703125, + "learning_rate": 1.2175096511646829e-05, + "loss": 57.3537, + "step": 166290 + }, + { + "epoch": 0.6718730430637088, + "grad_norm": 386.97637939453125, + "learning_rate": 1.2172526663255953e-05, + "loss": 62.6712, + "step": 166300 + }, + { + "epoch": 0.6719134443290764, + "grad_norm": 495.72271728515625, + "learning_rate": 1.2169956967465698e-05, + "loss": 74.3666, + "step": 166310 + }, + { + "epoch": 0.671953845594444, + "grad_norm": 594.6167602539062, + "learning_rate": 1.2167387424326158e-05, + "loss": 53.5041, + "step": 166320 + }, + { + "epoch": 0.6719942468598117, + "grad_norm": 374.2408752441406, + "learning_rate": 1.2164818033887439e-05, + "loss": 52.9462, + "step": 166330 + }, + { + "epoch": 0.6720346481251793, + "grad_norm": 1366.7916259765625, + "learning_rate": 1.2162248796199623e-05, + "loss": 83.8834, + "step": 166340 + }, + { + "epoch": 0.672075049390547, + "grad_norm": 1245.2694091796875, + "learning_rate": 1.2159679711312794e-05, + "loss": 80.1758, + "step": 166350 + }, + { + "epoch": 0.6721154506559145, + "grad_norm": 410.37017822265625, + "learning_rate": 1.2157110779277055e-05, + "loss": 52.8341, + "step": 166360 + }, + { + "epoch": 0.6721558519212821, + "grad_norm": 674.4738159179688, + "learning_rate": 1.2154542000142473e-05, + "loss": 73.7036, + "step": 166370 + }, + { + "epoch": 0.6721962531866498, + "grad_norm": 2407.911865234375, + "learning_rate": 1.2151973373959138e-05, + "loss": 174.0523, + "step": 166380 + }, + { + "epoch": 0.6722366544520174, + "grad_norm": 339.7121276855469, + "learning_rate": 1.2149404900777127e-05, + "loss": 48.5388, + "step": 166390 + }, + { + "epoch": 0.672277055717385, + "grad_norm": 526.0704956054688, + "learning_rate": 1.2146836580646502e-05, + "loss": 59.7641, + "step": 166400 + }, + { + "epoch": 0.6723174569827527, + "grad_norm": 281.3658752441406, + "learning_rate": 1.2144268413617346e-05, + "loss": 72.2257, + "step": 166410 + }, + { + "epoch": 0.6723578582481203, + "grad_norm": 488.0711975097656, + "learning_rate": 1.2141700399739711e-05, + "loss": 71.1904, + "step": 166420 + }, + { + "epoch": 0.672398259513488, + "grad_norm": 974.0661010742188, + "learning_rate": 1.2139132539063683e-05, + "loss": 71.8517, + "step": 166430 + }, + { + "epoch": 0.6724386607788556, + "grad_norm": 525.4677734375, + "learning_rate": 1.2136564831639314e-05, + "loss": 74.9186, + "step": 166440 + }, + { + "epoch": 0.6724790620442233, + "grad_norm": 
457.2035827636719, + "learning_rate": 1.2133997277516652e-05, + "loss": 74.7276, + "step": 166450 + }, + { + "epoch": 0.6725194633095909, + "grad_norm": 688.6111450195312, + "learning_rate": 1.2131429876745774e-05, + "loss": 54.6238, + "step": 166460 + }, + { + "epoch": 0.6725598645749585, + "grad_norm": 423.7134704589844, + "learning_rate": 1.212886262937672e-05, + "loss": 51.7085, + "step": 166470 + }, + { + "epoch": 0.6726002658403262, + "grad_norm": 562.6094360351562, + "learning_rate": 1.2126295535459529e-05, + "loss": 50.3932, + "step": 166480 + }, + { + "epoch": 0.6726406671056937, + "grad_norm": 628.3128051757812, + "learning_rate": 1.2123728595044273e-05, + "loss": 81.4048, + "step": 166490 + }, + { + "epoch": 0.6726810683710613, + "grad_norm": 1044.544921875, + "learning_rate": 1.212116180818098e-05, + "loss": 58.1344, + "step": 166500 + }, + { + "epoch": 0.672721469636429, + "grad_norm": 872.6652221679688, + "learning_rate": 1.2118595174919694e-05, + "loss": 61.7138, + "step": 166510 + }, + { + "epoch": 0.6727618709017966, + "grad_norm": 740.0682983398438, + "learning_rate": 1.2116028695310453e-05, + "loss": 63.5965, + "step": 166520 + }, + { + "epoch": 0.6728022721671643, + "grad_norm": 556.5186157226562, + "learning_rate": 1.2113462369403285e-05, + "loss": 90.3607, + "step": 166530 + }, + { + "epoch": 0.6728426734325319, + "grad_norm": 729.843505859375, + "learning_rate": 1.2110896197248236e-05, + "loss": 85.2275, + "step": 166540 + }, + { + "epoch": 0.6728830746978995, + "grad_norm": 945.718994140625, + "learning_rate": 1.2108330178895326e-05, + "loss": 76.5236, + "step": 166550 + }, + { + "epoch": 0.6729234759632672, + "grad_norm": 1623.5721435546875, + "learning_rate": 1.2105764314394578e-05, + "loss": 64.4294, + "step": 166560 + }, + { + "epoch": 0.6729638772286348, + "grad_norm": 803.6533203125, + "learning_rate": 1.2103198603796024e-05, + "loss": 43.3156, + "step": 166570 + }, + { + "epoch": 0.6730042784940025, + "grad_norm": 834.0337524414062, + "learning_rate": 1.2100633047149675e-05, + "loss": 57.6844, + "step": 166580 + }, + { + "epoch": 0.6730446797593701, + "grad_norm": 271.60931396484375, + "learning_rate": 1.2098067644505555e-05, + "loss": 64.8498, + "step": 166590 + }, + { + "epoch": 0.6730850810247377, + "grad_norm": 481.6324157714844, + "learning_rate": 1.2095502395913676e-05, + "loss": 79.9626, + "step": 166600 + }, + { + "epoch": 0.6731254822901054, + "grad_norm": 724.69580078125, + "learning_rate": 1.2092937301424039e-05, + "loss": 72.9217, + "step": 166610 + }, + { + "epoch": 0.6731658835554729, + "grad_norm": 387.5311279296875, + "learning_rate": 1.2090372361086668e-05, + "loss": 67.8799, + "step": 166620 + }, + { + "epoch": 0.6732062848208406, + "grad_norm": 384.6818542480469, + "learning_rate": 1.208780757495156e-05, + "loss": 58.945, + "step": 166630 + }, + { + "epoch": 0.6732466860862082, + "grad_norm": 406.7601013183594, + "learning_rate": 1.2085242943068707e-05, + "loss": 40.3522, + "step": 166640 + }, + { + "epoch": 0.6732870873515758, + "grad_norm": 510.3577575683594, + "learning_rate": 1.2082678465488127e-05, + "loss": 70.8132, + "step": 166650 + }, + { + "epoch": 0.6733274886169435, + "grad_norm": 576.9485473632812, + "learning_rate": 1.2080114142259802e-05, + "loss": 56.9543, + "step": 166660 + }, + { + "epoch": 0.6733678898823111, + "grad_norm": 673.7516479492188, + "learning_rate": 1.2077549973433736e-05, + "loss": 68.4182, + "step": 166670 + }, + { + "epoch": 0.6734082911476788, + "grad_norm": 1241.95849609375, + "learning_rate": 
1.207498595905991e-05, + "loss": 83.6963, + "step": 166680 + }, + { + "epoch": 0.6734486924130464, + "grad_norm": 462.57562255859375, + "learning_rate": 1.2072422099188304e-05, + "loss": 80.0976, + "step": 166690 + }, + { + "epoch": 0.673489093678414, + "grad_norm": 861.7466430664062, + "learning_rate": 1.2069858393868919e-05, + "loss": 68.9454, + "step": 166700 + }, + { + "epoch": 0.6735294949437817, + "grad_norm": 909.3855590820312, + "learning_rate": 1.2067294843151726e-05, + "loss": 57.5464, + "step": 166710 + }, + { + "epoch": 0.6735698962091493, + "grad_norm": 760.2494506835938, + "learning_rate": 1.20647314470867e-05, + "loss": 102.1906, + "step": 166720 + }, + { + "epoch": 0.673610297474517, + "grad_norm": 2617.41162109375, + "learning_rate": 1.2062168205723825e-05, + "loss": 105.3829, + "step": 166730 + }, + { + "epoch": 0.6736506987398845, + "grad_norm": 1193.41796875, + "learning_rate": 1.2059605119113062e-05, + "loss": 72.3875, + "step": 166740 + }, + { + "epoch": 0.6736911000052521, + "grad_norm": 443.9475402832031, + "learning_rate": 1.205704218730439e-05, + "loss": 109.8769, + "step": 166750 + }, + { + "epoch": 0.6737315012706198, + "grad_norm": 1221.44140625, + "learning_rate": 1.2054479410347768e-05, + "loss": 77.1676, + "step": 166760 + }, + { + "epoch": 0.6737719025359874, + "grad_norm": 294.3841247558594, + "learning_rate": 1.2051916788293162e-05, + "loss": 58.2049, + "step": 166770 + }, + { + "epoch": 0.673812303801355, + "grad_norm": 225.46437072753906, + "learning_rate": 1.2049354321190529e-05, + "loss": 70.817, + "step": 166780 + }, + { + "epoch": 0.6738527050667227, + "grad_norm": 2442.77490234375, + "learning_rate": 1.204679200908983e-05, + "loss": 77.911, + "step": 166790 + }, + { + "epoch": 0.6738931063320903, + "grad_norm": 429.2742004394531, + "learning_rate": 1.2044229852041008e-05, + "loss": 98.2419, + "step": 166800 + }, + { + "epoch": 0.673933507597458, + "grad_norm": 807.3250732421875, + "learning_rate": 1.2041667850094028e-05, + "loss": 53.0465, + "step": 166810 + }, + { + "epoch": 0.6739739088628256, + "grad_norm": 588.813720703125, + "learning_rate": 1.2039106003298823e-05, + "loss": 37.1605, + "step": 166820 + }, + { + "epoch": 0.6740143101281932, + "grad_norm": 512.76806640625, + "learning_rate": 1.2036544311705349e-05, + "loss": 79.0908, + "step": 166830 + }, + { + "epoch": 0.6740547113935609, + "grad_norm": 1476.5064697265625, + "learning_rate": 1.203398277536355e-05, + "loss": 58.6217, + "step": 166840 + }, + { + "epoch": 0.6740951126589285, + "grad_norm": 659.18310546875, + "learning_rate": 1.2031421394323348e-05, + "loss": 86.9353, + "step": 166850 + }, + { + "epoch": 0.6741355139242962, + "grad_norm": 920.7645874023438, + "learning_rate": 1.2028860168634695e-05, + "loss": 113.1675, + "step": 166860 + }, + { + "epoch": 0.6741759151896637, + "grad_norm": 737.103759765625, + "learning_rate": 1.2026299098347516e-05, + "loss": 84.5206, + "step": 166870 + }, + { + "epoch": 0.6742163164550313, + "grad_norm": 783.69287109375, + "learning_rate": 1.2023738183511735e-05, + "loss": 53.1821, + "step": 166880 + }, + { + "epoch": 0.674256717720399, + "grad_norm": 1451.1268310546875, + "learning_rate": 1.2021177424177291e-05, + "loss": 63.2539, + "step": 166890 + }, + { + "epoch": 0.6742971189857666, + "grad_norm": 500.6903991699219, + "learning_rate": 1.2018616820394096e-05, + "loss": 67.8192, + "step": 166900 + }, + { + "epoch": 0.6743375202511342, + "grad_norm": 955.1897583007812, + "learning_rate": 1.2016056372212079e-05, + "loss": 68.4222, + "step": 
166910 + }, + { + "epoch": 0.6743779215165019, + "grad_norm": 679.8262329101562, + "learning_rate": 1.2013496079681155e-05, + "loss": 70.3682, + "step": 166920 + }, + { + "epoch": 0.6744183227818695, + "grad_norm": 721.1195678710938, + "learning_rate": 1.2010935942851231e-05, + "loss": 53.7912, + "step": 166930 + }, + { + "epoch": 0.6744587240472372, + "grad_norm": 393.74810791015625, + "learning_rate": 1.2008375961772232e-05, + "loss": 40.6098, + "step": 166940 + }, + { + "epoch": 0.6744991253126048, + "grad_norm": 474.65655517578125, + "learning_rate": 1.2005816136494048e-05, + "loss": 62.3952, + "step": 166950 + }, + { + "epoch": 0.6745395265779724, + "grad_norm": 627.7875366210938, + "learning_rate": 1.2003256467066602e-05, + "loss": 75.535, + "step": 166960 + }, + { + "epoch": 0.6745799278433401, + "grad_norm": 1269.926025390625, + "learning_rate": 1.2000696953539786e-05, + "loss": 91.0771, + "step": 166970 + }, + { + "epoch": 0.6746203291087077, + "grad_norm": 397.8035888671875, + "learning_rate": 1.1998137595963497e-05, + "loss": 70.5689, + "step": 166980 + }, + { + "epoch": 0.6746607303740754, + "grad_norm": 487.7453918457031, + "learning_rate": 1.199557839438764e-05, + "loss": 44.0568, + "step": 166990 + }, + { + "epoch": 0.6747011316394429, + "grad_norm": 695.6802978515625, + "learning_rate": 1.1993019348862106e-05, + "loss": 64.0263, + "step": 167000 + }, + { + "epoch": 0.6747415329048105, + "grad_norm": 703.5667724609375, + "learning_rate": 1.1990460459436775e-05, + "loss": 88.2574, + "step": 167010 + }, + { + "epoch": 0.6747819341701782, + "grad_norm": 1236.902587890625, + "learning_rate": 1.1987901726161546e-05, + "loss": 99.1798, + "step": 167020 + }, + { + "epoch": 0.6748223354355458, + "grad_norm": 672.239501953125, + "learning_rate": 1.198534314908629e-05, + "loss": 65.2164, + "step": 167030 + }, + { + "epoch": 0.6748627367009135, + "grad_norm": 515.0927124023438, + "learning_rate": 1.1982784728260906e-05, + "loss": 125.349, + "step": 167040 + }, + { + "epoch": 0.6749031379662811, + "grad_norm": 380.5234069824219, + "learning_rate": 1.1980226463735259e-05, + "loss": 43.5791, + "step": 167050 + }, + { + "epoch": 0.6749435392316487, + "grad_norm": 495.08056640625, + "learning_rate": 1.1977668355559215e-05, + "loss": 45.1195, + "step": 167060 + }, + { + "epoch": 0.6749839404970164, + "grad_norm": 686.7434692382812, + "learning_rate": 1.197511040378267e-05, + "loss": 76.5299, + "step": 167070 + }, + { + "epoch": 0.675024341762384, + "grad_norm": 1143.6102294921875, + "learning_rate": 1.1972552608455477e-05, + "loss": 72.3588, + "step": 167080 + }, + { + "epoch": 0.6750647430277517, + "grad_norm": 1271.12744140625, + "learning_rate": 1.19699949696275e-05, + "loss": 91.4026, + "step": 167090 + }, + { + "epoch": 0.6751051442931193, + "grad_norm": 552.3604125976562, + "learning_rate": 1.1967437487348603e-05, + "loss": 57.9855, + "step": 167100 + }, + { + "epoch": 0.6751455455584869, + "grad_norm": 1048.807373046875, + "learning_rate": 1.1964880161668649e-05, + "loss": 73.4246, + "step": 167110 + }, + { + "epoch": 0.6751859468238546, + "grad_norm": 317.4715270996094, + "learning_rate": 1.1962322992637498e-05, + "loss": 53.5452, + "step": 167120 + }, + { + "epoch": 0.6752263480892221, + "grad_norm": 384.2103271484375, + "learning_rate": 1.1959765980304999e-05, + "loss": 85.8785, + "step": 167130 + }, + { + "epoch": 0.6752667493545897, + "grad_norm": 385.4689636230469, + "learning_rate": 1.195720912472099e-05, + "loss": 61.6669, + "step": 167140 + }, + { + "epoch": 
0.6753071506199574, + "grad_norm": 1020.955322265625, + "learning_rate": 1.1954652425935342e-05, + "loss": 56.9206, + "step": 167150 + }, + { + "epoch": 0.675347551885325, + "grad_norm": 503.1684875488281, + "learning_rate": 1.1952095883997883e-05, + "loss": 38.3626, + "step": 167160 + }, + { + "epoch": 0.6753879531506927, + "grad_norm": 427.4813537597656, + "learning_rate": 1.1949539498958454e-05, + "loss": 65.9925, + "step": 167170 + }, + { + "epoch": 0.6754283544160603, + "grad_norm": 597.402099609375, + "learning_rate": 1.19469832708669e-05, + "loss": 79.4978, + "step": 167180 + }, + { + "epoch": 0.675468755681428, + "grad_norm": 544.3772583007812, + "learning_rate": 1.1944427199773054e-05, + "loss": 68.4573, + "step": 167190 + }, + { + "epoch": 0.6755091569467956, + "grad_norm": 974.5628662109375, + "learning_rate": 1.194187128572675e-05, + "loss": 115.501, + "step": 167200 + }, + { + "epoch": 0.6755495582121632, + "grad_norm": 715.9610595703125, + "learning_rate": 1.1939315528777812e-05, + "loss": 83.2299, + "step": 167210 + }, + { + "epoch": 0.6755899594775309, + "grad_norm": 1452.3458251953125, + "learning_rate": 1.1936759928976063e-05, + "loss": 81.0347, + "step": 167220 + }, + { + "epoch": 0.6756303607428985, + "grad_norm": 426.0718688964844, + "learning_rate": 1.1934204486371335e-05, + "loss": 66.3133, + "step": 167230 + }, + { + "epoch": 0.6756707620082661, + "grad_norm": 1433.46533203125, + "learning_rate": 1.1931649201013444e-05, + "loss": 86.0398, + "step": 167240 + }, + { + "epoch": 0.6757111632736337, + "grad_norm": 640.22314453125, + "learning_rate": 1.19290940729522e-05, + "loss": 50.321, + "step": 167250 + }, + { + "epoch": 0.6757515645390013, + "grad_norm": 1062.528564453125, + "learning_rate": 1.1926539102237427e-05, + "loss": 97.7654, + "step": 167260 + }, + { + "epoch": 0.675791965804369, + "grad_norm": 911.935791015625, + "learning_rate": 1.1923984288918928e-05, + "loss": 93.6734, + "step": 167270 + }, + { + "epoch": 0.6758323670697366, + "grad_norm": 1405.3450927734375, + "learning_rate": 1.1921429633046517e-05, + "loss": 56.6048, + "step": 167280 + }, + { + "epoch": 0.6758727683351042, + "grad_norm": 685.3563232421875, + "learning_rate": 1.1918875134669996e-05, + "loss": 60.2568, + "step": 167290 + }, + { + "epoch": 0.6759131696004719, + "grad_norm": 763.909912109375, + "learning_rate": 1.1916320793839154e-05, + "loss": 58.797, + "step": 167300 + }, + { + "epoch": 0.6759535708658395, + "grad_norm": 440.20379638671875, + "learning_rate": 1.191376661060381e-05, + "loss": 76.2107, + "step": 167310 + }, + { + "epoch": 0.6759939721312072, + "grad_norm": 851.7640991210938, + "learning_rate": 1.1911212585013749e-05, + "loss": 53.0123, + "step": 167320 + }, + { + "epoch": 0.6760343733965748, + "grad_norm": 459.8158874511719, + "learning_rate": 1.1908658717118752e-05, + "loss": 58.0758, + "step": 167330 + }, + { + "epoch": 0.6760747746619424, + "grad_norm": 740.8076171875, + "learning_rate": 1.1906105006968631e-05, + "loss": 50.2006, + "step": 167340 + }, + { + "epoch": 0.6761151759273101, + "grad_norm": 670.080322265625, + "learning_rate": 1.1903551454613154e-05, + "loss": 85.9193, + "step": 167350 + }, + { + "epoch": 0.6761555771926777, + "grad_norm": 658.568115234375, + "learning_rate": 1.1900998060102113e-05, + "loss": 83.1912, + "step": 167360 + }, + { + "epoch": 0.6761959784580454, + "grad_norm": 1233.806884765625, + "learning_rate": 1.1898444823485287e-05, + "loss": 100.9495, + "step": 167370 + }, + { + "epoch": 0.6762363797234129, + "grad_norm": 266.8747253417969, 
+ "learning_rate": 1.189589174481244e-05, + "loss": 69.1908, + "step": 167380 + }, + { + "epoch": 0.6762767809887805, + "grad_norm": 577.4727783203125, + "learning_rate": 1.1893338824133363e-05, + "loss": 63.9103, + "step": 167390 + }, + { + "epoch": 0.6763171822541482, + "grad_norm": 568.1558227539062, + "learning_rate": 1.189078606149782e-05, + "loss": 65.6796, + "step": 167400 + }, + { + "epoch": 0.6763575835195158, + "grad_norm": 220.62049865722656, + "learning_rate": 1.1888233456955569e-05, + "loss": 67.5461, + "step": 167410 + }, + { + "epoch": 0.6763979847848834, + "grad_norm": 719.0338745117188, + "learning_rate": 1.1885681010556394e-05, + "loss": 53.5558, + "step": 167420 + }, + { + "epoch": 0.6764383860502511, + "grad_norm": 571.0634155273438, + "learning_rate": 1.188312872235004e-05, + "loss": 47.3405, + "step": 167430 + }, + { + "epoch": 0.6764787873156187, + "grad_norm": 964.6605834960938, + "learning_rate": 1.1880576592386273e-05, + "loss": 77.7957, + "step": 167440 + }, + { + "epoch": 0.6765191885809864, + "grad_norm": 559.594970703125, + "learning_rate": 1.1878024620714844e-05, + "loss": 109.4955, + "step": 167450 + }, + { + "epoch": 0.676559589846354, + "grad_norm": 941.114990234375, + "learning_rate": 1.1875472807385502e-05, + "loss": 86.1889, + "step": 167460 + }, + { + "epoch": 0.6765999911117216, + "grad_norm": 667.2324829101562, + "learning_rate": 1.1872921152448008e-05, + "loss": 46.4109, + "step": 167470 + }, + { + "epoch": 0.6766403923770893, + "grad_norm": 688.8880004882812, + "learning_rate": 1.1870369655952092e-05, + "loss": 76.9515, + "step": 167480 + }, + { + "epoch": 0.6766807936424569, + "grad_norm": 818.0971069335938, + "learning_rate": 1.1867818317947511e-05, + "loss": 60.8222, + "step": 167490 + }, + { + "epoch": 0.6767211949078246, + "grad_norm": 402.2546691894531, + "learning_rate": 1.1865267138484e-05, + "loss": 83.9644, + "step": 167500 + }, + { + "epoch": 0.6767615961731921, + "grad_norm": 1212.820556640625, + "learning_rate": 1.1862716117611287e-05, + "loss": 68.0296, + "step": 167510 + }, + { + "epoch": 0.6768019974385597, + "grad_norm": 726.25244140625, + "learning_rate": 1.1860165255379119e-05, + "loss": 79.5159, + "step": 167520 + }, + { + "epoch": 0.6768423987039274, + "grad_norm": 1219.4525146484375, + "learning_rate": 1.185761455183722e-05, + "loss": 74.133, + "step": 167530 + }, + { + "epoch": 0.676882799969295, + "grad_norm": 655.580810546875, + "learning_rate": 1.1855064007035316e-05, + "loss": 52.2812, + "step": 167540 + }, + { + "epoch": 0.6769232012346627, + "grad_norm": 862.7846069335938, + "learning_rate": 1.1852513621023131e-05, + "loss": 65.1836, + "step": 167550 + }, + { + "epoch": 0.6769636025000303, + "grad_norm": 470.1899108886719, + "learning_rate": 1.1849963393850384e-05, + "loss": 50.0978, + "step": 167560 + }, + { + "epoch": 0.6770040037653979, + "grad_norm": 591.3787841796875, + "learning_rate": 1.1847413325566803e-05, + "loss": 105.2022, + "step": 167570 + }, + { + "epoch": 0.6770444050307656, + "grad_norm": 403.0334167480469, + "learning_rate": 1.1844863416222098e-05, + "loss": 59.0735, + "step": 167580 + }, + { + "epoch": 0.6770848062961332, + "grad_norm": 372.9896545410156, + "learning_rate": 1.184231366586597e-05, + "loss": 49.3718, + "step": 167590 + }, + { + "epoch": 0.6771252075615009, + "grad_norm": 995.4000244140625, + "learning_rate": 1.1839764074548145e-05, + "loss": 98.018, + "step": 167600 + }, + { + "epoch": 0.6771656088268685, + "grad_norm": 499.52642822265625, + "learning_rate": 1.183721464231832e-05, + 
"loss": 61.4924, + "step": 167610 + }, + { + "epoch": 0.6772060100922361, + "grad_norm": 624.6778564453125, + "learning_rate": 1.1834665369226195e-05, + "loss": 61.4741, + "step": 167620 + }, + { + "epoch": 0.6772464113576038, + "grad_norm": 839.9358520507812, + "learning_rate": 1.1832116255321477e-05, + "loss": 66.4072, + "step": 167630 + }, + { + "epoch": 0.6772868126229713, + "grad_norm": 969.1309204101562, + "learning_rate": 1.182956730065385e-05, + "loss": 57.2066, + "step": 167640 + }, + { + "epoch": 0.6773272138883389, + "grad_norm": 753.1382446289062, + "learning_rate": 1.1827018505273022e-05, + "loss": 67.7781, + "step": 167650 + }, + { + "epoch": 0.6773676151537066, + "grad_norm": 955.0655517578125, + "learning_rate": 1.1824469869228677e-05, + "loss": 57.2009, + "step": 167660 + }, + { + "epoch": 0.6774080164190742, + "grad_norm": 351.1363525390625, + "learning_rate": 1.182192139257049e-05, + "loss": 63.2866, + "step": 167670 + }, + { + "epoch": 0.6774484176844419, + "grad_norm": 759.9522705078125, + "learning_rate": 1.1819373075348163e-05, + "loss": 70.9035, + "step": 167680 + }, + { + "epoch": 0.6774888189498095, + "grad_norm": 235.5863037109375, + "learning_rate": 1.1816824917611375e-05, + "loss": 58.1685, + "step": 167690 + }, + { + "epoch": 0.6775292202151771, + "grad_norm": 934.8428955078125, + "learning_rate": 1.181427691940979e-05, + "loss": 66.1864, + "step": 167700 + }, + { + "epoch": 0.6775696214805448, + "grad_norm": 462.5045166015625, + "learning_rate": 1.1811729080793098e-05, + "loss": 46.3916, + "step": 167710 + }, + { + "epoch": 0.6776100227459124, + "grad_norm": 957.8869018554688, + "learning_rate": 1.1809181401810951e-05, + "loss": 70.1115, + "step": 167720 + }, + { + "epoch": 0.6776504240112801, + "grad_norm": 770.7794799804688, + "learning_rate": 1.1806633882513042e-05, + "loss": 129.0947, + "step": 167730 + }, + { + "epoch": 0.6776908252766477, + "grad_norm": 1124.388671875, + "learning_rate": 1.180408652294902e-05, + "loss": 89.5311, + "step": 167740 + }, + { + "epoch": 0.6777312265420153, + "grad_norm": 342.8364562988281, + "learning_rate": 1.1801539323168545e-05, + "loss": 76.0996, + "step": 167750 + }, + { + "epoch": 0.677771627807383, + "grad_norm": 815.4966430664062, + "learning_rate": 1.1798992283221288e-05, + "loss": 72.8028, + "step": 167760 + }, + { + "epoch": 0.6778120290727505, + "grad_norm": 500.9242248535156, + "learning_rate": 1.17964454031569e-05, + "loss": 111.4141, + "step": 167770 + }, + { + "epoch": 0.6778524303381182, + "grad_norm": 896.1378173828125, + "learning_rate": 1.179389868302503e-05, + "loss": 72.5042, + "step": 167780 + }, + { + "epoch": 0.6778928316034858, + "grad_norm": 514.80029296875, + "learning_rate": 1.1791352122875326e-05, + "loss": 37.5459, + "step": 167790 + }, + { + "epoch": 0.6779332328688534, + "grad_norm": 1271.4891357421875, + "learning_rate": 1.1788805722757442e-05, + "loss": 80.5278, + "step": 167800 + }, + { + "epoch": 0.6779736341342211, + "grad_norm": 998.9160766601562, + "learning_rate": 1.178625948272102e-05, + "loss": 81.6218, + "step": 167810 + }, + { + "epoch": 0.6780140353995887, + "grad_norm": 600.8738403320312, + "learning_rate": 1.1783713402815696e-05, + "loss": 72.7283, + "step": 167820 + }, + { + "epoch": 0.6780544366649564, + "grad_norm": 841.9254760742188, + "learning_rate": 1.1781167483091104e-05, + "loss": 58.0905, + "step": 167830 + }, + { + "epoch": 0.678094837930324, + "grad_norm": 821.2103271484375, + "learning_rate": 1.1778621723596891e-05, + "loss": 73.5896, + "step": 167840 + }, + { + 
"epoch": 0.6781352391956916, + "grad_norm": 600.17724609375, + "learning_rate": 1.177607612438268e-05, + "loss": 80.1347, + "step": 167850 + }, + { + "epoch": 0.6781756404610593, + "grad_norm": 887.8407592773438, + "learning_rate": 1.1773530685498091e-05, + "loss": 79.7291, + "step": 167860 + }, + { + "epoch": 0.6782160417264269, + "grad_norm": 723.72998046875, + "learning_rate": 1.1770985406992765e-05, + "loss": 54.7439, + "step": 167870 + }, + { + "epoch": 0.6782564429917946, + "grad_norm": 1060.57080078125, + "learning_rate": 1.176844028891631e-05, + "loss": 56.1667, + "step": 167880 + }, + { + "epoch": 0.6782968442571621, + "grad_norm": 372.2135314941406, + "learning_rate": 1.1765895331318354e-05, + "loss": 38.6053, + "step": 167890 + }, + { + "epoch": 0.6783372455225297, + "grad_norm": 593.740234375, + "learning_rate": 1.1763350534248508e-05, + "loss": 63.3821, + "step": 167900 + }, + { + "epoch": 0.6783776467878974, + "grad_norm": 766.187744140625, + "learning_rate": 1.1760805897756375e-05, + "loss": 80.9068, + "step": 167910 + }, + { + "epoch": 0.678418048053265, + "grad_norm": 841.4970092773438, + "learning_rate": 1.1758261421891582e-05, + "loss": 106.7049, + "step": 167920 + }, + { + "epoch": 0.6784584493186326, + "grad_norm": 748.2615966796875, + "learning_rate": 1.1755717106703725e-05, + "loss": 69.2666, + "step": 167930 + }, + { + "epoch": 0.6784988505840003, + "grad_norm": 556.1793823242188, + "learning_rate": 1.17531729522424e-05, + "loss": 41.2951, + "step": 167940 + }, + { + "epoch": 0.6785392518493679, + "grad_norm": 676.7301635742188, + "learning_rate": 1.1750628958557219e-05, + "loss": 73.3166, + "step": 167950 + }, + { + "epoch": 0.6785796531147356, + "grad_norm": 862.8073120117188, + "learning_rate": 1.1748085125697774e-05, + "loss": 43.4557, + "step": 167960 + }, + { + "epoch": 0.6786200543801032, + "grad_norm": 579.6802368164062, + "learning_rate": 1.1745541453713657e-05, + "loss": 67.2458, + "step": 167970 + }, + { + "epoch": 0.6786604556454708, + "grad_norm": 1150.1484375, + "learning_rate": 1.1742997942654464e-05, + "loss": 115.7679, + "step": 167980 + }, + { + "epoch": 0.6787008569108385, + "grad_norm": 154.58078002929688, + "learning_rate": 1.1740454592569765e-05, + "loss": 52.513, + "step": 167990 + }, + { + "epoch": 0.6787412581762061, + "grad_norm": 2308.8984375, + "learning_rate": 1.1737911403509167e-05, + "loss": 111.5955, + "step": 168000 + }, + { + "epoch": 0.6787816594415738, + "grad_norm": 1024.0263671875, + "learning_rate": 1.1735368375522232e-05, + "loss": 99.2105, + "step": 168010 + }, + { + "epoch": 0.6788220607069413, + "grad_norm": 334.57318115234375, + "learning_rate": 1.1732825508658552e-05, + "loss": 53.6722, + "step": 168020 + }, + { + "epoch": 0.6788624619723089, + "grad_norm": 246.52320861816406, + "learning_rate": 1.1730282802967694e-05, + "loss": 96.9085, + "step": 168030 + }, + { + "epoch": 0.6789028632376766, + "grad_norm": 743.0842895507812, + "learning_rate": 1.172774025849923e-05, + "loss": 64.7522, + "step": 168040 + }, + { + "epoch": 0.6789432645030442, + "grad_norm": 380.72607421875, + "learning_rate": 1.1725197875302729e-05, + "loss": 56.0972, + "step": 168050 + }, + { + "epoch": 0.6789836657684118, + "grad_norm": 1005.4395141601562, + "learning_rate": 1.1722655653427757e-05, + "loss": 87.5377, + "step": 168060 + }, + { + "epoch": 0.6790240670337795, + "grad_norm": 761.7730712890625, + "learning_rate": 1.1720113592923865e-05, + "loss": 71.7586, + "step": 168070 + }, + { + "epoch": 0.6790644682991471, + "grad_norm": 891.33984375, + 
"learning_rate": 1.1717571693840632e-05, + "loss": 67.3383, + "step": 168080 + }, + { + "epoch": 0.6791048695645148, + "grad_norm": 682.4739379882812, + "learning_rate": 1.1715029956227593e-05, + "loss": 90.4025, + "step": 168090 + }, + { + "epoch": 0.6791452708298824, + "grad_norm": 1153.16357421875, + "learning_rate": 1.171248838013432e-05, + "loss": 105.3179, + "step": 168100 + }, + { + "epoch": 0.67918567209525, + "grad_norm": 605.0919799804688, + "learning_rate": 1.170994696561035e-05, + "loss": 60.4549, + "step": 168110 + }, + { + "epoch": 0.6792260733606177, + "grad_norm": 745.8259887695312, + "learning_rate": 1.1707405712705229e-05, + "loss": 76.2288, + "step": 168120 + }, + { + "epoch": 0.6792664746259853, + "grad_norm": 671.2483520507812, + "learning_rate": 1.1704864621468504e-05, + "loss": 55.501, + "step": 168130 + }, + { + "epoch": 0.679306875891353, + "grad_norm": 415.6047058105469, + "learning_rate": 1.1702323691949715e-05, + "loss": 71.7184, + "step": 168140 + }, + { + "epoch": 0.6793472771567205, + "grad_norm": 919.578857421875, + "learning_rate": 1.1699782924198398e-05, + "loss": 52.6729, + "step": 168150 + }, + { + "epoch": 0.6793876784220881, + "grad_norm": 675.0055541992188, + "learning_rate": 1.1697242318264089e-05, + "loss": 62.3935, + "step": 168160 + }, + { + "epoch": 0.6794280796874558, + "grad_norm": 894.4967041015625, + "learning_rate": 1.1694701874196306e-05, + "loss": 85.0189, + "step": 168170 + }, + { + "epoch": 0.6794684809528234, + "grad_norm": 728.6751708984375, + "learning_rate": 1.1692161592044593e-05, + "loss": 68.2073, + "step": 168180 + }, + { + "epoch": 0.6795088822181911, + "grad_norm": 476.74090576171875, + "learning_rate": 1.1689621471858468e-05, + "loss": 58.8045, + "step": 168190 + }, + { + "epoch": 0.6795492834835587, + "grad_norm": 437.4019775390625, + "learning_rate": 1.1687081513687444e-05, + "loss": 52.2863, + "step": 168200 + }, + { + "epoch": 0.6795896847489263, + "grad_norm": 479.1710205078125, + "learning_rate": 1.1684541717581053e-05, + "loss": 66.822, + "step": 168210 + }, + { + "epoch": 0.679630086014294, + "grad_norm": 368.0036926269531, + "learning_rate": 1.16820020835888e-05, + "loss": 74.7626, + "step": 168220 + }, + { + "epoch": 0.6796704872796616, + "grad_norm": 608.8155517578125, + "learning_rate": 1.1679462611760197e-05, + "loss": 45.205, + "step": 168230 + }, + { + "epoch": 0.6797108885450293, + "grad_norm": 1016.4539184570312, + "learning_rate": 1.1676923302144758e-05, + "loss": 57.9328, + "step": 168240 + }, + { + "epoch": 0.6797512898103969, + "grad_norm": 1017.4158325195312, + "learning_rate": 1.167438415479198e-05, + "loss": 66.4923, + "step": 168250 + }, + { + "epoch": 0.6797916910757645, + "grad_norm": 714.875, + "learning_rate": 1.1671845169751368e-05, + "loss": 62.7267, + "step": 168260 + }, + { + "epoch": 0.6798320923411322, + "grad_norm": 560.6956176757812, + "learning_rate": 1.1669306347072424e-05, + "loss": 71.0005, + "step": 168270 + }, + { + "epoch": 0.6798724936064997, + "grad_norm": 479.8105773925781, + "learning_rate": 1.166676768680464e-05, + "loss": 63.36, + "step": 168280 + }, + { + "epoch": 0.6799128948718673, + "grad_norm": 373.44854736328125, + "learning_rate": 1.166422918899752e-05, + "loss": 61.0654, + "step": 168290 + }, + { + "epoch": 0.679953296137235, + "grad_norm": 825.5673217773438, + "learning_rate": 1.1661690853700536e-05, + "loss": 67.8445, + "step": 168300 + }, + { + "epoch": 0.6799936974026026, + "grad_norm": 347.4096984863281, + "learning_rate": 1.165915268096318e-05, + "loss": 52.7944, 
+ "step": 168310 + }, + { + "epoch": 0.6800340986679703, + "grad_norm": 912.6409301757812, + "learning_rate": 1.1656614670834939e-05, + "loss": 60.3428, + "step": 168320 + }, + { + "epoch": 0.6800744999333379, + "grad_norm": 528.2001953125, + "learning_rate": 1.1654076823365285e-05, + "loss": 85.527, + "step": 168330 + }, + { + "epoch": 0.6801149011987055, + "grad_norm": 1218.1666259765625, + "learning_rate": 1.1651539138603705e-05, + "loss": 79.249, + "step": 168340 + }, + { + "epoch": 0.6801553024640732, + "grad_norm": 280.96044921875, + "learning_rate": 1.1649001616599673e-05, + "loss": 122.2802, + "step": 168350 + }, + { + "epoch": 0.6801957037294408, + "grad_norm": 656.7274780273438, + "learning_rate": 1.1646464257402648e-05, + "loss": 75.9104, + "step": 168360 + }, + { + "epoch": 0.6802361049948085, + "grad_norm": 1223.8333740234375, + "learning_rate": 1.1643927061062101e-05, + "loss": 64.2182, + "step": 168370 + }, + { + "epoch": 0.6802765062601761, + "grad_norm": 504.78704833984375, + "learning_rate": 1.1641390027627505e-05, + "loss": 56.8686, + "step": 168380 + }, + { + "epoch": 0.6803169075255437, + "grad_norm": 629.530517578125, + "learning_rate": 1.1638853157148303e-05, + "loss": 69.9676, + "step": 168390 + }, + { + "epoch": 0.6803573087909114, + "grad_norm": 641.1826171875, + "learning_rate": 1.1636316449673974e-05, + "loss": 56.1209, + "step": 168400 + }, + { + "epoch": 0.6803977100562789, + "grad_norm": 1296.3712158203125, + "learning_rate": 1.1633779905253948e-05, + "loss": 50.8906, + "step": 168410 + }, + { + "epoch": 0.6804381113216466, + "grad_norm": 1412.01123046875, + "learning_rate": 1.1631243523937705e-05, + "loss": 60.7751, + "step": 168420 + }, + { + "epoch": 0.6804785125870142, + "grad_norm": 516.8693237304688, + "learning_rate": 1.1628707305774674e-05, + "loss": 59.7449, + "step": 168430 + }, + { + "epoch": 0.6805189138523818, + "grad_norm": 950.3474731445312, + "learning_rate": 1.1626171250814297e-05, + "loss": 102.2852, + "step": 168440 + }, + { + "epoch": 0.6805593151177495, + "grad_norm": 802.9367065429688, + "learning_rate": 1.1623635359106036e-05, + "loss": 96.855, + "step": 168450 + }, + { + "epoch": 0.6805997163831171, + "grad_norm": 878.2551879882812, + "learning_rate": 1.1621099630699306e-05, + "loss": 74.1152, + "step": 168460 + }, + { + "epoch": 0.6806401176484848, + "grad_norm": 1521.4918212890625, + "learning_rate": 1.1618564065643553e-05, + "loss": 67.9229, + "step": 168470 + }, + { + "epoch": 0.6806805189138524, + "grad_norm": 570.3583374023438, + "learning_rate": 1.1616028663988208e-05, + "loss": 43.811, + "step": 168480 + }, + { + "epoch": 0.68072092017922, + "grad_norm": 821.587158203125, + "learning_rate": 1.1613493425782703e-05, + "loss": 55.2566, + "step": 168490 + }, + { + "epoch": 0.6807613214445877, + "grad_norm": 1079.9188232421875, + "learning_rate": 1.1610958351076457e-05, + "loss": 70.1611, + "step": 168500 + }, + { + "epoch": 0.6808017227099553, + "grad_norm": 1062.065673828125, + "learning_rate": 1.1608423439918908e-05, + "loss": 61.9791, + "step": 168510 + }, + { + "epoch": 0.680842123975323, + "grad_norm": 659.7924194335938, + "learning_rate": 1.1605888692359455e-05, + "loss": 66.233, + "step": 168520 + }, + { + "epoch": 0.6808825252406905, + "grad_norm": 631.7788696289062, + "learning_rate": 1.1603354108447524e-05, + "loss": 81.2947, + "step": 168530 + }, + { + "epoch": 0.6809229265060581, + "grad_norm": 144.14576721191406, + "learning_rate": 1.1600819688232528e-05, + "loss": 59.2839, + "step": 168540 + }, + { + "epoch": 
0.6809633277714258, + "grad_norm": 782.2645263671875, + "learning_rate": 1.1598285431763875e-05, + "loss": 66.517, + "step": 168550 + }, + { + "epoch": 0.6810037290367934, + "grad_norm": 606.5401611328125, + "learning_rate": 1.1595751339090981e-05, + "loss": 62.2984, + "step": 168560 + }, + { + "epoch": 0.681044130302161, + "grad_norm": 502.2530822753906, + "learning_rate": 1.1593217410263226e-05, + "loss": 62.7367, + "step": 168570 + }, + { + "epoch": 0.6810845315675287, + "grad_norm": 677.3665161132812, + "learning_rate": 1.1590683645330043e-05, + "loss": 66.2776, + "step": 168580 + }, + { + "epoch": 0.6811249328328963, + "grad_norm": 931.2003173828125, + "learning_rate": 1.1588150044340804e-05, + "loss": 56.6279, + "step": 168590 + }, + { + "epoch": 0.681165334098264, + "grad_norm": 1269.64990234375, + "learning_rate": 1.1585616607344909e-05, + "loss": 66.182, + "step": 168600 + }, + { + "epoch": 0.6812057353636316, + "grad_norm": 729.0940551757812, + "learning_rate": 1.158308333439175e-05, + "loss": 67.5764, + "step": 168610 + }, + { + "epoch": 0.6812461366289992, + "grad_norm": 744.4613037109375, + "learning_rate": 1.1580550225530715e-05, + "loss": 77.1028, + "step": 168620 + }, + { + "epoch": 0.6812865378943669, + "grad_norm": 903.7207641601562, + "learning_rate": 1.1578017280811195e-05, + "loss": 63.7993, + "step": 168630 + }, + { + "epoch": 0.6813269391597345, + "grad_norm": 529.43310546875, + "learning_rate": 1.1575484500282559e-05, + "loss": 66.6902, + "step": 168640 + }, + { + "epoch": 0.6813673404251022, + "grad_norm": 782.9328002929688, + "learning_rate": 1.1572951883994187e-05, + "loss": 71.0853, + "step": 168650 + }, + { + "epoch": 0.6814077416904697, + "grad_norm": 438.5823669433594, + "learning_rate": 1.1570419431995459e-05, + "loss": 81.077, + "step": 168660 + }, + { + "epoch": 0.6814481429558373, + "grad_norm": 468.1617736816406, + "learning_rate": 1.1567887144335752e-05, + "loss": 80.8987, + "step": 168670 + }, + { + "epoch": 0.681488544221205, + "grad_norm": 636.2667236328125, + "learning_rate": 1.1565355021064408e-05, + "loss": 66.0853, + "step": 168680 + }, + { + "epoch": 0.6815289454865726, + "grad_norm": 436.4906311035156, + "learning_rate": 1.1562823062230826e-05, + "loss": 50.3527, + "step": 168690 + }, + { + "epoch": 0.6815693467519403, + "grad_norm": 498.015625, + "learning_rate": 1.1560291267884346e-05, + "loss": 58.849, + "step": 168700 + }, + { + "epoch": 0.6816097480173079, + "grad_norm": 948.4981079101562, + "learning_rate": 1.1557759638074333e-05, + "loss": 101.4596, + "step": 168710 + }, + { + "epoch": 0.6816501492826755, + "grad_norm": 464.9395751953125, + "learning_rate": 1.1555228172850148e-05, + "loss": 88.4932, + "step": 168720 + }, + { + "epoch": 0.6816905505480432, + "grad_norm": 718.4957885742188, + "learning_rate": 1.1552696872261125e-05, + "loss": 49.1283, + "step": 168730 + }, + { + "epoch": 0.6817309518134108, + "grad_norm": 562.6753540039062, + "learning_rate": 1.1550165736356638e-05, + "loss": 66.4581, + "step": 168740 + }, + { + "epoch": 0.6817713530787785, + "grad_norm": 522.206298828125, + "learning_rate": 1.1547634765186016e-05, + "loss": 59.6819, + "step": 168750 + }, + { + "epoch": 0.6818117543441461, + "grad_norm": 477.88726806640625, + "learning_rate": 1.1545103958798603e-05, + "loss": 66.8941, + "step": 168760 + }, + { + "epoch": 0.6818521556095137, + "grad_norm": 380.3904724121094, + "learning_rate": 1.1542573317243742e-05, + "loss": 59.6456, + "step": 168770 + }, + { + "epoch": 0.6818925568748814, + "grad_norm": 
1507.352783203125, + "learning_rate": 1.1540042840570766e-05, + "loss": 76.5064, + "step": 168780 + }, + { + "epoch": 0.6819329581402489, + "grad_norm": 731.3751831054688, + "learning_rate": 1.153751252882902e-05, + "loss": 65.915, + "step": 168790 + }, + { + "epoch": 0.6819733594056165, + "grad_norm": 968.099609375, + "learning_rate": 1.1534982382067819e-05, + "loss": 67.4513, + "step": 168800 + }, + { + "epoch": 0.6820137606709842, + "grad_norm": 1069.012451171875, + "learning_rate": 1.1532452400336491e-05, + "loss": 67.1571, + "step": 168810 + }, + { + "epoch": 0.6820541619363518, + "grad_norm": 695.016357421875, + "learning_rate": 1.1529922583684363e-05, + "loss": 70.2385, + "step": 168820 + }, + { + "epoch": 0.6820945632017195, + "grad_norm": 460.9320068359375, + "learning_rate": 1.1527392932160764e-05, + "loss": 82.311, + "step": 168830 + }, + { + "epoch": 0.6821349644670871, + "grad_norm": 929.8250122070312, + "learning_rate": 1.1524863445814986e-05, + "loss": 61.5102, + "step": 168840 + }, + { + "epoch": 0.6821753657324547, + "grad_norm": 530.8601684570312, + "learning_rate": 1.1522334124696373e-05, + "loss": 42.4212, + "step": 168850 + }, + { + "epoch": 0.6822157669978224, + "grad_norm": 377.58026123046875, + "learning_rate": 1.1519804968854214e-05, + "loss": 47.5444, + "step": 168860 + }, + { + "epoch": 0.68225616826319, + "grad_norm": 2425.79638671875, + "learning_rate": 1.1517275978337824e-05, + "loss": 75.7946, + "step": 168870 + }, + { + "epoch": 0.6822965695285577, + "grad_norm": 915.8695068359375, + "learning_rate": 1.151474715319651e-05, + "loss": 64.0789, + "step": 168880 + }, + { + "epoch": 0.6823369707939253, + "grad_norm": 1047.0841064453125, + "learning_rate": 1.1512218493479556e-05, + "loss": 60.205, + "step": 168890 + }, + { + "epoch": 0.6823773720592929, + "grad_norm": 966.9735107421875, + "learning_rate": 1.1509689999236288e-05, + "loss": 68.6371, + "step": 168900 + }, + { + "epoch": 0.6824177733246606, + "grad_norm": 781.7451782226562, + "learning_rate": 1.1507161670515976e-05, + "loss": 78.6325, + "step": 168910 + }, + { + "epoch": 0.6824581745900281, + "grad_norm": 793.0674438476562, + "learning_rate": 1.150463350736792e-05, + "loss": 62.7991, + "step": 168920 + }, + { + "epoch": 0.6824985758553958, + "grad_norm": 1062.7755126953125, + "learning_rate": 1.1502105509841408e-05, + "loss": 94.5557, + "step": 168930 + }, + { + "epoch": 0.6825389771207634, + "grad_norm": 979.8131103515625, + "learning_rate": 1.1499577677985725e-05, + "loss": 75.5616, + "step": 168940 + }, + { + "epoch": 0.682579378386131, + "grad_norm": 338.50958251953125, + "learning_rate": 1.1497050011850149e-05, + "loss": 64.4261, + "step": 168950 + }, + { + "epoch": 0.6826197796514987, + "grad_norm": 1059.023681640625, + "learning_rate": 1.1494522511483969e-05, + "loss": 54.4186, + "step": 168960 + }, + { + "epoch": 0.6826601809168663, + "grad_norm": 522.9120483398438, + "learning_rate": 1.1491995176936444e-05, + "loss": 60.5153, + "step": 168970 + }, + { + "epoch": 0.682700582182234, + "grad_norm": 359.6441345214844, + "learning_rate": 1.1489468008256855e-05, + "loss": 77.7611, + "step": 168980 + }, + { + "epoch": 0.6827409834476016, + "grad_norm": 962.939453125, + "learning_rate": 1.1486941005494464e-05, + "loss": 59.2681, + "step": 168990 + }, + { + "epoch": 0.6827813847129692, + "grad_norm": 1242.881591796875, + "learning_rate": 1.1484414168698547e-05, + "loss": 96.9103, + "step": 169000 + }, + { + "epoch": 0.6828217859783369, + "grad_norm": 571.0823364257812, + "learning_rate": 
1.1481887497918364e-05, + "loss": 68.1024, + "step": 169010 + }, + { + "epoch": 0.6828621872437045, + "grad_norm": 1016.9942626953125, + "learning_rate": 1.1479360993203154e-05, + "loss": 85.9247, + "step": 169020 + }, + { + "epoch": 0.6829025885090722, + "grad_norm": 581.843017578125, + "learning_rate": 1.1476834654602205e-05, + "loss": 59.145, + "step": 169030 + }, + { + "epoch": 0.6829429897744398, + "grad_norm": 777.2709350585938, + "learning_rate": 1.1474308482164745e-05, + "loss": 63.2816, + "step": 169040 + }, + { + "epoch": 0.6829833910398073, + "grad_norm": 692.43408203125, + "learning_rate": 1.147178247594003e-05, + "loss": 85.2139, + "step": 169050 + }, + { + "epoch": 0.683023792305175, + "grad_norm": 1106.4705810546875, + "learning_rate": 1.1469256635977314e-05, + "loss": 80.1475, + "step": 169060 + }, + { + "epoch": 0.6830641935705426, + "grad_norm": 629.3031005859375, + "learning_rate": 1.1466730962325814e-05, + "loss": 66.894, + "step": 169070 + }, + { + "epoch": 0.6831045948359102, + "grad_norm": 903.1717529296875, + "learning_rate": 1.1464205455034807e-05, + "loss": 62.7596, + "step": 169080 + }, + { + "epoch": 0.6831449961012779, + "grad_norm": 788.1876220703125, + "learning_rate": 1.14616801141535e-05, + "loss": 54.4324, + "step": 169090 + }, + { + "epoch": 0.6831853973666455, + "grad_norm": 617.0003662109375, + "learning_rate": 1.1459154939731131e-05, + "loss": 71.8426, + "step": 169100 + }, + { + "epoch": 0.6832257986320132, + "grad_norm": 2085.249267578125, + "learning_rate": 1.1456629931816936e-05, + "loss": 60.4136, + "step": 169110 + }, + { + "epoch": 0.6832661998973808, + "grad_norm": 670.5505981445312, + "learning_rate": 1.1454105090460149e-05, + "loss": 67.6258, + "step": 169120 + }, + { + "epoch": 0.6833066011627484, + "grad_norm": 665.7911987304688, + "learning_rate": 1.1451580415709973e-05, + "loss": 85.6925, + "step": 169130 + }, + { + "epoch": 0.6833470024281161, + "grad_norm": 246.70425415039062, + "learning_rate": 1.1449055907615638e-05, + "loss": 80.1028, + "step": 169140 + }, + { + "epoch": 0.6833874036934837, + "grad_norm": 119.55757141113281, + "learning_rate": 1.1446531566226363e-05, + "loss": 45.8206, + "step": 169150 + }, + { + "epoch": 0.6834278049588514, + "grad_norm": 261.0589904785156, + "learning_rate": 1.1444007391591355e-05, + "loss": 46.9402, + "step": 169160 + }, + { + "epoch": 0.6834682062242189, + "grad_norm": 488.65496826171875, + "learning_rate": 1.1441483383759838e-05, + "loss": 42.2403, + "step": 169170 + }, + { + "epoch": 0.6835086074895865, + "grad_norm": 573.9131469726562, + "learning_rate": 1.1438959542780989e-05, + "loss": 52.7249, + "step": 169180 + }, + { + "epoch": 0.6835490087549542, + "grad_norm": 1043.4527587890625, + "learning_rate": 1.1436435868704052e-05, + "loss": 77.2955, + "step": 169190 + }, + { + "epoch": 0.6835894100203218, + "grad_norm": 709.8731689453125, + "learning_rate": 1.1433912361578196e-05, + "loss": 46.7496, + "step": 169200 + }, + { + "epoch": 0.6836298112856894, + "grad_norm": 466.0685119628906, + "learning_rate": 1.1431389021452631e-05, + "loss": 104.5651, + "step": 169210 + }, + { + "epoch": 0.6836702125510571, + "grad_norm": 787.2694702148438, + "learning_rate": 1.1428865848376554e-05, + "loss": 63.6203, + "step": 169220 + }, + { + "epoch": 0.6837106138164247, + "grad_norm": 403.7371826171875, + "learning_rate": 1.1426342842399136e-05, + "loss": 68.5214, + "step": 169230 + }, + { + "epoch": 0.6837510150817924, + "grad_norm": 819.4902954101562, + "learning_rate": 1.1423820003569592e-05, + "loss": 
107.8724, + "step": 169240 + }, + { + "epoch": 0.68379141634716, + "grad_norm": 627.01904296875, + "learning_rate": 1.1421297331937086e-05, + "loss": 97.2072, + "step": 169250 + }, + { + "epoch": 0.6838318176125276, + "grad_norm": 542.25927734375, + "learning_rate": 1.1418774827550805e-05, + "loss": 95.8945, + "step": 169260 + }, + { + "epoch": 0.6838722188778953, + "grad_norm": 566.6044921875, + "learning_rate": 1.1416252490459925e-05, + "loss": 63.2975, + "step": 169270 + }, + { + "epoch": 0.6839126201432629, + "grad_norm": 983.3857421875, + "learning_rate": 1.1413730320713632e-05, + "loss": 48.7714, + "step": 169280 + }, + { + "epoch": 0.6839530214086306, + "grad_norm": 1201.1895751953125, + "learning_rate": 1.1411208318361069e-05, + "loss": 68.4784, + "step": 169290 + }, + { + "epoch": 0.6839934226739981, + "grad_norm": 630.52587890625, + "learning_rate": 1.1408686483451439e-05, + "loss": 64.963, + "step": 169300 + }, + { + "epoch": 0.6840338239393657, + "grad_norm": 876.9546508789062, + "learning_rate": 1.140616481603388e-05, + "loss": 58.9274, + "step": 169310 + }, + { + "epoch": 0.6840742252047334, + "grad_norm": 558.5263061523438, + "learning_rate": 1.1403643316157563e-05, + "loss": 68.2221, + "step": 169320 + }, + { + "epoch": 0.684114626470101, + "grad_norm": 1336.695068359375, + "learning_rate": 1.1401121983871653e-05, + "loss": 52.5321, + "step": 169330 + }, + { + "epoch": 0.6841550277354687, + "grad_norm": 675.3303833007812, + "learning_rate": 1.139860081922528e-05, + "loss": 64.1633, + "step": 169340 + }, + { + "epoch": 0.6841954290008363, + "grad_norm": 341.4596252441406, + "learning_rate": 1.1396079822267632e-05, + "loss": 89.8055, + "step": 169350 + }, + { + "epoch": 0.6842358302662039, + "grad_norm": 517.6049194335938, + "learning_rate": 1.1393558993047826e-05, + "loss": 78.6987, + "step": 169360 + }, + { + "epoch": 0.6842762315315716, + "grad_norm": 1236.895751953125, + "learning_rate": 1.139103833161502e-05, + "loss": 69.5982, + "step": 169370 + }, + { + "epoch": 0.6843166327969392, + "grad_norm": 515.6644897460938, + "learning_rate": 1.1388517838018354e-05, + "loss": 59.4184, + "step": 169380 + }, + { + "epoch": 0.6843570340623069, + "grad_norm": 530.1216430664062, + "learning_rate": 1.1385997512306966e-05, + "loss": 117.6296, + "step": 169390 + }, + { + "epoch": 0.6843974353276745, + "grad_norm": 703.8475341796875, + "learning_rate": 1.138347735453e-05, + "loss": 83.8708, + "step": 169400 + }, + { + "epoch": 0.6844378365930421, + "grad_norm": 503.04095458984375, + "learning_rate": 1.1380957364736567e-05, + "loss": 103.9953, + "step": 169410 + }, + { + "epoch": 0.6844782378584098, + "grad_norm": 527.3914184570312, + "learning_rate": 1.1378437542975811e-05, + "loss": 63.354, + "step": 169420 + }, + { + "epoch": 0.6845186391237773, + "grad_norm": 557.2896728515625, + "learning_rate": 1.1375917889296853e-05, + "loss": 84.9228, + "step": 169430 + }, + { + "epoch": 0.684559040389145, + "grad_norm": 418.055419921875, + "learning_rate": 1.1373398403748825e-05, + "loss": 54.9363, + "step": 169440 + }, + { + "epoch": 0.6845994416545126, + "grad_norm": 608.9022216796875, + "learning_rate": 1.1370879086380819e-05, + "loss": 58.7656, + "step": 169450 + }, + { + "epoch": 0.6846398429198802, + "grad_norm": 351.54779052734375, + "learning_rate": 1.1368359937241984e-05, + "loss": 44.0733, + "step": 169460 + }, + { + "epoch": 0.6846802441852479, + "grad_norm": 523.9404907226562, + "learning_rate": 1.1365840956381406e-05, + "loss": 71.0629, + "step": 169470 + }, + { + "epoch": 
0.6847206454506155, + "grad_norm": 943.2615356445312, + "learning_rate": 1.1363322143848207e-05, + "loss": 69.7818, + "step": 169480 + }, + { + "epoch": 0.6847610467159831, + "grad_norm": 595.4908447265625, + "learning_rate": 1.1360803499691495e-05, + "loss": 88.6626, + "step": 169490 + }, + { + "epoch": 0.6848014479813508, + "grad_norm": 302.854248046875, + "learning_rate": 1.1358285023960351e-05, + "loss": 34.9608, + "step": 169500 + }, + { + "epoch": 0.6848418492467184, + "grad_norm": 515.0999145507812, + "learning_rate": 1.1355766716703909e-05, + "loss": 77.4052, + "step": 169510 + }, + { + "epoch": 0.6848822505120861, + "grad_norm": 303.788330078125, + "learning_rate": 1.1353248577971225e-05, + "loss": 104.1116, + "step": 169520 + }, + { + "epoch": 0.6849226517774537, + "grad_norm": 686.0322265625, + "learning_rate": 1.1350730607811429e-05, + "loss": 64.5159, + "step": 169530 + }, + { + "epoch": 0.6849630530428213, + "grad_norm": 1145.6875, + "learning_rate": 1.1348212806273583e-05, + "loss": 85.4398, + "step": 169540 + }, + { + "epoch": 0.685003454308189, + "grad_norm": 438.7492980957031, + "learning_rate": 1.1345695173406786e-05, + "loss": 53.0312, + "step": 169550 + }, + { + "epoch": 0.6850438555735565, + "grad_norm": 455.1004943847656, + "learning_rate": 1.1343177709260122e-05, + "loss": 57.8113, + "step": 169560 + }, + { + "epoch": 0.6850842568389242, + "grad_norm": 855.119384765625, + "learning_rate": 1.134066041388266e-05, + "loss": 61.6608, + "step": 169570 + }, + { + "epoch": 0.6851246581042918, + "grad_norm": 650.989013671875, + "learning_rate": 1.133814328732348e-05, + "loss": 55.1558, + "step": 169580 + }, + { + "epoch": 0.6851650593696594, + "grad_norm": 607.4940185546875, + "learning_rate": 1.1335626329631655e-05, + "loss": 55.8691, + "step": 169590 + }, + { + "epoch": 0.6852054606350271, + "grad_norm": 268.4606628417969, + "learning_rate": 1.1333109540856257e-05, + "loss": 73.5036, + "step": 169600 + }, + { + "epoch": 0.6852458619003947, + "grad_norm": 577.3482666015625, + "learning_rate": 1.133059292104635e-05, + "loss": 52.2734, + "step": 169610 + }, + { + "epoch": 0.6852862631657624, + "grad_norm": 632.328369140625, + "learning_rate": 1.1328076470251007e-05, + "loss": 70.3792, + "step": 169620 + }, + { + "epoch": 0.68532666443113, + "grad_norm": 290.99566650390625, + "learning_rate": 1.132556018851926e-05, + "loss": 79.6498, + "step": 169630 + }, + { + "epoch": 0.6853670656964976, + "grad_norm": 289.7782287597656, + "learning_rate": 1.1323044075900198e-05, + "loss": 80.7702, + "step": 169640 + }, + { + "epoch": 0.6854074669618653, + "grad_norm": 703.7431030273438, + "learning_rate": 1.1320528132442852e-05, + "loss": 71.1618, + "step": 169650 + }, + { + "epoch": 0.6854478682272329, + "grad_norm": 1495.007080078125, + "learning_rate": 1.131801235819628e-05, + "loss": 73.1452, + "step": 169660 + }, + { + "epoch": 0.6854882694926006, + "grad_norm": 659.1622314453125, + "learning_rate": 1.1315496753209531e-05, + "loss": 71.0184, + "step": 169670 + }, + { + "epoch": 0.6855286707579682, + "grad_norm": 817.4584350585938, + "learning_rate": 1.1312981317531633e-05, + "loss": 59.1941, + "step": 169680 + }, + { + "epoch": 0.6855690720233357, + "grad_norm": 1046.4639892578125, + "learning_rate": 1.1310466051211647e-05, + "loss": 76.8246, + "step": 169690 + }, + { + "epoch": 0.6856094732887034, + "grad_norm": 1424.8743896484375, + "learning_rate": 1.1307950954298597e-05, + "loss": 73.654, + "step": 169700 + }, + { + "epoch": 0.685649874554071, + "grad_norm": 462.9339599609375, + 
"learning_rate": 1.1305436026841514e-05, + "loss": 72.103, + "step": 169710 + }, + { + "epoch": 0.6856902758194386, + "grad_norm": 1118.06005859375, + "learning_rate": 1.1302921268889432e-05, + "loss": 80.988, + "step": 169720 + }, + { + "epoch": 0.6857306770848063, + "grad_norm": 901.210693359375, + "learning_rate": 1.130040668049139e-05, + "loss": 54.7837, + "step": 169730 + }, + { + "epoch": 0.6857710783501739, + "grad_norm": 298.67962646484375, + "learning_rate": 1.1297892261696384e-05, + "loss": 81.1369, + "step": 169740 + }, + { + "epoch": 0.6858114796155416, + "grad_norm": 506.9038391113281, + "learning_rate": 1.1295378012553454e-05, + "loss": 101.6828, + "step": 169750 + }, + { + "epoch": 0.6858518808809092, + "grad_norm": 565.6929931640625, + "learning_rate": 1.1292863933111607e-05, + "loss": 67.7951, + "step": 169760 + }, + { + "epoch": 0.6858922821462768, + "grad_norm": 568.670654296875, + "learning_rate": 1.1290350023419862e-05, + "loss": 54.5553, + "step": 169770 + }, + { + "epoch": 0.6859326834116445, + "grad_norm": 788.04248046875, + "learning_rate": 1.1287836283527236e-05, + "loss": 48.6146, + "step": 169780 + }, + { + "epoch": 0.6859730846770121, + "grad_norm": 2973.152587890625, + "learning_rate": 1.128532271348271e-05, + "loss": 103.2645, + "step": 169790 + }, + { + "epoch": 0.6860134859423798, + "grad_norm": 604.1214599609375, + "learning_rate": 1.1282809313335322e-05, + "loss": 69.0883, + "step": 169800 + }, + { + "epoch": 0.6860538872077473, + "grad_norm": 496.0331115722656, + "learning_rate": 1.1280296083134044e-05, + "loss": 62.7174, + "step": 169810 + }, + { + "epoch": 0.6860942884731149, + "grad_norm": 541.9548950195312, + "learning_rate": 1.1277783022927883e-05, + "loss": 41.1596, + "step": 169820 + }, + { + "epoch": 0.6861346897384826, + "grad_norm": 1179.6707763671875, + "learning_rate": 1.1275270132765843e-05, + "loss": 92.4057, + "step": 169830 + }, + { + "epoch": 0.6861750910038502, + "grad_norm": 428.2345275878906, + "learning_rate": 1.1272757412696884e-05, + "loss": 79.5643, + "step": 169840 + }, + { + "epoch": 0.6862154922692179, + "grad_norm": 863.383544921875, + "learning_rate": 1.1270244862770033e-05, + "loss": 68.5366, + "step": 169850 + }, + { + "epoch": 0.6862558935345855, + "grad_norm": 736.5138549804688, + "learning_rate": 1.1267732483034241e-05, + "loss": 53.9513, + "step": 169860 + }, + { + "epoch": 0.6862962947999531, + "grad_norm": 1805.254150390625, + "learning_rate": 1.1265220273538502e-05, + "loss": 108.2044, + "step": 169870 + }, + { + "epoch": 0.6863366960653208, + "grad_norm": 1606.963623046875, + "learning_rate": 1.126270823433179e-05, + "loss": 73.591, + "step": 169880 + }, + { + "epoch": 0.6863770973306884, + "grad_norm": 993.1559448242188, + "learning_rate": 1.1260196365463085e-05, + "loss": 57.562, + "step": 169890 + }, + { + "epoch": 0.686417498596056, + "grad_norm": 852.89501953125, + "learning_rate": 1.1257684666981348e-05, + "loss": 70.1756, + "step": 169900 + }, + { + "epoch": 0.6864578998614237, + "grad_norm": 364.760498046875, + "learning_rate": 1.1255173138935546e-05, + "loss": 53.3267, + "step": 169910 + }, + { + "epoch": 0.6864983011267913, + "grad_norm": 554.30029296875, + "learning_rate": 1.1252661781374648e-05, + "loss": 93.9288, + "step": 169920 + }, + { + "epoch": 0.686538702392159, + "grad_norm": 1306.660888671875, + "learning_rate": 1.125015059434761e-05, + "loss": 82.5421, + "step": 169930 + }, + { + "epoch": 0.6865791036575265, + "grad_norm": 849.1236572265625, + "learning_rate": 1.1247639577903397e-05, + "loss": 
62.6991, + "step": 169940 + }, + { + "epoch": 0.6866195049228941, + "grad_norm": 337.93267822265625, + "learning_rate": 1.1245128732090944e-05, + "loss": 68.026, + "step": 169950 + }, + { + "epoch": 0.6866599061882618, + "grad_norm": 922.066650390625, + "learning_rate": 1.124261805695923e-05, + "loss": 129.3159, + "step": 169960 + }, + { + "epoch": 0.6867003074536294, + "grad_norm": 315.5467529296875, + "learning_rate": 1.1240107552557176e-05, + "loss": 72.066, + "step": 169970 + }, + { + "epoch": 0.6867407087189971, + "grad_norm": 755.0361938476562, + "learning_rate": 1.1237597218933733e-05, + "loss": 82.2767, + "step": 169980 + }, + { + "epoch": 0.6867811099843647, + "grad_norm": 901.6012573242188, + "learning_rate": 1.1235087056137849e-05, + "loss": 69.271, + "step": 169990 + }, + { + "epoch": 0.6868215112497323, + "grad_norm": 277.63177490234375, + "learning_rate": 1.123257706421845e-05, + "loss": 73.8856, + "step": 170000 + }, + { + "epoch": 0.6868619125151, + "grad_norm": 632.3141479492188, + "learning_rate": 1.1230067243224485e-05, + "loss": 69.5837, + "step": 170010 + }, + { + "epoch": 0.6869023137804676, + "grad_norm": 936.0242919921875, + "learning_rate": 1.1227557593204867e-05, + "loss": 68.7608, + "step": 170020 + }, + { + "epoch": 0.6869427150458353, + "grad_norm": 954.6158447265625, + "learning_rate": 1.122504811420853e-05, + "loss": 58.6869, + "step": 170030 + }, + { + "epoch": 0.6869831163112029, + "grad_norm": 902.296875, + "learning_rate": 1.1222538806284395e-05, + "loss": 76.3614, + "step": 170040 + }, + { + "epoch": 0.6870235175765705, + "grad_norm": 550.283203125, + "learning_rate": 1.1220029669481388e-05, + "loss": 66.7607, + "step": 170050 + }, + { + "epoch": 0.6870639188419382, + "grad_norm": 517.638916015625, + "learning_rate": 1.1217520703848421e-05, + "loss": 62.4976, + "step": 170060 + }, + { + "epoch": 0.6871043201073057, + "grad_norm": 999.3446655273438, + "learning_rate": 1.1215011909434419e-05, + "loss": 67.7988, + "step": 170070 + }, + { + "epoch": 0.6871447213726734, + "grad_norm": 644.5501098632812, + "learning_rate": 1.1212503286288272e-05, + "loss": 53.3776, + "step": 170080 + }, + { + "epoch": 0.687185122638041, + "grad_norm": 569.2724609375, + "learning_rate": 1.1209994834458899e-05, + "loss": 61.9794, + "step": 170090 + }, + { + "epoch": 0.6872255239034086, + "grad_norm": 854.2957763671875, + "learning_rate": 1.1207486553995213e-05, + "loss": 58.9296, + "step": 170100 + }, + { + "epoch": 0.6872659251687763, + "grad_norm": 344.0550537109375, + "learning_rate": 1.1204978444946084e-05, + "loss": 57.4208, + "step": 170110 + }, + { + "epoch": 0.6873063264341439, + "grad_norm": 738.8593139648438, + "learning_rate": 1.1202470507360443e-05, + "loss": 60.8665, + "step": 170120 + }, + { + "epoch": 0.6873467276995116, + "grad_norm": 773.6167602539062, + "learning_rate": 1.1199962741287154e-05, + "loss": 68.7945, + "step": 170130 + }, + { + "epoch": 0.6873871289648792, + "grad_norm": 1560.46533203125, + "learning_rate": 1.119745514677514e-05, + "loss": 85.5384, + "step": 170140 + }, + { + "epoch": 0.6874275302302468, + "grad_norm": 715.1464233398438, + "learning_rate": 1.119494772387326e-05, + "loss": 60.4295, + "step": 170150 + }, + { + "epoch": 0.6874679314956145, + "grad_norm": 354.99237060546875, + "learning_rate": 1.1192440472630408e-05, + "loss": 52.632, + "step": 170160 + }, + { + "epoch": 0.6875083327609821, + "grad_norm": 669.1986083984375, + "learning_rate": 1.118993339309547e-05, + "loss": 45.1908, + "step": 170170 + }, + { + "epoch": 
0.6875487340263498, + "grad_norm": 473.1531066894531, + "learning_rate": 1.118742648531731e-05, + "loss": 101.9115, + "step": 170180 + }, + { + "epoch": 0.6875891352917174, + "grad_norm": 1083.9935302734375, + "learning_rate": 1.1184919749344807e-05, + "loss": 77.1716, + "step": 170190 + }, + { + "epoch": 0.6876295365570849, + "grad_norm": 788.794189453125, + "learning_rate": 1.1182413185226833e-05, + "loss": 54.2379, + "step": 170200 + }, + { + "epoch": 0.6876699378224526, + "grad_norm": 884.9489135742188, + "learning_rate": 1.117990679301225e-05, + "loss": 78.813, + "step": 170210 + }, + { + "epoch": 0.6877103390878202, + "grad_norm": 833.0835571289062, + "learning_rate": 1.1177400572749927e-05, + "loss": 88.2412, + "step": 170220 + }, + { + "epoch": 0.6877507403531878, + "grad_norm": 751.8715209960938, + "learning_rate": 1.117489452448873e-05, + "loss": 69.7383, + "step": 170230 + }, + { + "epoch": 0.6877911416185555, + "grad_norm": 382.5711975097656, + "learning_rate": 1.1172388648277498e-05, + "loss": 62.0864, + "step": 170240 + }, + { + "epoch": 0.6878315428839231, + "grad_norm": 493.5008239746094, + "learning_rate": 1.1169882944165097e-05, + "loss": 78.9679, + "step": 170250 + }, + { + "epoch": 0.6878719441492908, + "grad_norm": 7315.1064453125, + "learning_rate": 1.1167377412200376e-05, + "loss": 102.4346, + "step": 170260 + }, + { + "epoch": 0.6879123454146584, + "grad_norm": 861.1680908203125, + "learning_rate": 1.1164872052432173e-05, + "loss": 69.2643, + "step": 170270 + }, + { + "epoch": 0.687952746680026, + "grad_norm": 830.2495727539062, + "learning_rate": 1.1162366864909351e-05, + "loss": 58.5761, + "step": 170280 + }, + { + "epoch": 0.6879931479453937, + "grad_norm": 397.1677551269531, + "learning_rate": 1.115986184968072e-05, + "loss": 69.7504, + "step": 170290 + }, + { + "epoch": 0.6880335492107613, + "grad_norm": 414.6068115234375, + "learning_rate": 1.1157357006795149e-05, + "loss": 40.7245, + "step": 170300 + }, + { + "epoch": 0.688073950476129, + "grad_norm": 577.7195434570312, + "learning_rate": 1.1154852336301448e-05, + "loss": 40.8022, + "step": 170310 + }, + { + "epoch": 0.6881143517414965, + "grad_norm": 411.77166748046875, + "learning_rate": 1.1152347838248453e-05, + "loss": 56.3541, + "step": 170320 + }, + { + "epoch": 0.6881547530068641, + "grad_norm": 607.9141235351562, + "learning_rate": 1.1149843512684995e-05, + "loss": 94.2162, + "step": 170330 + }, + { + "epoch": 0.6881951542722318, + "grad_norm": 808.8927001953125, + "learning_rate": 1.1147339359659899e-05, + "loss": 83.7594, + "step": 170340 + }, + { + "epoch": 0.6882355555375994, + "grad_norm": 631.6223754882812, + "learning_rate": 1.1144835379221974e-05, + "loss": 67.0791, + "step": 170350 + }, + { + "epoch": 0.688275956802967, + "grad_norm": 596.3816528320312, + "learning_rate": 1.114233157142004e-05, + "loss": 57.7944, + "step": 170360 + }, + { + "epoch": 0.6883163580683347, + "grad_norm": 885.7195434570312, + "learning_rate": 1.1139827936302914e-05, + "loss": 116.216, + "step": 170370 + }, + { + "epoch": 0.6883567593337023, + "grad_norm": 413.2193298339844, + "learning_rate": 1.1137324473919405e-05, + "loss": 71.2106, + "step": 170380 + }, + { + "epoch": 0.68839716059907, + "grad_norm": 1438.957763671875, + "learning_rate": 1.1134821184318323e-05, + "loss": 61.7235, + "step": 170390 + }, + { + "epoch": 0.6884375618644376, + "grad_norm": 430.3702087402344, + "learning_rate": 1.1132318067548451e-05, + "loss": 62.6104, + "step": 170400 + }, + { + "epoch": 0.6884779631298052, + "grad_norm": 
794.7964477539062, + "learning_rate": 1.1129815123658622e-05, + "loss": 64.73, + "step": 170410 + }, + { + "epoch": 0.6885183643951729, + "grad_norm": 826.8369750976562, + "learning_rate": 1.1127312352697603e-05, + "loss": 61.2301, + "step": 170420 + }, + { + "epoch": 0.6885587656605405, + "grad_norm": 183.0109100341797, + "learning_rate": 1.1124809754714198e-05, + "loss": 73.4023, + "step": 170430 + }, + { + "epoch": 0.6885991669259082, + "grad_norm": 1308.69287109375, + "learning_rate": 1.1122307329757207e-05, + "loss": 61.2556, + "step": 170440 + }, + { + "epoch": 0.6886395681912757, + "grad_norm": 949.5484008789062, + "learning_rate": 1.1119805077875388e-05, + "loss": 45.5966, + "step": 170450 + }, + { + "epoch": 0.6886799694566433, + "grad_norm": 1084.019287109375, + "learning_rate": 1.1117302999117557e-05, + "loss": 37.916, + "step": 170460 + }, + { + "epoch": 0.688720370722011, + "grad_norm": 732.361572265625, + "learning_rate": 1.1114801093532466e-05, + "loss": 66.7814, + "step": 170470 + }, + { + "epoch": 0.6887607719873786, + "grad_norm": 894.6026000976562, + "learning_rate": 1.1112299361168905e-05, + "loss": 49.6157, + "step": 170480 + }, + { + "epoch": 0.6888011732527463, + "grad_norm": 350.47271728515625, + "learning_rate": 1.1109797802075644e-05, + "loss": 97.3894, + "step": 170490 + }, + { + "epoch": 0.6888415745181139, + "grad_norm": 1561.4720458984375, + "learning_rate": 1.1107296416301456e-05, + "loss": 77.8794, + "step": 170500 + }, + { + "epoch": 0.6888819757834815, + "grad_norm": 1151.9366455078125, + "learning_rate": 1.1104795203895098e-05, + "loss": 59.7094, + "step": 170510 + }, + { + "epoch": 0.6889223770488492, + "grad_norm": 1375.0361328125, + "learning_rate": 1.1102294164905337e-05, + "loss": 47.6228, + "step": 170520 + }, + { + "epoch": 0.6889627783142168, + "grad_norm": 709.1282958984375, + "learning_rate": 1.109979329938093e-05, + "loss": 72.9083, + "step": 170530 + }, + { + "epoch": 0.6890031795795845, + "grad_norm": 993.3526000976562, + "learning_rate": 1.1097292607370637e-05, + "loss": 71.3096, + "step": 170540 + }, + { + "epoch": 0.6890435808449521, + "grad_norm": 409.3875427246094, + "learning_rate": 1.1094792088923217e-05, + "loss": 59.8062, + "step": 170550 + }, + { + "epoch": 0.6890839821103197, + "grad_norm": 583.9298706054688, + "learning_rate": 1.1092291744087391e-05, + "loss": 96.3697, + "step": 170560 + }, + { + "epoch": 0.6891243833756874, + "grad_norm": 852.6361083984375, + "learning_rate": 1.108979157291194e-05, + "loss": 70.2921, + "step": 170570 + }, + { + "epoch": 0.6891647846410549, + "grad_norm": 667.2911987304688, + "learning_rate": 1.1087291575445582e-05, + "loss": 58.3741, + "step": 170580 + }, + { + "epoch": 0.6892051859064225, + "grad_norm": 454.529541015625, + "learning_rate": 1.1084791751737063e-05, + "loss": 89.9509, + "step": 170590 + }, + { + "epoch": 0.6892455871717902, + "grad_norm": 1073.68408203125, + "learning_rate": 1.1082292101835121e-05, + "loss": 108.5789, + "step": 170600 + }, + { + "epoch": 0.6892859884371578, + "grad_norm": 514.6812133789062, + "learning_rate": 1.1079792625788484e-05, + "loss": 75.1857, + "step": 170610 + }, + { + "epoch": 0.6893263897025255, + "grad_norm": 852.1119384765625, + "learning_rate": 1.1077293323645887e-05, + "loss": 74.1803, + "step": 170620 + }, + { + "epoch": 0.6893667909678931, + "grad_norm": 638.98779296875, + "learning_rate": 1.1074794195456047e-05, + "loss": 47.274, + "step": 170630 + }, + { + "epoch": 0.6894071922332607, + "grad_norm": 639.303955078125, + "learning_rate": 
1.107229524126769e-05, + "loss": 90.7979, + "step": 170640 + }, + { + "epoch": 0.6894475934986284, + "grad_norm": 464.4703674316406, + "learning_rate": 1.106979646112953e-05, + "loss": 64.3316, + "step": 170650 + }, + { + "epoch": 0.689487994763996, + "grad_norm": 580.2459716796875, + "learning_rate": 1.1067297855090287e-05, + "loss": 102.9526, + "step": 170660 + }, + { + "epoch": 0.6895283960293637, + "grad_norm": 366.8062438964844, + "learning_rate": 1.1064799423198669e-05, + "loss": 46.5642, + "step": 170670 + }, + { + "epoch": 0.6895687972947313, + "grad_norm": 1136.0418701171875, + "learning_rate": 1.1062301165503397e-05, + "loss": 87.8157, + "step": 170680 + }, + { + "epoch": 0.689609198560099, + "grad_norm": 230.91734313964844, + "learning_rate": 1.1059803082053158e-05, + "loss": 58.3428, + "step": 170690 + }, + { + "epoch": 0.6896495998254666, + "grad_norm": 669.0912475585938, + "learning_rate": 1.1057305172896657e-05, + "loss": 55.5075, + "step": 170700 + }, + { + "epoch": 0.6896900010908341, + "grad_norm": 399.64739990234375, + "learning_rate": 1.1054807438082608e-05, + "loss": 71.8661, + "step": 170710 + }, + { + "epoch": 0.6897304023562018, + "grad_norm": 516.4322509765625, + "learning_rate": 1.1052309877659675e-05, + "loss": 45.8104, + "step": 170720 + }, + { + "epoch": 0.6897708036215694, + "grad_norm": 1094.2000732421875, + "learning_rate": 1.1049812491676581e-05, + "loss": 66.1974, + "step": 170730 + }, + { + "epoch": 0.689811204886937, + "grad_norm": 1035.8543701171875, + "learning_rate": 1.1047315280181986e-05, + "loss": 67.5227, + "step": 170740 + }, + { + "epoch": 0.6898516061523047, + "grad_norm": 966.3618774414062, + "learning_rate": 1.1044818243224608e-05, + "loss": 119.2222, + "step": 170750 + }, + { + "epoch": 0.6898920074176723, + "grad_norm": 703.4903564453125, + "learning_rate": 1.1042321380853101e-05, + "loss": 84.0149, + "step": 170760 + }, + { + "epoch": 0.68993240868304, + "grad_norm": 531.223388671875, + "learning_rate": 1.1039824693116145e-05, + "loss": 50.3926, + "step": 170770 + }, + { + "epoch": 0.6899728099484076, + "grad_norm": 2195.149658203125, + "learning_rate": 1.1037328180062432e-05, + "loss": 104.2775, + "step": 170780 + }, + { + "epoch": 0.6900132112137752, + "grad_norm": 1229.27978515625, + "learning_rate": 1.1034831841740612e-05, + "loss": 70.4516, + "step": 170790 + }, + { + "epoch": 0.6900536124791429, + "grad_norm": 716.8158569335938, + "learning_rate": 1.1032335678199359e-05, + "loss": 67.7745, + "step": 170800 + }, + { + "epoch": 0.6900940137445105, + "grad_norm": 845.9943237304688, + "learning_rate": 1.1029839689487339e-05, + "loss": 75.3365, + "step": 170810 + }, + { + "epoch": 0.6901344150098782, + "grad_norm": 572.8285522460938, + "learning_rate": 1.1027343875653211e-05, + "loss": 70.19, + "step": 170820 + }, + { + "epoch": 0.6901748162752458, + "grad_norm": 1039.68798828125, + "learning_rate": 1.1024848236745637e-05, + "loss": 65.0603, + "step": 170830 + }, + { + "epoch": 0.6902152175406133, + "grad_norm": 1127.887451171875, + "learning_rate": 1.1022352772813274e-05, + "loss": 63.4828, + "step": 170840 + }, + { + "epoch": 0.690255618805981, + "grad_norm": 488.48687744140625, + "learning_rate": 1.101985748390476e-05, + "loss": 42.1404, + "step": 170850 + }, + { + "epoch": 0.6902960200713486, + "grad_norm": 1653.6617431640625, + "learning_rate": 1.1017362370068742e-05, + "loss": 81.6065, + "step": 170860 + }, + { + "epoch": 0.6903364213367162, + "grad_norm": 788.9071044921875, + "learning_rate": 1.1014867431353881e-05, + "loss": 
77.8599, + "step": 170870 + }, + { + "epoch": 0.6903768226020839, + "grad_norm": 796.6068115234375, + "learning_rate": 1.101237266780879e-05, + "loss": 92.302, + "step": 170880 + }, + { + "epoch": 0.6904172238674515, + "grad_norm": 781.5486450195312, + "learning_rate": 1.1009878079482135e-05, + "loss": 73.3938, + "step": 170890 + }, + { + "epoch": 0.6904576251328192, + "grad_norm": 383.04827880859375, + "learning_rate": 1.100738366642252e-05, + "loss": 45.4773, + "step": 170900 + }, + { + "epoch": 0.6904980263981868, + "grad_norm": 534.9193725585938, + "learning_rate": 1.1004889428678606e-05, + "loss": 73.5285, + "step": 170910 + }, + { + "epoch": 0.6905384276635544, + "grad_norm": 537.728515625, + "learning_rate": 1.1002395366298994e-05, + "loss": 65.5574, + "step": 170920 + }, + { + "epoch": 0.6905788289289221, + "grad_norm": 1504.6822509765625, + "learning_rate": 1.099990147933232e-05, + "loss": 112.1978, + "step": 170930 + }, + { + "epoch": 0.6906192301942897, + "grad_norm": 628.0166625976562, + "learning_rate": 1.0997407767827198e-05, + "loss": 63.6757, + "step": 170940 + }, + { + "epoch": 0.6906596314596574, + "grad_norm": 691.5441284179688, + "learning_rate": 1.0994914231832254e-05, + "loss": 85.271, + "step": 170950 + }, + { + "epoch": 0.6907000327250249, + "grad_norm": 651.933837890625, + "learning_rate": 1.0992420871396088e-05, + "loss": 81.7237, + "step": 170960 + }, + { + "epoch": 0.6907404339903925, + "grad_norm": 430.8685302734375, + "learning_rate": 1.0989927686567312e-05, + "loss": 50.3058, + "step": 170970 + }, + { + "epoch": 0.6907808352557602, + "grad_norm": 638.0819091796875, + "learning_rate": 1.0987434677394536e-05, + "loss": 49.3573, + "step": 170980 + }, + { + "epoch": 0.6908212365211278, + "grad_norm": 1014.9270629882812, + "learning_rate": 1.098494184392636e-05, + "loss": 56.378, + "step": 170990 + }, + { + "epoch": 0.6908616377864955, + "grad_norm": 742.0651245117188, + "learning_rate": 1.098244918621139e-05, + "loss": 67.9979, + "step": 171000 + }, + { + "epoch": 0.6909020390518631, + "grad_norm": 772.7244262695312, + "learning_rate": 1.0979956704298202e-05, + "loss": 77.7171, + "step": 171010 + }, + { + "epoch": 0.6909424403172307, + "grad_norm": 587.01806640625, + "learning_rate": 1.097746439823542e-05, + "loss": 58.9913, + "step": 171020 + }, + { + "epoch": 0.6909828415825984, + "grad_norm": 1604.9053955078125, + "learning_rate": 1.0974972268071604e-05, + "loss": 104.3172, + "step": 171030 + }, + { + "epoch": 0.691023242847966, + "grad_norm": 1155.2435302734375, + "learning_rate": 1.0972480313855349e-05, + "loss": 65.4619, + "step": 171040 + }, + { + "epoch": 0.6910636441133337, + "grad_norm": 6916.79296875, + "learning_rate": 1.0969988535635247e-05, + "loss": 151.111, + "step": 171050 + }, + { + "epoch": 0.6911040453787013, + "grad_norm": 841.49853515625, + "learning_rate": 1.0967496933459851e-05, + "loss": 46.1744, + "step": 171060 + }, + { + "epoch": 0.6911444466440689, + "grad_norm": 635.8493041992188, + "learning_rate": 1.0965005507377771e-05, + "loss": 93.8837, + "step": 171070 + }, + { + "epoch": 0.6911848479094366, + "grad_norm": 685.275390625, + "learning_rate": 1.096251425743755e-05, + "loss": 59.8375, + "step": 171080 + }, + { + "epoch": 0.6912252491748041, + "grad_norm": 579.8828735351562, + "learning_rate": 1.0960023183687766e-05, + "loss": 59.0435, + "step": 171090 + }, + { + "epoch": 0.6912656504401717, + "grad_norm": 944.5254516601562, + "learning_rate": 1.0957532286176983e-05, + "loss": 55.4914, + "step": 171100 + }, + { + "epoch": 
0.6913060517055394, + "grad_norm": 558.6163940429688, + "learning_rate": 1.0955041564953767e-05, + "loss": 93.0062, + "step": 171110 + }, + { + "epoch": 0.691346452970907, + "grad_norm": 372.6169128417969, + "learning_rate": 1.095255102006668e-05, + "loss": 54.8886, + "step": 171120 + }, + { + "epoch": 0.6913868542362747, + "grad_norm": 672.4386596679688, + "learning_rate": 1.0950060651564258e-05, + "loss": 72.078, + "step": 171130 + }, + { + "epoch": 0.6914272555016423, + "grad_norm": 1021.0538330078125, + "learning_rate": 1.0947570459495065e-05, + "loss": 54.774, + "step": 171140 + }, + { + "epoch": 0.6914676567670099, + "grad_norm": 1104.8399658203125, + "learning_rate": 1.0945080443907648e-05, + "loss": 73.572, + "step": 171150 + }, + { + "epoch": 0.6915080580323776, + "grad_norm": 632.65380859375, + "learning_rate": 1.0942590604850554e-05, + "loss": 75.7703, + "step": 171160 + }, + { + "epoch": 0.6915484592977452, + "grad_norm": 1066.7193603515625, + "learning_rate": 1.0940100942372305e-05, + "loss": 87.3709, + "step": 171170 + }, + { + "epoch": 0.6915888605631129, + "grad_norm": 446.73101806640625, + "learning_rate": 1.0937611456521467e-05, + "loss": 62.1427, + "step": 171180 + }, + { + "epoch": 0.6916292618284805, + "grad_norm": 1158.9124755859375, + "learning_rate": 1.0935122147346551e-05, + "loss": 105.4463, + "step": 171190 + }, + { + "epoch": 0.6916696630938481, + "grad_norm": 886.2297973632812, + "learning_rate": 1.0932633014896097e-05, + "loss": 62.6753, + "step": 171200 + }, + { + "epoch": 0.6917100643592158, + "grad_norm": 1166.757568359375, + "learning_rate": 1.0930144059218639e-05, + "loss": 81.515, + "step": 171210 + }, + { + "epoch": 0.6917504656245833, + "grad_norm": 1090.5025634765625, + "learning_rate": 1.0927655280362677e-05, + "loss": 61.9392, + "step": 171220 + }, + { + "epoch": 0.691790866889951, + "grad_norm": 846.1004638671875, + "learning_rate": 1.092516667837676e-05, + "loss": 71.9683, + "step": 171230 + }, + { + "epoch": 0.6918312681553186, + "grad_norm": 901.7445068359375, + "learning_rate": 1.0922678253309382e-05, + "loss": 42.6417, + "step": 171240 + }, + { + "epoch": 0.6918716694206862, + "grad_norm": 822.11376953125, + "learning_rate": 1.0920190005209066e-05, + "loss": 88.1688, + "step": 171250 + }, + { + "epoch": 0.6919120706860539, + "grad_norm": 886.5252685546875, + "learning_rate": 1.091770193412432e-05, + "loss": 66.4937, + "step": 171260 + }, + { + "epoch": 0.6919524719514215, + "grad_norm": 500.06884765625, + "learning_rate": 1.0915214040103652e-05, + "loss": 59.6578, + "step": 171270 + }, + { + "epoch": 0.6919928732167892, + "grad_norm": 838.9014282226562, + "learning_rate": 1.0912726323195564e-05, + "loss": 50.5852, + "step": 171280 + }, + { + "epoch": 0.6920332744821568, + "grad_norm": 803.5316772460938, + "learning_rate": 1.0910238783448559e-05, + "loss": 73.0007, + "step": 171290 + }, + { + "epoch": 0.6920736757475244, + "grad_norm": 857.1024169921875, + "learning_rate": 1.0907751420911124e-05, + "loss": 124.0513, + "step": 171300 + }, + { + "epoch": 0.6921140770128921, + "grad_norm": 564.76611328125, + "learning_rate": 1.0905264235631754e-05, + "loss": 81.4392, + "step": 171310 + }, + { + "epoch": 0.6921544782782597, + "grad_norm": 480.101318359375, + "learning_rate": 1.090277722765895e-05, + "loss": 40.5627, + "step": 171320 + }, + { + "epoch": 0.6921948795436274, + "grad_norm": 555.450927734375, + "learning_rate": 1.090029039704117e-05, + "loss": 93.7533, + "step": 171330 + }, + { + "epoch": 0.692235280808995, + "grad_norm": 
732.5763549804688, + "learning_rate": 1.0897803743826932e-05, + "loss": 52.1487, + "step": 171340 + }, + { + "epoch": 0.6922756820743625, + "grad_norm": 434.3310852050781, + "learning_rate": 1.0895317268064676e-05, + "loss": 70.1627, + "step": 171350 + }, + { + "epoch": 0.6923160833397302, + "grad_norm": 570.81298828125, + "learning_rate": 1.0892830969802916e-05, + "loss": 64.7463, + "step": 171360 + }, + { + "epoch": 0.6923564846050978, + "grad_norm": 795.7868041992188, + "learning_rate": 1.0890344849090097e-05, + "loss": 88.4856, + "step": 171370 + }, + { + "epoch": 0.6923968858704654, + "grad_norm": 738.5986938476562, + "learning_rate": 1.0887858905974693e-05, + "loss": 72.1743, + "step": 171380 + }, + { + "epoch": 0.6924372871358331, + "grad_norm": 701.0777587890625, + "learning_rate": 1.0885373140505182e-05, + "loss": 60.3807, + "step": 171390 + }, + { + "epoch": 0.6924776884012007, + "grad_norm": 732.67236328125, + "learning_rate": 1.0882887552730006e-05, + "loss": 98.6312, + "step": 171400 + }, + { + "epoch": 0.6925180896665684, + "grad_norm": 368.51397705078125, + "learning_rate": 1.0880402142697629e-05, + "loss": 54.9513, + "step": 171410 + }, + { + "epoch": 0.692558490931936, + "grad_norm": 265.89044189453125, + "learning_rate": 1.0877916910456508e-05, + "loss": 71.6977, + "step": 171420 + }, + { + "epoch": 0.6925988921973036, + "grad_norm": 571.1914672851562, + "learning_rate": 1.0875431856055092e-05, + "loss": 72.4552, + "step": 171430 + }, + { + "epoch": 0.6926392934626713, + "grad_norm": 797.9956665039062, + "learning_rate": 1.0872946979541829e-05, + "loss": 97.244, + "step": 171440 + }, + { + "epoch": 0.6926796947280389, + "grad_norm": 335.3492431640625, + "learning_rate": 1.0870462280965175e-05, + "loss": 48.1712, + "step": 171450 + }, + { + "epoch": 0.6927200959934066, + "grad_norm": 1045.6043701171875, + "learning_rate": 1.0867977760373549e-05, + "loss": 52.9563, + "step": 171460 + }, + { + "epoch": 0.6927604972587742, + "grad_norm": 737.7849731445312, + "learning_rate": 1.0865493417815399e-05, + "loss": 86.0553, + "step": 171470 + }, + { + "epoch": 0.6928008985241417, + "grad_norm": 580.3380126953125, + "learning_rate": 1.0863009253339163e-05, + "loss": 56.691, + "step": 171480 + }, + { + "epoch": 0.6928412997895094, + "grad_norm": 976.778564453125, + "learning_rate": 1.086052526699325e-05, + "loss": 49.0536, + "step": 171490 + }, + { + "epoch": 0.692881701054877, + "grad_norm": 788.131591796875, + "learning_rate": 1.0858041458826123e-05, + "loss": 71.477, + "step": 171500 + }, + { + "epoch": 0.6929221023202446, + "grad_norm": 926.5791015625, + "learning_rate": 1.0855557828886165e-05, + "loss": 73.3991, + "step": 171510 + }, + { + "epoch": 0.6929625035856123, + "grad_norm": 522.2352294921875, + "learning_rate": 1.0853074377221832e-05, + "loss": 82.3226, + "step": 171520 + }, + { + "epoch": 0.6930029048509799, + "grad_norm": 801.1136474609375, + "learning_rate": 1.0850591103881514e-05, + "loss": 66.0896, + "step": 171530 + }, + { + "epoch": 0.6930433061163476, + "grad_norm": 1087.4598388671875, + "learning_rate": 1.0848108008913632e-05, + "loss": 75.8253, + "step": 171540 + }, + { + "epoch": 0.6930837073817152, + "grad_norm": 512.7349853515625, + "learning_rate": 1.0845625092366605e-05, + "loss": 46.9937, + "step": 171550 + }, + { + "epoch": 0.6931241086470828, + "grad_norm": 2235.86572265625, + "learning_rate": 1.0843142354288814e-05, + "loss": 70.6441, + "step": 171560 + }, + { + "epoch": 0.6931645099124505, + "grad_norm": 592.5457763671875, + "learning_rate": 
1.0840659794728694e-05, + "loss": 62.25, + "step": 171570 + }, + { + "epoch": 0.6932049111778181, + "grad_norm": 686.6892700195312, + "learning_rate": 1.0838177413734618e-05, + "loss": 50.3707, + "step": 171580 + }, + { + "epoch": 0.6932453124431858, + "grad_norm": 1464.3907470703125, + "learning_rate": 1.0835695211354991e-05, + "loss": 94.2467, + "step": 171590 + }, + { + "epoch": 0.6932857137085533, + "grad_norm": 1343.3033447265625, + "learning_rate": 1.0833213187638203e-05, + "loss": 73.3857, + "step": 171600 + }, + { + "epoch": 0.6933261149739209, + "grad_norm": 419.3915100097656, + "learning_rate": 1.0830731342632651e-05, + "loss": 55.1395, + "step": 171610 + }, + { + "epoch": 0.6933665162392886, + "grad_norm": 632.2639770507812, + "learning_rate": 1.0828249676386698e-05, + "loss": 69.8246, + "step": 171620 + }, + { + "epoch": 0.6934069175046562, + "grad_norm": 455.3678283691406, + "learning_rate": 1.082576818894875e-05, + "loss": 37.9506, + "step": 171630 + }, + { + "epoch": 0.6934473187700239, + "grad_norm": 547.6918334960938, + "learning_rate": 1.0823286880367174e-05, + "loss": 53.3768, + "step": 171640 + }, + { + "epoch": 0.6934877200353915, + "grad_norm": 1228.1483154296875, + "learning_rate": 1.082080575069034e-05, + "loss": 59.0609, + "step": 171650 + }, + { + "epoch": 0.6935281213007591, + "grad_norm": 889.5916137695312, + "learning_rate": 1.0818324799966633e-05, + "loss": 85.0031, + "step": 171660 + }, + { + "epoch": 0.6935685225661268, + "grad_norm": 878.674560546875, + "learning_rate": 1.0815844028244396e-05, + "loss": 55.3746, + "step": 171670 + }, + { + "epoch": 0.6936089238314944, + "grad_norm": 497.1383972167969, + "learning_rate": 1.0813363435572022e-05, + "loss": 62.021, + "step": 171680 + }, + { + "epoch": 0.6936493250968621, + "grad_norm": 421.7792053222656, + "learning_rate": 1.081088302199785e-05, + "loss": 90.4624, + "step": 171690 + }, + { + "epoch": 0.6936897263622297, + "grad_norm": 642.3901977539062, + "learning_rate": 1.0808402787570245e-05, + "loss": 137.0716, + "step": 171700 + }, + { + "epoch": 0.6937301276275973, + "grad_norm": 588.8926391601562, + "learning_rate": 1.0805922732337559e-05, + "loss": 46.7877, + "step": 171710 + }, + { + "epoch": 0.693770528892965, + "grad_norm": 635.0932006835938, + "learning_rate": 1.0803442856348143e-05, + "loss": 51.1228, + "step": 171720 + }, + { + "epoch": 0.6938109301583325, + "grad_norm": 575.8355102539062, + "learning_rate": 1.0800963159650353e-05, + "loss": 55.1768, + "step": 171730 + }, + { + "epoch": 0.6938513314237001, + "grad_norm": 340.1691589355469, + "learning_rate": 1.0798483642292509e-05, + "loss": 82.0727, + "step": 171740 + }, + { + "epoch": 0.6938917326890678, + "grad_norm": 810.4700927734375, + "learning_rate": 1.0796004304322969e-05, + "loss": 66.6963, + "step": 171750 + }, + { + "epoch": 0.6939321339544354, + "grad_norm": 894.8250122070312, + "learning_rate": 1.0793525145790063e-05, + "loss": 120.6772, + "step": 171760 + }, + { + "epoch": 0.6939725352198031, + "grad_norm": 814.416259765625, + "learning_rate": 1.079104616674213e-05, + "loss": 74.0492, + "step": 171770 + }, + { + "epoch": 0.6940129364851707, + "grad_norm": 961.5559692382812, + "learning_rate": 1.0788567367227479e-05, + "loss": 76.3741, + "step": 171780 + }, + { + "epoch": 0.6940533377505383, + "grad_norm": 1197.0753173828125, + "learning_rate": 1.0786088747294462e-05, + "loss": 97.561, + "step": 171790 + }, + { + "epoch": 0.694093739015906, + "grad_norm": 646.81298828125, + "learning_rate": 1.0783610306991386e-05, + "loss": 68.3652, 
+ "step": 171800 + }, + { + "epoch": 0.6941341402812736, + "grad_norm": 791.5086059570312, + "learning_rate": 1.0781132046366567e-05, + "loss": 50.765, + "step": 171810 + }, + { + "epoch": 0.6941745415466413, + "grad_norm": 525.9727783203125, + "learning_rate": 1.0778653965468337e-05, + "loss": 68.188, + "step": 171820 + }, + { + "epoch": 0.6942149428120089, + "grad_norm": 569.475830078125, + "learning_rate": 1.0776176064344976e-05, + "loss": 89.8805, + "step": 171830 + }, + { + "epoch": 0.6942553440773765, + "grad_norm": 519.2679443359375, + "learning_rate": 1.077369834304483e-05, + "loss": 66.5987, + "step": 171840 + }, + { + "epoch": 0.6942957453427442, + "grad_norm": 389.73699951171875, + "learning_rate": 1.0771220801616177e-05, + "loss": 58.8777, + "step": 171850 + }, + { + "epoch": 0.6943361466081117, + "grad_norm": 698.50537109375, + "learning_rate": 1.0768743440107322e-05, + "loss": 78.8438, + "step": 171860 + }, + { + "epoch": 0.6943765478734794, + "grad_norm": 652.3718872070312, + "learning_rate": 1.076626625856657e-05, + "loss": 83.5713, + "step": 171870 + }, + { + "epoch": 0.694416949138847, + "grad_norm": 552.1219482421875, + "learning_rate": 1.0763789257042209e-05, + "loss": 49.429, + "step": 171880 + }, + { + "epoch": 0.6944573504042146, + "grad_norm": 617.068603515625, + "learning_rate": 1.076131243558254e-05, + "loss": 84.3472, + "step": 171890 + }, + { + "epoch": 0.6944977516695823, + "grad_norm": 807.2919921875, + "learning_rate": 1.0758835794235834e-05, + "loss": 59.857, + "step": 171900 + }, + { + "epoch": 0.6945381529349499, + "grad_norm": 657.875, + "learning_rate": 1.075635933305038e-05, + "loss": 78.267, + "step": 171910 + }, + { + "epoch": 0.6945785542003176, + "grad_norm": 1176.5511474609375, + "learning_rate": 1.0753883052074463e-05, + "loss": 80.9196, + "step": 171920 + }, + { + "epoch": 0.6946189554656852, + "grad_norm": 644.6686401367188, + "learning_rate": 1.0751406951356362e-05, + "loss": 65.0508, + "step": 171930 + }, + { + "epoch": 0.6946593567310528, + "grad_norm": 601.62744140625, + "learning_rate": 1.0748931030944329e-05, + "loss": 72.3703, + "step": 171940 + }, + { + "epoch": 0.6946997579964205, + "grad_norm": 918.2655029296875, + "learning_rate": 1.0746455290886664e-05, + "loss": 88.1822, + "step": 171950 + }, + { + "epoch": 0.6947401592617881, + "grad_norm": 662.8955688476562, + "learning_rate": 1.0743979731231602e-05, + "loss": 60.0929, + "step": 171960 + }, + { + "epoch": 0.6947805605271558, + "grad_norm": 1294.0030517578125, + "learning_rate": 1.0741504352027435e-05, + "loss": 79.0128, + "step": 171970 + }, + { + "epoch": 0.6948209617925234, + "grad_norm": 681.3592529296875, + "learning_rate": 1.0739029153322397e-05, + "loss": 67.274, + "step": 171980 + }, + { + "epoch": 0.6948613630578909, + "grad_norm": 1027.0107421875, + "learning_rate": 1.0736554135164758e-05, + "loss": 57.3077, + "step": 171990 + }, + { + "epoch": 0.6949017643232586, + "grad_norm": 645.4635009765625, + "learning_rate": 1.0734079297602772e-05, + "loss": 100.2497, + "step": 172000 + }, + { + "epoch": 0.6949421655886262, + "grad_norm": 1811.3453369140625, + "learning_rate": 1.0731604640684673e-05, + "loss": 78.5891, + "step": 172010 + }, + { + "epoch": 0.6949825668539938, + "grad_norm": 802.6558227539062, + "learning_rate": 1.0729130164458712e-05, + "loss": 45.4093, + "step": 172020 + }, + { + "epoch": 0.6950229681193615, + "grad_norm": 478.5699462890625, + "learning_rate": 1.072665586897313e-05, + "loss": 52.0326, + "step": 172030 + }, + { + "epoch": 0.6950633693847291, + 
"grad_norm": 503.6991882324219, + "learning_rate": 1.0724181754276168e-05, + "loss": 79.5418, + "step": 172040 + }, + { + "epoch": 0.6951037706500968, + "grad_norm": 676.9739379882812, + "learning_rate": 1.0721707820416057e-05, + "loss": 55.9592, + "step": 172050 + }, + { + "epoch": 0.6951441719154644, + "grad_norm": 713.9236450195312, + "learning_rate": 1.0719234067441038e-05, + "loss": 62.0463, + "step": 172060 + }, + { + "epoch": 0.695184573180832, + "grad_norm": 476.35552978515625, + "learning_rate": 1.0716760495399321e-05, + "loss": 68.2985, + "step": 172070 + }, + { + "epoch": 0.6952249744461997, + "grad_norm": 2027.3673095703125, + "learning_rate": 1.0714287104339136e-05, + "loss": 71.6386, + "step": 172080 + }, + { + "epoch": 0.6952653757115673, + "grad_norm": 3370.59130859375, + "learning_rate": 1.0711813894308707e-05, + "loss": 114.8109, + "step": 172090 + }, + { + "epoch": 0.695305776976935, + "grad_norm": 690.0938110351562, + "learning_rate": 1.0709340865356247e-05, + "loss": 93.6415, + "step": 172100 + }, + { + "epoch": 0.6953461782423026, + "grad_norm": 229.73426818847656, + "learning_rate": 1.0706868017529977e-05, + "loss": 96.5642, + "step": 172110 + }, + { + "epoch": 0.6953865795076701, + "grad_norm": 644.107666015625, + "learning_rate": 1.0704395350878082e-05, + "loss": 74.7592, + "step": 172120 + }, + { + "epoch": 0.6954269807730378, + "grad_norm": 1066.85009765625, + "learning_rate": 1.0701922865448806e-05, + "loss": 77.1538, + "step": 172130 + }, + { + "epoch": 0.6954673820384054, + "grad_norm": 528.4202880859375, + "learning_rate": 1.069945056129032e-05, + "loss": 64.4675, + "step": 172140 + }, + { + "epoch": 0.695507783303773, + "grad_norm": 1783.09521484375, + "learning_rate": 1.0696978438450834e-05, + "loss": 71.9349, + "step": 172150 + }, + { + "epoch": 0.6955481845691407, + "grad_norm": 454.6905822753906, + "learning_rate": 1.0694506496978549e-05, + "loss": 47.8416, + "step": 172160 + }, + { + "epoch": 0.6955885858345083, + "grad_norm": 576.7335815429688, + "learning_rate": 1.0692034736921637e-05, + "loss": 51.5597, + "step": 172170 + }, + { + "epoch": 0.695628987099876, + "grad_norm": 450.7715759277344, + "learning_rate": 1.0689563158328316e-05, + "loss": 77.5398, + "step": 172180 + }, + { + "epoch": 0.6956693883652436, + "grad_norm": 415.6106872558594, + "learning_rate": 1.0687091761246746e-05, + "loss": 63.6526, + "step": 172190 + }, + { + "epoch": 0.6957097896306113, + "grad_norm": 1294.03515625, + "learning_rate": 1.0684620545725116e-05, + "loss": 107.6484, + "step": 172200 + }, + { + "epoch": 0.6957501908959789, + "grad_norm": 3625.677001953125, + "learning_rate": 1.0682149511811604e-05, + "loss": 109.1053, + "step": 172210 + }, + { + "epoch": 0.6957905921613465, + "grad_norm": 1004.201416015625, + "learning_rate": 1.0679678659554393e-05, + "loss": 69.1173, + "step": 172220 + }, + { + "epoch": 0.6958309934267142, + "grad_norm": 974.859619140625, + "learning_rate": 1.0677207989001637e-05, + "loss": 76.453, + "step": 172230 + }, + { + "epoch": 0.6958713946920817, + "grad_norm": 921.9066772460938, + "learning_rate": 1.0674737500201511e-05, + "loss": 53.2309, + "step": 172240 + }, + { + "epoch": 0.6959117959574493, + "grad_norm": 822.69189453125, + "learning_rate": 1.0672267193202178e-05, + "loss": 49.0424, + "step": 172250 + }, + { + "epoch": 0.695952197222817, + "grad_norm": 699.107666015625, + "learning_rate": 1.0669797068051797e-05, + "loss": 73.0365, + "step": 172260 + }, + { + "epoch": 0.6959925984881846, + "grad_norm": 886.8864135742188, + 
"learning_rate": 1.0667327124798537e-05, + "loss": 50.4326, + "step": 172270 + }, + { + "epoch": 0.6960329997535523, + "grad_norm": 553.4960327148438, + "learning_rate": 1.0664857363490522e-05, + "loss": 72.2531, + "step": 172280 + }, + { + "epoch": 0.6960734010189199, + "grad_norm": 719.7606811523438, + "learning_rate": 1.0662387784175934e-05, + "loss": 74.9054, + "step": 172290 + }, + { + "epoch": 0.6961138022842875, + "grad_norm": 530.0975952148438, + "learning_rate": 1.0659918386902897e-05, + "loss": 92.3417, + "step": 172300 + }, + { + "epoch": 0.6961542035496552, + "grad_norm": 964.0980834960938, + "learning_rate": 1.0657449171719558e-05, + "loss": 68.2931, + "step": 172310 + }, + { + "epoch": 0.6961946048150228, + "grad_norm": 915.2354736328125, + "learning_rate": 1.0654980138674058e-05, + "loss": 69.0247, + "step": 172320 + }, + { + "epoch": 0.6962350060803905, + "grad_norm": 722.9376831054688, + "learning_rate": 1.0652511287814531e-05, + "loss": 37.9394, + "step": 172330 + }, + { + "epoch": 0.6962754073457581, + "grad_norm": 403.175048828125, + "learning_rate": 1.0650042619189116e-05, + "loss": 79.828, + "step": 172340 + }, + { + "epoch": 0.6963158086111257, + "grad_norm": 667.48828125, + "learning_rate": 1.0647574132845929e-05, + "loss": 68.6508, + "step": 172350 + }, + { + "epoch": 0.6963562098764934, + "grad_norm": 124.95016479492188, + "learning_rate": 1.0645105828833095e-05, + "loss": 80.4912, + "step": 172360 + }, + { + "epoch": 0.6963966111418609, + "grad_norm": 1363.3160400390625, + "learning_rate": 1.0642637707198741e-05, + "loss": 79.0305, + "step": 172370 + }, + { + "epoch": 0.6964370124072286, + "grad_norm": 937.344970703125, + "learning_rate": 1.0640169767990992e-05, + "loss": 68.0163, + "step": 172380 + }, + { + "epoch": 0.6964774136725962, + "grad_norm": 831.77880859375, + "learning_rate": 1.0637702011257936e-05, + "loss": 60.28, + "step": 172390 + }, + { + "epoch": 0.6965178149379638, + "grad_norm": 356.55963134765625, + "learning_rate": 1.0635234437047712e-05, + "loss": 69.0567, + "step": 172400 + }, + { + "epoch": 0.6965582162033315, + "grad_norm": 653.2261962890625, + "learning_rate": 1.063276704540841e-05, + "loss": 61.9683, + "step": 172410 + }, + { + "epoch": 0.6965986174686991, + "grad_norm": 565.471435546875, + "learning_rate": 1.0630299836388135e-05, + "loss": 55.8114, + "step": 172420 + }, + { + "epoch": 0.6966390187340668, + "grad_norm": 705.4918212890625, + "learning_rate": 1.0627832810034996e-05, + "loss": 83.2872, + "step": 172430 + }, + { + "epoch": 0.6966794199994344, + "grad_norm": 1923.474365234375, + "learning_rate": 1.0625365966397068e-05, + "loss": 62.2628, + "step": 172440 + }, + { + "epoch": 0.696719821264802, + "grad_norm": 750.3751831054688, + "learning_rate": 1.062289930552247e-05, + "loss": 56.0264, + "step": 172450 + }, + { + "epoch": 0.6967602225301697, + "grad_norm": 792.204345703125, + "learning_rate": 1.0620432827459271e-05, + "loss": 51.65, + "step": 172460 + }, + { + "epoch": 0.6968006237955373, + "grad_norm": 333.4455871582031, + "learning_rate": 1.061796653225556e-05, + "loss": 58.6105, + "step": 172470 + }, + { + "epoch": 0.696841025060905, + "grad_norm": 549.9801025390625, + "learning_rate": 1.0615500419959428e-05, + "loss": 62.0303, + "step": 172480 + }, + { + "epoch": 0.6968814263262726, + "grad_norm": 849.53515625, + "learning_rate": 1.0613034490618942e-05, + "loss": 65.1754, + "step": 172490 + }, + { + "epoch": 0.6969218275916401, + "grad_norm": 574.5184326171875, + "learning_rate": 1.061056874428219e-05, + "loss": 89.4561, 
+ "step": 172500 + }, + { + "epoch": 0.6969622288570078, + "grad_norm": 831.1426391601562, + "learning_rate": 1.0608103180997227e-05, + "loss": 75.6241, + "step": 172510 + }, + { + "epoch": 0.6970026301223754, + "grad_norm": 702.5039672851562, + "learning_rate": 1.0605637800812128e-05, + "loss": 50.3405, + "step": 172520 + }, + { + "epoch": 0.697043031387743, + "grad_norm": 1086.7628173828125, + "learning_rate": 1.0603172603774957e-05, + "loss": 76.4663, + "step": 172530 + }, + { + "epoch": 0.6970834326531107, + "grad_norm": 1401.9698486328125, + "learning_rate": 1.0600707589933781e-05, + "loss": 69.523, + "step": 172540 + }, + { + "epoch": 0.6971238339184783, + "grad_norm": 538.4107055664062, + "learning_rate": 1.0598242759336632e-05, + "loss": 73.906, + "step": 172550 + }, + { + "epoch": 0.697164235183846, + "grad_norm": 511.6389465332031, + "learning_rate": 1.0595778112031599e-05, + "loss": 77.5586, + "step": 172560 + }, + { + "epoch": 0.6972046364492136, + "grad_norm": 207.5604248046875, + "learning_rate": 1.0593313648066705e-05, + "loss": 52.4188, + "step": 172570 + }, + { + "epoch": 0.6972450377145812, + "grad_norm": 564.7003173828125, + "learning_rate": 1.0590849367490004e-05, + "loss": 65.0782, + "step": 172580 + }, + { + "epoch": 0.6972854389799489, + "grad_norm": 437.78662109375, + "learning_rate": 1.058838527034954e-05, + "loss": 65.8208, + "step": 172590 + }, + { + "epoch": 0.6973258402453165, + "grad_norm": 691.9917602539062, + "learning_rate": 1.0585921356693349e-05, + "loss": 71.1012, + "step": 172600 + }, + { + "epoch": 0.6973662415106842, + "grad_norm": 838.7626342773438, + "learning_rate": 1.0583457626569475e-05, + "loss": 121.9369, + "step": 172610 + }, + { + "epoch": 0.6974066427760518, + "grad_norm": 531.8385620117188, + "learning_rate": 1.0580994080025927e-05, + "loss": 69.2689, + "step": 172620 + }, + { + "epoch": 0.6974470440414193, + "grad_norm": 488.455322265625, + "learning_rate": 1.0578530717110767e-05, + "loss": 87.4904, + "step": 172630 + }, + { + "epoch": 0.697487445306787, + "grad_norm": 917.4891967773438, + "learning_rate": 1.0576067537871993e-05, + "loss": 86.3237, + "step": 172640 + }, + { + "epoch": 0.6975278465721546, + "grad_norm": 395.8478088378906, + "learning_rate": 1.057360454235763e-05, + "loss": 54.1169, + "step": 172650 + }, + { + "epoch": 0.6975682478375222, + "grad_norm": 513.9642333984375, + "learning_rate": 1.0571141730615704e-05, + "loss": 78.0164, + "step": 172660 + }, + { + "epoch": 0.6976086491028899, + "grad_norm": 435.078857421875, + "learning_rate": 1.056867910269423e-05, + "loss": 77.3586, + "step": 172670 + }, + { + "epoch": 0.6976490503682575, + "grad_norm": 657.447021484375, + "learning_rate": 1.0566216658641205e-05, + "loss": 48.0827, + "step": 172680 + }, + { + "epoch": 0.6976894516336252, + "grad_norm": 272.628662109375, + "learning_rate": 1.0563754398504642e-05, + "loss": 46.0665, + "step": 172690 + }, + { + "epoch": 0.6977298528989928, + "grad_norm": 840.4124145507812, + "learning_rate": 1.0561292322332548e-05, + "loss": 97.0345, + "step": 172700 + }, + { + "epoch": 0.6977702541643604, + "grad_norm": 679.85986328125, + "learning_rate": 1.0558830430172918e-05, + "loss": 50.4354, + "step": 172710 + }, + { + "epoch": 0.6978106554297281, + "grad_norm": 943.1403198242188, + "learning_rate": 1.0556368722073755e-05, + "loss": 83.6012, + "step": 172720 + }, + { + "epoch": 0.6978510566950957, + "grad_norm": 558.5919799804688, + "learning_rate": 1.0553907198083034e-05, + "loss": 66.0659, + "step": 172730 + }, + { + "epoch": 
0.6978914579604634, + "grad_norm": 1163.632568359375, + "learning_rate": 1.0551445858248766e-05, + "loss": 61.7544, + "step": 172740 + }, + { + "epoch": 0.697931859225831, + "grad_norm": 347.5321350097656, + "learning_rate": 1.0548984702618923e-05, + "loss": 33.9122, + "step": 172750 + }, + { + "epoch": 0.6979722604911985, + "grad_norm": 699.390625, + "learning_rate": 1.0546523731241484e-05, + "loss": 74.1087, + "step": 172760 + }, + { + "epoch": 0.6980126617565662, + "grad_norm": 1002.4382934570312, + "learning_rate": 1.0544062944164444e-05, + "loss": 70.8043, + "step": 172770 + }, + { + "epoch": 0.6980530630219338, + "grad_norm": 787.5966796875, + "learning_rate": 1.0541602341435748e-05, + "loss": 67.6781, + "step": 172780 + }, + { + "epoch": 0.6980934642873015, + "grad_norm": 902.456298828125, + "learning_rate": 1.05391419231034e-05, + "loss": 57.6097, + "step": 172790 + }, + { + "epoch": 0.6981338655526691, + "grad_norm": 754.4395141601562, + "learning_rate": 1.053668168921534e-05, + "loss": 47.1647, + "step": 172800 + }, + { + "epoch": 0.6981742668180367, + "grad_norm": 459.5247497558594, + "learning_rate": 1.0534221639819546e-05, + "loss": 63.7771, + "step": 172810 + }, + { + "epoch": 0.6982146680834044, + "grad_norm": 3058.33984375, + "learning_rate": 1.0531761774963974e-05, + "loss": 105.0, + "step": 172820 + }, + { + "epoch": 0.698255069348772, + "grad_norm": 523.4500732421875, + "learning_rate": 1.0529302094696586e-05, + "loss": 68.9125, + "step": 172830 + }, + { + "epoch": 0.6982954706141397, + "grad_norm": 313.53619384765625, + "learning_rate": 1.0526842599065326e-05, + "loss": 57.4935, + "step": 172840 + }, + { + "epoch": 0.6983358718795073, + "grad_norm": 559.5223999023438, + "learning_rate": 1.0524383288118142e-05, + "loss": 167.0326, + "step": 172850 + }, + { + "epoch": 0.6983762731448749, + "grad_norm": 974.4124145507812, + "learning_rate": 1.0521924161902986e-05, + "loss": 71.064, + "step": 172860 + }, + { + "epoch": 0.6984166744102426, + "grad_norm": 541.2099609375, + "learning_rate": 1.05194652204678e-05, + "loss": 88.287, + "step": 172870 + }, + { + "epoch": 0.6984570756756101, + "grad_norm": 956.5816040039062, + "learning_rate": 1.0517006463860527e-05, + "loss": 115.0631, + "step": 172880 + }, + { + "epoch": 0.6984974769409777, + "grad_norm": 1049.515869140625, + "learning_rate": 1.0514547892129077e-05, + "loss": 72.5589, + "step": 172890 + }, + { + "epoch": 0.6985378782063454, + "grad_norm": 1552.0294189453125, + "learning_rate": 1.0512089505321419e-05, + "loss": 133.3646, + "step": 172900 + }, + { + "epoch": 0.698578279471713, + "grad_norm": 454.1815490722656, + "learning_rate": 1.050963130348545e-05, + "loss": 51.1889, + "step": 172910 + }, + { + "epoch": 0.6986186807370807, + "grad_norm": 741.5770874023438, + "learning_rate": 1.0507173286669108e-05, + "loss": 111.9853, + "step": 172920 + }, + { + "epoch": 0.6986590820024483, + "grad_norm": 1269.7552490234375, + "learning_rate": 1.0504715454920307e-05, + "loss": 86.4345, + "step": 172930 + }, + { + "epoch": 0.698699483267816, + "grad_norm": 1025.609619140625, + "learning_rate": 1.0502257808286967e-05, + "loss": 69.5427, + "step": 172940 + }, + { + "epoch": 0.6987398845331836, + "grad_norm": 494.7491760253906, + "learning_rate": 1.0499800346817009e-05, + "loss": 80.7987, + "step": 172950 + }, + { + "epoch": 0.6987802857985512, + "grad_norm": 558.2377319335938, + "learning_rate": 1.0497343070558329e-05, + "loss": 53.7904, + "step": 172960 + }, + { + "epoch": 0.6988206870639189, + "grad_norm": 592.6730346679688, + 
"learning_rate": 1.0494885979558834e-05, + "loss": 70.1588, + "step": 172970 + }, + { + "epoch": 0.6988610883292865, + "grad_norm": 899.3433227539062, + "learning_rate": 1.0492429073866432e-05, + "loss": 78.1024, + "step": 172980 + }, + { + "epoch": 0.6989014895946541, + "grad_norm": 1022.958740234375, + "learning_rate": 1.0489972353529027e-05, + "loss": 95.9347, + "step": 172990 + }, + { + "epoch": 0.6989418908600218, + "grad_norm": 573.3894653320312, + "learning_rate": 1.0487515818594494e-05, + "loss": 77.8646, + "step": 173000 + }, + { + "epoch": 0.6989822921253893, + "grad_norm": 642.2940063476562, + "learning_rate": 1.048505946911075e-05, + "loss": 61.1074, + "step": 173010 + }, + { + "epoch": 0.699022693390757, + "grad_norm": 1794.8680419921875, + "learning_rate": 1.0482603305125666e-05, + "loss": 66.3125, + "step": 173020 + }, + { + "epoch": 0.6990630946561246, + "grad_norm": 612.673828125, + "learning_rate": 1.0480147326687126e-05, + "loss": 49.0314, + "step": 173030 + }, + { + "epoch": 0.6991034959214922, + "grad_norm": 607.5330810546875, + "learning_rate": 1.0477691533843027e-05, + "loss": 85.0721, + "step": 173040 + }, + { + "epoch": 0.6991438971868599, + "grad_norm": 699.5153198242188, + "learning_rate": 1.0475235926641217e-05, + "loss": 74.2116, + "step": 173050 + }, + { + "epoch": 0.6991842984522275, + "grad_norm": 636.8626708984375, + "learning_rate": 1.04727805051296e-05, + "loss": 65.8682, + "step": 173060 + }, + { + "epoch": 0.6992246997175952, + "grad_norm": 718.0105590820312, + "learning_rate": 1.0470325269356025e-05, + "loss": 81.5019, + "step": 173070 + }, + { + "epoch": 0.6992651009829628, + "grad_norm": 469.662353515625, + "learning_rate": 1.0467870219368369e-05, + "loss": 76.0726, + "step": 173080 + }, + { + "epoch": 0.6993055022483304, + "grad_norm": 650.85009765625, + "learning_rate": 1.0465415355214485e-05, + "loss": 66.612, + "step": 173090 + }, + { + "epoch": 0.6993459035136981, + "grad_norm": 513.1318359375, + "learning_rate": 1.046296067694224e-05, + "loss": 45.4856, + "step": 173100 + }, + { + "epoch": 0.6993863047790657, + "grad_norm": 480.6987609863281, + "learning_rate": 1.0460506184599495e-05, + "loss": 82.2901, + "step": 173110 + }, + { + "epoch": 0.6994267060444334, + "grad_norm": 966.3629150390625, + "learning_rate": 1.0458051878234085e-05, + "loss": 86.0605, + "step": 173120 + }, + { + "epoch": 0.699467107309801, + "grad_norm": 284.2361755371094, + "learning_rate": 1.0455597757893865e-05, + "loss": 58.2388, + "step": 173130 + }, + { + "epoch": 0.6995075085751685, + "grad_norm": 427.1275329589844, + "learning_rate": 1.0453143823626682e-05, + "loss": 48.1717, + "step": 173140 + }, + { + "epoch": 0.6995479098405362, + "grad_norm": 1140.2840576171875, + "learning_rate": 1.0450690075480372e-05, + "loss": 96.8397, + "step": 173150 + }, + { + "epoch": 0.6995883111059038, + "grad_norm": 585.5446166992188, + "learning_rate": 1.0448236513502778e-05, + "loss": 90.7826, + "step": 173160 + }, + { + "epoch": 0.6996287123712714, + "grad_norm": 332.7326965332031, + "learning_rate": 1.044578313774174e-05, + "loss": 81.856, + "step": 173170 + }, + { + "epoch": 0.6996691136366391, + "grad_norm": 437.6309509277344, + "learning_rate": 1.044332994824507e-05, + "loss": 51.3817, + "step": 173180 + }, + { + "epoch": 0.6997095149020067, + "grad_norm": 816.458251953125, + "learning_rate": 1.04408769450606e-05, + "loss": 72.3934, + "step": 173190 + }, + { + "epoch": 0.6997499161673744, + "grad_norm": 387.5536804199219, + "learning_rate": 1.0438424128236157e-05, + "loss": 
72.961, + "step": 173200 + }, + { + "epoch": 0.699790317432742, + "grad_norm": 656.644287109375, + "learning_rate": 1.043597149781956e-05, + "loss": 61.3891, + "step": 173210 + }, + { + "epoch": 0.6998307186981096, + "grad_norm": 509.66485595703125, + "learning_rate": 1.0433519053858627e-05, + "loss": 45.6358, + "step": 173220 + }, + { + "epoch": 0.6998711199634773, + "grad_norm": 830.2647705078125, + "learning_rate": 1.0431066796401152e-05, + "loss": 77.3922, + "step": 173230 + }, + { + "epoch": 0.6999115212288449, + "grad_norm": 700.6181030273438, + "learning_rate": 1.0428614725494972e-05, + "loss": 65.1329, + "step": 173240 + }, + { + "epoch": 0.6999519224942126, + "grad_norm": 441.3546142578125, + "learning_rate": 1.0426162841187866e-05, + "loss": 82.3115, + "step": 173250 + }, + { + "epoch": 0.6999923237595802, + "grad_norm": 1055.9093017578125, + "learning_rate": 1.0423711143527644e-05, + "loss": 53.7531, + "step": 173260 + }, + { + "epoch": 0.7000327250249477, + "grad_norm": 726.9658203125, + "learning_rate": 1.0421259632562107e-05, + "loss": 65.0874, + "step": 173270 + }, + { + "epoch": 0.7000731262903154, + "grad_norm": 986.3773193359375, + "learning_rate": 1.0418808308339047e-05, + "loss": 82.7398, + "step": 173280 + }, + { + "epoch": 0.700113527555683, + "grad_norm": 802.156494140625, + "learning_rate": 1.0416357170906251e-05, + "loss": 65.8483, + "step": 173290 + }, + { + "epoch": 0.7001539288210507, + "grad_norm": 452.1122131347656, + "learning_rate": 1.0413906220311501e-05, + "loss": 63.637, + "step": 173300 + }, + { + "epoch": 0.7001943300864183, + "grad_norm": 865.009521484375, + "learning_rate": 1.0411455456602586e-05, + "loss": 65.6719, + "step": 173310 + }, + { + "epoch": 0.7002347313517859, + "grad_norm": 682.5399169921875, + "learning_rate": 1.0409004879827282e-05, + "loss": 75.9043, + "step": 173320 + }, + { + "epoch": 0.7002751326171536, + "grad_norm": 1033.530517578125, + "learning_rate": 1.0406554490033375e-05, + "loss": 79.0259, + "step": 173330 + }, + { + "epoch": 0.7003155338825212, + "grad_norm": 524.0195922851562, + "learning_rate": 1.0404104287268612e-05, + "loss": 85.2028, + "step": 173340 + }, + { + "epoch": 0.7003559351478889, + "grad_norm": 636.2154541015625, + "learning_rate": 1.0401654271580792e-05, + "loss": 88.9321, + "step": 173350 + }, + { + "epoch": 0.7003963364132565, + "grad_norm": 588.769775390625, + "learning_rate": 1.0399204443017653e-05, + "loss": 40.6682, + "step": 173360 + }, + { + "epoch": 0.7004367376786241, + "grad_norm": 844.656494140625, + "learning_rate": 1.0396754801626971e-05, + "loss": 112.2059, + "step": 173370 + }, + { + "epoch": 0.7004771389439918, + "grad_norm": 752.506591796875, + "learning_rate": 1.0394305347456501e-05, + "loss": 48.9322, + "step": 173380 + }, + { + "epoch": 0.7005175402093593, + "grad_norm": 897.1605834960938, + "learning_rate": 1.0391856080553981e-05, + "loss": 86.4599, + "step": 173390 + }, + { + "epoch": 0.7005579414747269, + "grad_norm": 252.33349609375, + "learning_rate": 1.0389407000967186e-05, + "loss": 73.0664, + "step": 173400 + }, + { + "epoch": 0.7005983427400946, + "grad_norm": 1135.59765625, + "learning_rate": 1.0386958108743846e-05, + "loss": 58.8074, + "step": 173410 + }, + { + "epoch": 0.7006387440054622, + "grad_norm": 546.5704345703125, + "learning_rate": 1.0384509403931705e-05, + "loss": 27.7453, + "step": 173420 + }, + { + "epoch": 0.7006791452708299, + "grad_norm": 424.7349548339844, + "learning_rate": 1.0382060886578502e-05, + "loss": 55.1353, + "step": 173430 + }, + { + "epoch": 
0.7007195465361975, + "grad_norm": 454.545166015625, + "learning_rate": 1.0379612556731984e-05, + "loss": 52.1983, + "step": 173440 + }, + { + "epoch": 0.7007599478015651, + "grad_norm": 1293.4744873046875, + "learning_rate": 1.0377164414439863e-05, + "loss": 77.2874, + "step": 173450 + }, + { + "epoch": 0.7008003490669328, + "grad_norm": 773.9328002929688, + "learning_rate": 1.0374716459749875e-05, + "loss": 46.054, + "step": 173460 + }, + { + "epoch": 0.7008407503323004, + "grad_norm": 433.6749267578125, + "learning_rate": 1.0372268692709744e-05, + "loss": 109.8178, + "step": 173470 + }, + { + "epoch": 0.7008811515976681, + "grad_norm": 659.573486328125, + "learning_rate": 1.0369821113367196e-05, + "loss": 85.3767, + "step": 173480 + }, + { + "epoch": 0.7009215528630357, + "grad_norm": 575.500732421875, + "learning_rate": 1.0367373721769947e-05, + "loss": 50.6963, + "step": 173490 + }, + { + "epoch": 0.7009619541284033, + "grad_norm": 470.64593505859375, + "learning_rate": 1.0364926517965693e-05, + "loss": 69.1622, + "step": 173500 + }, + { + "epoch": 0.701002355393771, + "grad_norm": 735.223876953125, + "learning_rate": 1.0362479502002172e-05, + "loss": 53.2807, + "step": 173510 + }, + { + "epoch": 0.7010427566591385, + "grad_norm": 514.37451171875, + "learning_rate": 1.0360032673927067e-05, + "loss": 57.0257, + "step": 173520 + }, + { + "epoch": 0.7010831579245062, + "grad_norm": 828.5984497070312, + "learning_rate": 1.0357586033788087e-05, + "loss": 68.0386, + "step": 173530 + }, + { + "epoch": 0.7011235591898738, + "grad_norm": 344.57318115234375, + "learning_rate": 1.035513958163294e-05, + "loss": 45.5269, + "step": 173540 + }, + { + "epoch": 0.7011639604552414, + "grad_norm": 1653.0169677734375, + "learning_rate": 1.0352693317509297e-05, + "loss": 68.3639, + "step": 173550 + }, + { + "epoch": 0.7012043617206091, + "grad_norm": 785.8117065429688, + "learning_rate": 1.0350247241464882e-05, + "loss": 46.2211, + "step": 173560 + }, + { + "epoch": 0.7012447629859767, + "grad_norm": 1267.23095703125, + "learning_rate": 1.0347801353547355e-05, + "loss": 95.4819, + "step": 173570 + }, + { + "epoch": 0.7012851642513444, + "grad_norm": 837.16748046875, + "learning_rate": 1.034535565380441e-05, + "loss": 42.8517, + "step": 173580 + }, + { + "epoch": 0.701325565516712, + "grad_norm": 647.8861083984375, + "learning_rate": 1.0342910142283727e-05, + "loss": 40.9802, + "step": 173590 + }, + { + "epoch": 0.7013659667820796, + "grad_norm": 4475.0498046875, + "learning_rate": 1.0340464819032991e-05, + "loss": 81.5699, + "step": 173600 + }, + { + "epoch": 0.7014063680474473, + "grad_norm": 397.51361083984375, + "learning_rate": 1.0338019684099851e-05, + "loss": 58.2599, + "step": 173610 + }, + { + "epoch": 0.7014467693128149, + "grad_norm": 848.2646484375, + "learning_rate": 1.0335574737532006e-05, + "loss": 48.4357, + "step": 173620 + }, + { + "epoch": 0.7014871705781826, + "grad_norm": 689.1300048828125, + "learning_rate": 1.0333129979377099e-05, + "loss": 65.8668, + "step": 173630 + }, + { + "epoch": 0.7015275718435502, + "grad_norm": 573.7636108398438, + "learning_rate": 1.03306854096828e-05, + "loss": 64.9351, + "step": 173640 + }, + { + "epoch": 0.7015679731089177, + "grad_norm": 940.4178466796875, + "learning_rate": 1.032824102849678e-05, + "loss": 62.0135, + "step": 173650 + }, + { + "epoch": 0.7016083743742854, + "grad_norm": 560.2929077148438, + "learning_rate": 1.0325796835866657e-05, + "loss": 43.0348, + "step": 173660 + }, + { + "epoch": 0.701648775639653, + "grad_norm": 
740.8483276367188, + "learning_rate": 1.0323352831840126e-05, + "loss": 70.5928, + "step": 173670 + }, + { + "epoch": 0.7016891769050206, + "grad_norm": 909.7739868164062, + "learning_rate": 1.0320909016464795e-05, + "loss": 58.0062, + "step": 173680 + }, + { + "epoch": 0.7017295781703883, + "grad_norm": 1204.69140625, + "learning_rate": 1.0318465389788344e-05, + "loss": 95.3366, + "step": 173690 + }, + { + "epoch": 0.7017699794357559, + "grad_norm": 667.715576171875, + "learning_rate": 1.0316021951858385e-05, + "loss": 63.6164, + "step": 173700 + }, + { + "epoch": 0.7018103807011236, + "grad_norm": 587.007568359375, + "learning_rate": 1.0313578702722564e-05, + "loss": 74.1333, + "step": 173710 + }, + { + "epoch": 0.7018507819664912, + "grad_norm": 663.80859375, + "learning_rate": 1.0311135642428523e-05, + "loss": 57.9142, + "step": 173720 + }, + { + "epoch": 0.7018911832318588, + "grad_norm": 751.5151977539062, + "learning_rate": 1.0308692771023871e-05, + "loss": 62.1679, + "step": 173730 + }, + { + "epoch": 0.7019315844972265, + "grad_norm": 464.1845397949219, + "learning_rate": 1.0306250088556244e-05, + "loss": 60.3734, + "step": 173740 + }, + { + "epoch": 0.7019719857625941, + "grad_norm": 828.8981323242188, + "learning_rate": 1.0303807595073262e-05, + "loss": 63.3028, + "step": 173750 + }, + { + "epoch": 0.7020123870279618, + "grad_norm": 905.5265502929688, + "learning_rate": 1.0301365290622542e-05, + "loss": 98.0902, + "step": 173760 + }, + { + "epoch": 0.7020527882933294, + "grad_norm": 812.924072265625, + "learning_rate": 1.0298923175251698e-05, + "loss": 50.5255, + "step": 173770 + }, + { + "epoch": 0.7020931895586969, + "grad_norm": 865.485107421875, + "learning_rate": 1.029648124900835e-05, + "loss": 87.3021, + "step": 173780 + }, + { + "epoch": 0.7021335908240646, + "grad_norm": 489.7832336425781, + "learning_rate": 1.0294039511940088e-05, + "loss": 104.5911, + "step": 173790 + }, + { + "epoch": 0.7021739920894322, + "grad_norm": 1124.377197265625, + "learning_rate": 1.0291597964094522e-05, + "loss": 68.2681, + "step": 173800 + }, + { + "epoch": 0.7022143933547998, + "grad_norm": 468.07720947265625, + "learning_rate": 1.0289156605519262e-05, + "loss": 67.2632, + "step": 173810 + }, + { + "epoch": 0.7022547946201675, + "grad_norm": 1695.2227783203125, + "learning_rate": 1.0286715436261875e-05, + "loss": 103.0294, + "step": 173820 + }, + { + "epoch": 0.7022951958855351, + "grad_norm": 677.1105346679688, + "learning_rate": 1.0284274456369988e-05, + "loss": 69.5366, + "step": 173830 + }, + { + "epoch": 0.7023355971509028, + "grad_norm": 427.6378479003906, + "learning_rate": 1.0281833665891159e-05, + "loss": 101.0897, + "step": 173840 + }, + { + "epoch": 0.7023759984162704, + "grad_norm": 751.6654663085938, + "learning_rate": 1.0279393064872998e-05, + "loss": 57.3889, + "step": 173850 + }, + { + "epoch": 0.702416399681638, + "grad_norm": 573.70068359375, + "learning_rate": 1.0276952653363069e-05, + "loss": 52.2434, + "step": 173860 + }, + { + "epoch": 0.7024568009470057, + "grad_norm": 1096.41748046875, + "learning_rate": 1.0274512431408951e-05, + "loss": 76.8107, + "step": 173870 + }, + { + "epoch": 0.7024972022123733, + "grad_norm": 855.4866943359375, + "learning_rate": 1.027207239905823e-05, + "loss": 83.095, + "step": 173880 + }, + { + "epoch": 0.702537603477741, + "grad_norm": 358.9162902832031, + "learning_rate": 1.0269632556358458e-05, + "loss": 65.6442, + "step": 173890 + }, + { + "epoch": 0.7025780047431086, + "grad_norm": 1070.508056640625, + "learning_rate": 
1.0267192903357206e-05, + "loss": 60.7461, + "step": 173900 + }, + { + "epoch": 0.7026184060084761, + "grad_norm": 578.6129760742188, + "learning_rate": 1.0264753440102043e-05, + "loss": 39.4229, + "step": 173910 + }, + { + "epoch": 0.7026588072738438, + "grad_norm": 379.7270812988281, + "learning_rate": 1.0262314166640519e-05, + "loss": 69.3569, + "step": 173920 + }, + { + "epoch": 0.7026992085392114, + "grad_norm": 1822.7935791015625, + "learning_rate": 1.02598750830202e-05, + "loss": 81.4621, + "step": 173930 + }, + { + "epoch": 0.7027396098045791, + "grad_norm": 421.0077819824219, + "learning_rate": 1.0257436189288633e-05, + "loss": 53.2823, + "step": 173940 + }, + { + "epoch": 0.7027800110699467, + "grad_norm": 809.2689819335938, + "learning_rate": 1.025499748549335e-05, + "loss": 76.992, + "step": 173950 + }, + { + "epoch": 0.7028204123353143, + "grad_norm": 262.55413818359375, + "learning_rate": 1.0252558971681924e-05, + "loss": 48.4548, + "step": 173960 + }, + { + "epoch": 0.702860813600682, + "grad_norm": 1709.0987548828125, + "learning_rate": 1.0250120647901871e-05, + "loss": 93.2508, + "step": 173970 + }, + { + "epoch": 0.7029012148660496, + "grad_norm": 753.3961181640625, + "learning_rate": 1.0247682514200737e-05, + "loss": 64.4457, + "step": 173980 + }, + { + "epoch": 0.7029416161314173, + "grad_norm": 922.5034790039062, + "learning_rate": 1.0245244570626058e-05, + "loss": 81.0281, + "step": 173990 + }, + { + "epoch": 0.7029820173967849, + "grad_norm": 1176.545166015625, + "learning_rate": 1.0242806817225344e-05, + "loss": 60.8465, + "step": 174000 + }, + { + "epoch": 0.7030224186621525, + "grad_norm": 900.397216796875, + "learning_rate": 1.024036925404615e-05, + "loss": 85.0663, + "step": 174010 + }, + { + "epoch": 0.7030628199275202, + "grad_norm": 514.3111572265625, + "learning_rate": 1.0237931881135975e-05, + "loss": 68.5004, + "step": 174020 + }, + { + "epoch": 0.7031032211928877, + "grad_norm": 612.28466796875, + "learning_rate": 1.0235494698542343e-05, + "loss": 55.3585, + "step": 174030 + }, + { + "epoch": 0.7031436224582553, + "grad_norm": 1206.6029052734375, + "learning_rate": 1.0233057706312768e-05, + "loss": 91.0799, + "step": 174040 + }, + { + "epoch": 0.703184023723623, + "grad_norm": 839.1258544921875, + "learning_rate": 1.0230620904494773e-05, + "loss": 78.2782, + "step": 174050 + }, + { + "epoch": 0.7032244249889906, + "grad_norm": 1032.465087890625, + "learning_rate": 1.022818429313584e-05, + "loss": 95.1569, + "step": 174060 + }, + { + "epoch": 0.7032648262543583, + "grad_norm": 286.5279235839844, + "learning_rate": 1.0225747872283489e-05, + "loss": 82.0288, + "step": 174070 + }, + { + "epoch": 0.7033052275197259, + "grad_norm": 733.3411254882812, + "learning_rate": 1.0223311641985215e-05, + "loss": 68.3364, + "step": 174080 + }, + { + "epoch": 0.7033456287850935, + "grad_norm": 459.5732421875, + "learning_rate": 1.0220875602288512e-05, + "loss": 46.1305, + "step": 174090 + }, + { + "epoch": 0.7033860300504612, + "grad_norm": 2735.070556640625, + "learning_rate": 1.0218439753240883e-05, + "loss": 61.6727, + "step": 174100 + }, + { + "epoch": 0.7034264313158288, + "grad_norm": 326.7603759765625, + "learning_rate": 1.021600409488979e-05, + "loss": 48.7877, + "step": 174110 + }, + { + "epoch": 0.7034668325811965, + "grad_norm": 479.45245361328125, + "learning_rate": 1.021356862728275e-05, + "loss": 55.4897, + "step": 174120 + }, + { + "epoch": 0.7035072338465641, + "grad_norm": 650.5347290039062, + "learning_rate": 1.0211133350467222e-05, + "loss": 43.852, + 
"step": 174130 + }, + { + "epoch": 0.7035476351119317, + "grad_norm": 1289.1171875, + "learning_rate": 1.0208698264490687e-05, + "loss": 72.0771, + "step": 174140 + }, + { + "epoch": 0.7035880363772994, + "grad_norm": 757.3626098632812, + "learning_rate": 1.020626336940063e-05, + "loss": 55.3567, + "step": 174150 + }, + { + "epoch": 0.7036284376426669, + "grad_norm": 783.6510620117188, + "learning_rate": 1.0203828665244493e-05, + "loss": 59.816, + "step": 174160 + }, + { + "epoch": 0.7036688389080346, + "grad_norm": 617.8003540039062, + "learning_rate": 1.0201394152069777e-05, + "loss": 62.7641, + "step": 174170 + }, + { + "epoch": 0.7037092401734022, + "grad_norm": 473.8866882324219, + "learning_rate": 1.0198959829923916e-05, + "loss": 74.1961, + "step": 174180 + }, + { + "epoch": 0.7037496414387698, + "grad_norm": 548.7943725585938, + "learning_rate": 1.0196525698854382e-05, + "loss": 58.4867, + "step": 174190 + }, + { + "epoch": 0.7037900427041375, + "grad_norm": 881.4559936523438, + "learning_rate": 1.0194091758908627e-05, + "loss": 93.7212, + "step": 174200 + }, + { + "epoch": 0.7038304439695051, + "grad_norm": 712.2219848632812, + "learning_rate": 1.01916580101341e-05, + "loss": 85.7104, + "step": 174210 + }, + { + "epoch": 0.7038708452348728, + "grad_norm": 1000.4982299804688, + "learning_rate": 1.0189224452578257e-05, + "loss": 75.4279, + "step": 174220 + }, + { + "epoch": 0.7039112465002404, + "grad_norm": 318.2071838378906, + "learning_rate": 1.0186791086288528e-05, + "loss": 83.0412, + "step": 174230 + }, + { + "epoch": 0.703951647765608, + "grad_norm": 920.510009765625, + "learning_rate": 1.018435791131236e-05, + "loss": 83.8872, + "step": 174240 + }, + { + "epoch": 0.7039920490309757, + "grad_norm": 501.2214660644531, + "learning_rate": 1.0181924927697185e-05, + "loss": 140.1871, + "step": 174250 + }, + { + "epoch": 0.7040324502963433, + "grad_norm": 612.39794921875, + "learning_rate": 1.0179492135490444e-05, + "loss": 72.1299, + "step": 174260 + }, + { + "epoch": 0.704072851561711, + "grad_norm": 593.0027465820312, + "learning_rate": 1.017705953473955e-05, + "loss": 66.1959, + "step": 174270 + }, + { + "epoch": 0.7041132528270786, + "grad_norm": 896.54443359375, + "learning_rate": 1.017462712549195e-05, + "loss": 45.1586, + "step": 174280 + }, + { + "epoch": 0.7041536540924461, + "grad_norm": 693.3680419921875, + "learning_rate": 1.017219490779504e-05, + "loss": 57.4797, + "step": 174290 + }, + { + "epoch": 0.7041940553578138, + "grad_norm": 788.0621948242188, + "learning_rate": 1.0169762881696261e-05, + "loss": 81.3068, + "step": 174300 + }, + { + "epoch": 0.7042344566231814, + "grad_norm": 394.5865173339844, + "learning_rate": 1.0167331047243012e-05, + "loss": 70.5919, + "step": 174310 + }, + { + "epoch": 0.704274857888549, + "grad_norm": 651.2617797851562, + "learning_rate": 1.0164899404482704e-05, + "loss": 56.5097, + "step": 174320 + }, + { + "epoch": 0.7043152591539167, + "grad_norm": 1161.2529296875, + "learning_rate": 1.0162467953462753e-05, + "loss": 64.6826, + "step": 174330 + }, + { + "epoch": 0.7043556604192843, + "grad_norm": 1017.0380859375, + "learning_rate": 1.0160036694230548e-05, + "loss": 47.1102, + "step": 174340 + }, + { + "epoch": 0.704396061684652, + "grad_norm": 1204.7369384765625, + "learning_rate": 1.0157605626833491e-05, + "loss": 66.3066, + "step": 174350 + }, + { + "epoch": 0.7044364629500196, + "grad_norm": 968.8399658203125, + "learning_rate": 1.0155174751318981e-05, + "loss": 90.4313, + "step": 174360 + }, + { + "epoch": 0.7044768642153872, + 
"grad_norm": 865.56591796875, + "learning_rate": 1.0152744067734408e-05, + "loss": 79.8717, + "step": 174370 + }, + { + "epoch": 0.7045172654807549, + "grad_norm": 1619.8673095703125, + "learning_rate": 1.015031357612716e-05, + "loss": 84.5769, + "step": 174380 + }, + { + "epoch": 0.7045576667461225, + "grad_norm": 583.5523071289062, + "learning_rate": 1.0147883276544626e-05, + "loss": 51.3674, + "step": 174390 + }, + { + "epoch": 0.7045980680114902, + "grad_norm": 712.704833984375, + "learning_rate": 1.0145453169034172e-05, + "loss": 108.8797, + "step": 174400 + }, + { + "epoch": 0.7046384692768578, + "grad_norm": 611.5519409179688, + "learning_rate": 1.0143023253643183e-05, + "loss": 60.0839, + "step": 174410 + }, + { + "epoch": 0.7046788705422253, + "grad_norm": 441.3805847167969, + "learning_rate": 1.0140593530419038e-05, + "loss": 64.1282, + "step": 174420 + }, + { + "epoch": 0.704719271807593, + "grad_norm": 543.6461181640625, + "learning_rate": 1.0138163999409081e-05, + "loss": 66.5415, + "step": 174430 + }, + { + "epoch": 0.7047596730729606, + "grad_norm": 724.8994750976562, + "learning_rate": 1.0135734660660711e-05, + "loss": 74.3618, + "step": 174440 + }, + { + "epoch": 0.7048000743383283, + "grad_norm": 715.3988647460938, + "learning_rate": 1.0133305514221256e-05, + "loss": 84.5824, + "step": 174450 + }, + { + "epoch": 0.7048404756036959, + "grad_norm": 737.0521240234375, + "learning_rate": 1.0130876560138105e-05, + "loss": 54.8389, + "step": 174460 + }, + { + "epoch": 0.7048808768690635, + "grad_norm": 714.4993896484375, + "learning_rate": 1.0128447798458589e-05, + "loss": 84.8171, + "step": 174470 + }, + { + "epoch": 0.7049212781344312, + "grad_norm": 677.4454345703125, + "learning_rate": 1.0126019229230066e-05, + "loss": 83.7663, + "step": 174480 + }, + { + "epoch": 0.7049616793997988, + "grad_norm": 994.8671875, + "learning_rate": 1.012359085249989e-05, + "loss": 74.9495, + "step": 174490 + }, + { + "epoch": 0.7050020806651665, + "grad_norm": 307.2357177734375, + "learning_rate": 1.0121162668315385e-05, + "loss": 69.4571, + "step": 174500 + }, + { + "epoch": 0.7050424819305341, + "grad_norm": 630.3380737304688, + "learning_rate": 1.0118734676723903e-05, + "loss": 62.263, + "step": 174510 + }, + { + "epoch": 0.7050828831959017, + "grad_norm": 1197.194580078125, + "learning_rate": 1.0116306877772772e-05, + "loss": 49.7339, + "step": 174520 + }, + { + "epoch": 0.7051232844612694, + "grad_norm": 1045.4124755859375, + "learning_rate": 1.011387927150933e-05, + "loss": 69.9549, + "step": 174530 + }, + { + "epoch": 0.705163685726637, + "grad_norm": 938.6143188476562, + "learning_rate": 1.0111451857980898e-05, + "loss": 81.3826, + "step": 174540 + }, + { + "epoch": 0.7052040869920045, + "grad_norm": 306.8988952636719, + "learning_rate": 1.0109024637234812e-05, + "loss": 65.0736, + "step": 174550 + }, + { + "epoch": 0.7052444882573722, + "grad_norm": 265.95452880859375, + "learning_rate": 1.0106597609318376e-05, + "loss": 99.7346, + "step": 174560 + }, + { + "epoch": 0.7052848895227398, + "grad_norm": 465.24200439453125, + "learning_rate": 1.0104170774278914e-05, + "loss": 76.0752, + "step": 174570 + }, + { + "epoch": 0.7053252907881075, + "grad_norm": 590.0308837890625, + "learning_rate": 1.0101744132163737e-05, + "loss": 59.5508, + "step": 174580 + }, + { + "epoch": 0.7053656920534751, + "grad_norm": 1231.116455078125, + "learning_rate": 1.0099317683020153e-05, + "loss": 70.4236, + "step": 174590 + }, + { + "epoch": 0.7054060933188427, + "grad_norm": 532.1766967773438, + 
"learning_rate": 1.0096891426895476e-05, + "loss": 41.6031, + "step": 174600 + }, + { + "epoch": 0.7054464945842104, + "grad_norm": 296.2389831542969, + "learning_rate": 1.0094465363836986e-05, + "loss": 75.4278, + "step": 174610 + }, + { + "epoch": 0.705486895849578, + "grad_norm": 671.1690063476562, + "learning_rate": 1.0092039493892005e-05, + "loss": 50.0217, + "step": 174620 + }, + { + "epoch": 0.7055272971149457, + "grad_norm": 734.7295532226562, + "learning_rate": 1.0089613817107808e-05, + "loss": 66.087, + "step": 174630 + }, + { + "epoch": 0.7055676983803133, + "grad_norm": 1226.897705078125, + "learning_rate": 1.0087188333531692e-05, + "loss": 74.0554, + "step": 174640 + }, + { + "epoch": 0.7056080996456809, + "grad_norm": 868.8613891601562, + "learning_rate": 1.0084763043210946e-05, + "loss": 91.7178, + "step": 174650 + }, + { + "epoch": 0.7056485009110486, + "grad_norm": 439.9600524902344, + "learning_rate": 1.0082337946192846e-05, + "loss": 54.4037, + "step": 174660 + }, + { + "epoch": 0.7056889021764161, + "grad_norm": 1016.7705688476562, + "learning_rate": 1.0079913042524683e-05, + "loss": 115.6719, + "step": 174670 + }, + { + "epoch": 0.7057293034417838, + "grad_norm": 615.3187866210938, + "learning_rate": 1.0077488332253714e-05, + "loss": 67.2632, + "step": 174680 + }, + { + "epoch": 0.7057697047071514, + "grad_norm": 446.0236511230469, + "learning_rate": 1.0075063815427217e-05, + "loss": 47.2394, + "step": 174690 + }, + { + "epoch": 0.705810105972519, + "grad_norm": 946.7235107421875, + "learning_rate": 1.0072639492092463e-05, + "loss": 53.2771, + "step": 174700 + }, + { + "epoch": 0.7058505072378867, + "grad_norm": 1305.1824951171875, + "learning_rate": 1.007021536229672e-05, + "loss": 79.2216, + "step": 174710 + }, + { + "epoch": 0.7058909085032543, + "grad_norm": 623.8622436523438, + "learning_rate": 1.0067791426087226e-05, + "loss": 81.2861, + "step": 174720 + }, + { + "epoch": 0.705931309768622, + "grad_norm": 651.63623046875, + "learning_rate": 1.0065367683511265e-05, + "loss": 54.9706, + "step": 174730 + }, + { + "epoch": 0.7059717110339896, + "grad_norm": 586.8106079101562, + "learning_rate": 1.006294413461607e-05, + "loss": 81.0045, + "step": 174740 + }, + { + "epoch": 0.7060121122993572, + "grad_norm": 812.7340698242188, + "learning_rate": 1.0060520779448894e-05, + "loss": 57.3844, + "step": 174750 + }, + { + "epoch": 0.7060525135647249, + "grad_norm": 265.2034912109375, + "learning_rate": 1.0058097618056992e-05, + "loss": 45.5918, + "step": 174760 + }, + { + "epoch": 0.7060929148300925, + "grad_norm": 468.6656494140625, + "learning_rate": 1.0055674650487576e-05, + "loss": 56.8827, + "step": 174770 + }, + { + "epoch": 0.7061333160954602, + "grad_norm": 647.6321411132812, + "learning_rate": 1.0053251876787922e-05, + "loss": 59.9765, + "step": 174780 + }, + { + "epoch": 0.7061737173608278, + "grad_norm": 596.6109008789062, + "learning_rate": 1.0050829297005233e-05, + "loss": 110.7099, + "step": 174790 + }, + { + "epoch": 0.7062141186261953, + "grad_norm": 122.8767318725586, + "learning_rate": 1.004840691118675e-05, + "loss": 70.8703, + "step": 174800 + }, + { + "epoch": 0.706254519891563, + "grad_norm": 593.3654174804688, + "learning_rate": 1.0045984719379698e-05, + "loss": 46.5643, + "step": 174810 + }, + { + "epoch": 0.7062949211569306, + "grad_norm": 729.160888671875, + "learning_rate": 1.00435627216313e-05, + "loss": 86.8706, + "step": 174820 + }, + { + "epoch": 0.7063353224222982, + "grad_norm": 545.685546875, + "learning_rate": 1.004114091798878e-05, + 
"loss": 56.2661, + "step": 174830 + }, + { + "epoch": 0.7063757236876659, + "grad_norm": 760.6835327148438, + "learning_rate": 1.0038719308499335e-05, + "loss": 57.8207, + "step": 174840 + }, + { + "epoch": 0.7064161249530335, + "grad_norm": 410.64239501953125, + "learning_rate": 1.0036297893210184e-05, + "loss": 42.3584, + "step": 174850 + }, + { + "epoch": 0.7064565262184012, + "grad_norm": 461.0585632324219, + "learning_rate": 1.0033876672168538e-05, + "loss": 75.9668, + "step": 174860 + }, + { + "epoch": 0.7064969274837688, + "grad_norm": 569.5731201171875, + "learning_rate": 1.0031455645421607e-05, + "loss": 47.435, + "step": 174870 + }, + { + "epoch": 0.7065373287491364, + "grad_norm": 920.7491455078125, + "learning_rate": 1.0029034813016563e-05, + "loss": 108.039, + "step": 174880 + }, + { + "epoch": 0.7065777300145041, + "grad_norm": 570.5654907226562, + "learning_rate": 1.0026614175000634e-05, + "loss": 67.8055, + "step": 174890 + }, + { + "epoch": 0.7066181312798717, + "grad_norm": 1593.2763671875, + "learning_rate": 1.0024193731420989e-05, + "loss": 68.5977, + "step": 174900 + }, + { + "epoch": 0.7066585325452394, + "grad_norm": 1173.8751220703125, + "learning_rate": 1.0021773482324822e-05, + "loss": 62.6037, + "step": 174910 + }, + { + "epoch": 0.706698933810607, + "grad_norm": 935.0791015625, + "learning_rate": 1.0019353427759317e-05, + "loss": 82.2353, + "step": 174920 + }, + { + "epoch": 0.7067393350759745, + "grad_norm": 934.474853515625, + "learning_rate": 1.0016933567771656e-05, + "loss": 80.0569, + "step": 174930 + }, + { + "epoch": 0.7067797363413422, + "grad_norm": 765.419189453125, + "learning_rate": 1.0014513902409025e-05, + "loss": 57.9742, + "step": 174940 + }, + { + "epoch": 0.7068201376067098, + "grad_norm": 549.7628173828125, + "learning_rate": 1.0012094431718577e-05, + "loss": 67.3954, + "step": 174950 + }, + { + "epoch": 0.7068605388720774, + "grad_norm": 232.22938537597656, + "learning_rate": 1.0009675155747489e-05, + "loss": 51.6529, + "step": 174960 + }, + { + "epoch": 0.7069009401374451, + "grad_norm": 810.7760620117188, + "learning_rate": 1.0007256074542927e-05, + "loss": 64.5349, + "step": 174970 + }, + { + "epoch": 0.7069413414028127, + "grad_norm": 676.5203857421875, + "learning_rate": 1.0004837188152056e-05, + "loss": 66.7751, + "step": 174980 + }, + { + "epoch": 0.7069817426681804, + "grad_norm": 810.6488037109375, + "learning_rate": 1.0002418496622029e-05, + "loss": 39.3295, + "step": 174990 + }, + { + "epoch": 0.707022143933548, + "grad_norm": 923.7711181640625, + "learning_rate": 1.0000000000000006e-05, + "loss": 105.5032, + "step": 175000 + }, + { + "epoch": 0.7070625451989156, + "grad_norm": 721.304443359375, + "learning_rate": 9.997581698333124e-06, + "loss": 58.3973, + "step": 175010 + }, + { + "epoch": 0.7071029464642833, + "grad_norm": 678.7724609375, + "learning_rate": 9.995163591668535e-06, + "loss": 97.0357, + "step": 175020 + }, + { + "epoch": 0.7071433477296509, + "grad_norm": 1115.90673828125, + "learning_rate": 9.992745680053393e-06, + "loss": 75.4986, + "step": 175030 + }, + { + "epoch": 0.7071837489950186, + "grad_norm": 204.11740112304688, + "learning_rate": 9.99032796353481e-06, + "loss": 64.1877, + "step": 175040 + }, + { + "epoch": 0.7072241502603862, + "grad_norm": 627.7744140625, + "learning_rate": 9.987910442159952e-06, + "loss": 80.1522, + "step": 175050 + }, + { + "epoch": 0.7072645515257537, + "grad_norm": 484.9147033691406, + "learning_rate": 9.98549311597592e-06, + "loss": 111.8285, + "step": 175060 + }, + { + "epoch": 
0.7073049527911214, + "grad_norm": 2146.079833984375, + "learning_rate": 9.983075985029869e-06, + "loss": 111.8794, + "step": 175070 + }, + { + "epoch": 0.707345354056489, + "grad_norm": 381.7259521484375, + "learning_rate": 9.980659049368902e-06, + "loss": 77.9763, + "step": 175080 + }, + { + "epoch": 0.7073857553218567, + "grad_norm": 918.2215576171875, + "learning_rate": 9.978242309040146e-06, + "loss": 64.8392, + "step": 175090 + }, + { + "epoch": 0.7074261565872243, + "grad_norm": 585.0704345703125, + "learning_rate": 9.97582576409072e-06, + "loss": 63.4779, + "step": 175100 + }, + { + "epoch": 0.7074665578525919, + "grad_norm": 402.9342041015625, + "learning_rate": 9.973409414567728e-06, + "loss": 91.8981, + "step": 175110 + }, + { + "epoch": 0.7075069591179596, + "grad_norm": 480.516357421875, + "learning_rate": 9.97099326051828e-06, + "loss": 44.8011, + "step": 175120 + }, + { + "epoch": 0.7075473603833272, + "grad_norm": 490.1944274902344, + "learning_rate": 9.968577301989482e-06, + "loss": 41.6644, + "step": 175130 + }, + { + "epoch": 0.7075877616486949, + "grad_norm": 861.2775268554688, + "learning_rate": 9.966161539028433e-06, + "loss": 59.0499, + "step": 175140 + }, + { + "epoch": 0.7076281629140625, + "grad_norm": 241.42832946777344, + "learning_rate": 9.96374597168223e-06, + "loss": 52.5384, + "step": 175150 + }, + { + "epoch": 0.7076685641794301, + "grad_norm": 891.005126953125, + "learning_rate": 9.961330599997976e-06, + "loss": 86.8153, + "step": 175160 + }, + { + "epoch": 0.7077089654447978, + "grad_norm": 824.2645874023438, + "learning_rate": 9.958915424022742e-06, + "loss": 63.4667, + "step": 175170 + }, + { + "epoch": 0.7077493667101654, + "grad_norm": 631.36279296875, + "learning_rate": 9.956500443803621e-06, + "loss": 83.0433, + "step": 175180 + }, + { + "epoch": 0.707789767975533, + "grad_norm": 338.26983642578125, + "learning_rate": 9.954085659387693e-06, + "loss": 47.2102, + "step": 175190 + }, + { + "epoch": 0.7078301692409006, + "grad_norm": 1739.5494384765625, + "learning_rate": 9.95167107082204e-06, + "loss": 78.4122, + "step": 175200 + }, + { + "epoch": 0.7078705705062682, + "grad_norm": 1107.9295654296875, + "learning_rate": 9.949256678153739e-06, + "loss": 80.2144, + "step": 175210 + }, + { + "epoch": 0.7079109717716359, + "grad_norm": 555.0942993164062, + "learning_rate": 9.946842481429837e-06, + "loss": 45.0919, + "step": 175220 + }, + { + "epoch": 0.7079513730370035, + "grad_norm": 751.4346923828125, + "learning_rate": 9.944428480697434e-06, + "loss": 46.6399, + "step": 175230 + }, + { + "epoch": 0.7079917743023711, + "grad_norm": 644.6951904296875, + "learning_rate": 9.942014676003567e-06, + "loss": 46.4817, + "step": 175240 + }, + { + "epoch": 0.7080321755677388, + "grad_norm": 438.3336486816406, + "learning_rate": 9.9396010673953e-06, + "loss": 68.2384, + "step": 175250 + }, + { + "epoch": 0.7080725768331064, + "grad_norm": 618.3799438476562, + "learning_rate": 9.937187654919691e-06, + "loss": 49.2702, + "step": 175260 + }, + { + "epoch": 0.7081129780984741, + "grad_norm": 384.3260192871094, + "learning_rate": 9.934774438623788e-06, + "loss": 46.2385, + "step": 175270 + }, + { + "epoch": 0.7081533793638417, + "grad_norm": 564.835693359375, + "learning_rate": 9.932361418554648e-06, + "loss": 82.1475, + "step": 175280 + }, + { + "epoch": 0.7081937806292093, + "grad_norm": 709.8888549804688, + "learning_rate": 9.929948594759298e-06, + "loss": 58.5317, + "step": 175290 + }, + { + "epoch": 0.708234181894577, + "grad_norm": 631.4082641601562, + 
"learning_rate": 9.927535967284785e-06, + "loss": 100.3214, + "step": 175300 + }, + { + "epoch": 0.7082745831599445, + "grad_norm": 307.2262268066406, + "learning_rate": 9.925123536178143e-06, + "loss": 51.3848, + "step": 175310 + }, + { + "epoch": 0.7083149844253122, + "grad_norm": 660.6044921875, + "learning_rate": 9.922711301486411e-06, + "loss": 89.1257, + "step": 175320 + }, + { + "epoch": 0.7083553856906798, + "grad_norm": 947.41015625, + "learning_rate": 9.9202992632566e-06, + "loss": 76.8756, + "step": 175330 + }, + { + "epoch": 0.7083957869560474, + "grad_norm": 716.44775390625, + "learning_rate": 9.917887421535755e-06, + "loss": 82.5583, + "step": 175340 + }, + { + "epoch": 0.7084361882214151, + "grad_norm": 816.8583984375, + "learning_rate": 9.915475776370877e-06, + "loss": 66.0616, + "step": 175350 + }, + { + "epoch": 0.7084765894867827, + "grad_norm": 1519.1053466796875, + "learning_rate": 9.913064327808993e-06, + "loss": 69.485, + "step": 175360 + }, + { + "epoch": 0.7085169907521504, + "grad_norm": 820.0328979492188, + "learning_rate": 9.910653075897121e-06, + "loss": 51.6841, + "step": 175370 + }, + { + "epoch": 0.708557392017518, + "grad_norm": 1675.81201171875, + "learning_rate": 9.908242020682243e-06, + "loss": 107.4479, + "step": 175380 + }, + { + "epoch": 0.7085977932828856, + "grad_norm": 940.8255615234375, + "learning_rate": 9.905831162211401e-06, + "loss": 78.0673, + "step": 175390 + }, + { + "epoch": 0.7086381945482533, + "grad_norm": 928.2705078125, + "learning_rate": 9.90342050053157e-06, + "loss": 76.151, + "step": 175400 + }, + { + "epoch": 0.7086785958136209, + "grad_norm": 578.9796752929688, + "learning_rate": 9.901010035689751e-06, + "loss": 61.5812, + "step": 175410 + }, + { + "epoch": 0.7087189970789886, + "grad_norm": 688.26953125, + "learning_rate": 9.898599767732944e-06, + "loss": 51.2358, + "step": 175420 + }, + { + "epoch": 0.7087593983443562, + "grad_norm": 934.5057983398438, + "learning_rate": 9.896189696708133e-06, + "loss": 64.2849, + "step": 175430 + }, + { + "epoch": 0.7087997996097237, + "grad_norm": 578.05322265625, + "learning_rate": 9.893779822662314e-06, + "loss": 53.198, + "step": 175440 + }, + { + "epoch": 0.7088402008750914, + "grad_norm": 1121.799072265625, + "learning_rate": 9.891370145642454e-06, + "loss": 66.8121, + "step": 175450 + }, + { + "epoch": 0.708880602140459, + "grad_norm": 374.0003662109375, + "learning_rate": 9.888960665695535e-06, + "loss": 45.148, + "step": 175460 + }, + { + "epoch": 0.7089210034058266, + "grad_norm": 567.3477783203125, + "learning_rate": 9.886551382868535e-06, + "loss": 50.3106, + "step": 175470 + }, + { + "epoch": 0.7089614046711943, + "grad_norm": 310.883056640625, + "learning_rate": 9.88414229720843e-06, + "loss": 51.4552, + "step": 175480 + }, + { + "epoch": 0.7090018059365619, + "grad_norm": 353.7326965332031, + "learning_rate": 9.881733408762163e-06, + "loss": 61.9096, + "step": 175490 + }, + { + "epoch": 0.7090422072019296, + "grad_norm": 394.66766357421875, + "learning_rate": 9.879324717576729e-06, + "loss": 82.7891, + "step": 175500 + }, + { + "epoch": 0.7090826084672972, + "grad_norm": 1240.7437744140625, + "learning_rate": 9.876916223699061e-06, + "loss": 58.7884, + "step": 175510 + }, + { + "epoch": 0.7091230097326648, + "grad_norm": 369.1908874511719, + "learning_rate": 9.874507927176122e-06, + "loss": 53.1191, + "step": 175520 + }, + { + "epoch": 0.7091634109980325, + "grad_norm": 379.8385314941406, + "learning_rate": 9.872099828054866e-06, + "loss": 82.629, + "step": 175530 + }, + { + 
"epoch": 0.7092038122634001, + "grad_norm": 380.65191650390625, + "learning_rate": 9.869691926382238e-06, + "loss": 60.0182, + "step": 175540 + }, + { + "epoch": 0.7092442135287678, + "grad_norm": 1609.0286865234375, + "learning_rate": 9.86728422220519e-06, + "loss": 97.1612, + "step": 175550 + }, + { + "epoch": 0.7092846147941354, + "grad_norm": 1329.3541259765625, + "learning_rate": 9.86487671557064e-06, + "loss": 53.2065, + "step": 175560 + }, + { + "epoch": 0.7093250160595029, + "grad_norm": 798.9496459960938, + "learning_rate": 9.862469406525541e-06, + "loss": 58.556, + "step": 175570 + }, + { + "epoch": 0.7093654173248706, + "grad_norm": 1586.617919921875, + "learning_rate": 9.86006229511682e-06, + "loss": 90.5476, + "step": 175580 + }, + { + "epoch": 0.7094058185902382, + "grad_norm": 821.9381103515625, + "learning_rate": 9.857655381391402e-06, + "loss": 50.0883, + "step": 175590 + }, + { + "epoch": 0.7094462198556059, + "grad_norm": 609.460693359375, + "learning_rate": 9.855248665396218e-06, + "loss": 81.7901, + "step": 175600 + }, + { + "epoch": 0.7094866211209735, + "grad_norm": 939.080322265625, + "learning_rate": 9.852842147178188e-06, + "loss": 52.0923, + "step": 175610 + }, + { + "epoch": 0.7095270223863411, + "grad_norm": 523.3330078125, + "learning_rate": 9.850435826784219e-06, + "loss": 59.911, + "step": 175620 + }, + { + "epoch": 0.7095674236517088, + "grad_norm": 745.2606201171875, + "learning_rate": 9.848029704261228e-06, + "loss": 52.4235, + "step": 175630 + }, + { + "epoch": 0.7096078249170764, + "grad_norm": 970.3857421875, + "learning_rate": 9.845623779656132e-06, + "loss": 73.1751, + "step": 175640 + }, + { + "epoch": 0.709648226182444, + "grad_norm": 819.71533203125, + "learning_rate": 9.843218053015814e-06, + "loss": 65.952, + "step": 175650 + }, + { + "epoch": 0.7096886274478117, + "grad_norm": 1096.90234375, + "learning_rate": 9.840812524387205e-06, + "loss": 87.4375, + "step": 175660 + }, + { + "epoch": 0.7097290287131793, + "grad_norm": 677.1637573242188, + "learning_rate": 9.838407193817167e-06, + "loss": 89.0742, + "step": 175670 + }, + { + "epoch": 0.709769429978547, + "grad_norm": 320.3072814941406, + "learning_rate": 9.836002061352632e-06, + "loss": 82.0809, + "step": 175680 + }, + { + "epoch": 0.7098098312439146, + "grad_norm": 621.2733764648438, + "learning_rate": 9.83359712704046e-06, + "loss": 58.7607, + "step": 175690 + }, + { + "epoch": 0.7098502325092821, + "grad_norm": 567.6275634765625, + "learning_rate": 9.831192390927547e-06, + "loss": 72.7008, + "step": 175700 + }, + { + "epoch": 0.7098906337746498, + "grad_norm": 590.5513305664062, + "learning_rate": 9.82878785306078e-06, + "loss": 58.4705, + "step": 175710 + }, + { + "epoch": 0.7099310350400174, + "grad_norm": 463.1714172363281, + "learning_rate": 9.826383513487014e-06, + "loss": 74.7691, + "step": 175720 + }, + { + "epoch": 0.7099714363053851, + "grad_norm": 638.1265869140625, + "learning_rate": 9.823979372253156e-06, + "loss": 58.1646, + "step": 175730 + }, + { + "epoch": 0.7100118375707527, + "grad_norm": 767.8487548828125, + "learning_rate": 9.821575429406052e-06, + "loss": 83.9523, + "step": 175740 + }, + { + "epoch": 0.7100522388361203, + "grad_norm": 1002.283203125, + "learning_rate": 9.819171684992575e-06, + "loss": 93.4922, + "step": 175750 + }, + { + "epoch": 0.710092640101488, + "grad_norm": 474.8342590332031, + "learning_rate": 9.816768139059587e-06, + "loss": 91.1698, + "step": 175760 + }, + { + "epoch": 0.7101330413668556, + "grad_norm": 808.0189208984375, + "learning_rate": 
9.814364791653954e-06, + "loss": 79.6291, + "step": 175770 + }, + { + "epoch": 0.7101734426322233, + "grad_norm": 1418.9228515625, + "learning_rate": 9.811961642822516e-06, + "loss": 71.1894, + "step": 175780 + }, + { + "epoch": 0.7102138438975909, + "grad_norm": 652.4476318359375, + "learning_rate": 9.809558692612131e-06, + "loss": 59.4424, + "step": 175790 + }, + { + "epoch": 0.7102542451629585, + "grad_norm": 662.1224975585938, + "learning_rate": 9.807155941069646e-06, + "loss": 73.49, + "step": 175800 + }, + { + "epoch": 0.7102946464283262, + "grad_norm": 792.9608154296875, + "learning_rate": 9.804753388241903e-06, + "loss": 68.5003, + "step": 175810 + }, + { + "epoch": 0.7103350476936938, + "grad_norm": 315.5412292480469, + "learning_rate": 9.80235103417575e-06, + "loss": 66.612, + "step": 175820 + }, + { + "epoch": 0.7103754489590614, + "grad_norm": 946.0546875, + "learning_rate": 9.799948878917997e-06, + "loss": 80.0671, + "step": 175830 + }, + { + "epoch": 0.710415850224429, + "grad_norm": 1158.7694091796875, + "learning_rate": 9.797546922515509e-06, + "loss": 74.0545, + "step": 175840 + }, + { + "epoch": 0.7104562514897966, + "grad_norm": 1142.8350830078125, + "learning_rate": 9.795145165015088e-06, + "loss": 115.9837, + "step": 175850 + }, + { + "epoch": 0.7104966527551643, + "grad_norm": 1206.6817626953125, + "learning_rate": 9.792743606463565e-06, + "loss": 69.0592, + "step": 175860 + }, + { + "epoch": 0.7105370540205319, + "grad_norm": 1943.599365234375, + "learning_rate": 9.79034224690776e-06, + "loss": 88.132, + "step": 175870 + }, + { + "epoch": 0.7105774552858996, + "grad_norm": 599.8779296875, + "learning_rate": 9.787941086394493e-06, + "loss": 58.2741, + "step": 175880 + }, + { + "epoch": 0.7106178565512672, + "grad_norm": 844.2930908203125, + "learning_rate": 9.785540124970576e-06, + "loss": 52.2268, + "step": 175890 + }, + { + "epoch": 0.7106582578166348, + "grad_norm": 1271.0859375, + "learning_rate": 9.783139362682806e-06, + "loss": 72.0596, + "step": 175900 + }, + { + "epoch": 0.7106986590820025, + "grad_norm": 704.2230224609375, + "learning_rate": 9.780738799577994e-06, + "loss": 42.1076, + "step": 175910 + }, + { + "epoch": 0.7107390603473701, + "grad_norm": 767.5592041015625, + "learning_rate": 9.778338435702941e-06, + "loss": 48.5028, + "step": 175920 + }, + { + "epoch": 0.7107794616127378, + "grad_norm": 816.2061157226562, + "learning_rate": 9.77593827110445e-06, + "loss": 49.9039, + "step": 175930 + }, + { + "epoch": 0.7108198628781054, + "grad_norm": 1022.869873046875, + "learning_rate": 9.77353830582929e-06, + "loss": 79.2638, + "step": 175940 + }, + { + "epoch": 0.7108602641434729, + "grad_norm": 1021.1415405273438, + "learning_rate": 9.771138539924284e-06, + "loss": 78.4217, + "step": 175950 + }, + { + "epoch": 0.7109006654088406, + "grad_norm": 770.7762451171875, + "learning_rate": 9.768738973436186e-06, + "loss": 89.5501, + "step": 175960 + }, + { + "epoch": 0.7109410666742082, + "grad_norm": 563.9398803710938, + "learning_rate": 9.766339606411792e-06, + "loss": 58.3864, + "step": 175970 + }, + { + "epoch": 0.7109814679395758, + "grad_norm": 944.0662231445312, + "learning_rate": 9.763940438897883e-06, + "loss": 75.7847, + "step": 175980 + }, + { + "epoch": 0.7110218692049435, + "grad_norm": 636.219482421875, + "learning_rate": 9.761541470941212e-06, + "loss": 73.0265, + "step": 175990 + }, + { + "epoch": 0.7110622704703111, + "grad_norm": 1496.7889404296875, + "learning_rate": 9.759142702588574e-06, + "loss": 112.7843, + "step": 176000 + }, + { + 
"epoch": 0.7111026717356788, + "grad_norm": 518.286865234375, + "learning_rate": 9.756744133886713e-06, + "loss": 90.3036, + "step": 176010 + }, + { + "epoch": 0.7111430730010464, + "grad_norm": 1144.7154541015625, + "learning_rate": 9.7543457648824e-06, + "loss": 114.5435, + "step": 176020 + }, + { + "epoch": 0.711183474266414, + "grad_norm": 652.7275390625, + "learning_rate": 9.75194759562239e-06, + "loss": 56.1393, + "step": 176030 + }, + { + "epoch": 0.7112238755317817, + "grad_norm": 771.572021484375, + "learning_rate": 9.74954962615344e-06, + "loss": 70.3897, + "step": 176040 + }, + { + "epoch": 0.7112642767971493, + "grad_norm": 1719.02197265625, + "learning_rate": 9.747151856522303e-06, + "loss": 77.5322, + "step": 176050 + }, + { + "epoch": 0.711304678062517, + "grad_norm": 391.1044006347656, + "learning_rate": 9.744754286775713e-06, + "loss": 89.1653, + "step": 176060 + }, + { + "epoch": 0.7113450793278846, + "grad_norm": 642.1503295898438, + "learning_rate": 9.742356916960418e-06, + "loss": 58.039, + "step": 176070 + }, + { + "epoch": 0.7113854805932521, + "grad_norm": 2125.87841796875, + "learning_rate": 9.739959747123154e-06, + "loss": 75.9643, + "step": 176080 + }, + { + "epoch": 0.7114258818586198, + "grad_norm": 452.7261962890625, + "learning_rate": 9.737562777310664e-06, + "loss": 90.3785, + "step": 176090 + }, + { + "epoch": 0.7114662831239874, + "grad_norm": 665.0387573242188, + "learning_rate": 9.735166007569659e-06, + "loss": 67.8737, + "step": 176100 + }, + { + "epoch": 0.711506684389355, + "grad_norm": 987.0153198242188, + "learning_rate": 9.732769437946892e-06, + "loss": 61.2282, + "step": 176110 + }, + { + "epoch": 0.7115470856547227, + "grad_norm": 374.9240417480469, + "learning_rate": 9.73037306848906e-06, + "loss": 55.0529, + "step": 176120 + }, + { + "epoch": 0.7115874869200903, + "grad_norm": 835.5217895507812, + "learning_rate": 9.727976899242894e-06, + "loss": 94.2665, + "step": 176130 + }, + { + "epoch": 0.711627888185458, + "grad_norm": 575.0413208007812, + "learning_rate": 9.725580930255116e-06, + "loss": 49.7113, + "step": 176140 + }, + { + "epoch": 0.7116682894508256, + "grad_norm": 420.2292175292969, + "learning_rate": 9.72318516157241e-06, + "loss": 42.6012, + "step": 176150 + }, + { + "epoch": 0.7117086907161932, + "grad_norm": 680.0390014648438, + "learning_rate": 9.720789593241517e-06, + "loss": 74.8878, + "step": 176160 + }, + { + "epoch": 0.7117490919815609, + "grad_norm": 733.02197265625, + "learning_rate": 9.718394225309114e-06, + "loss": 68.2192, + "step": 176170 + }, + { + "epoch": 0.7117894932469285, + "grad_norm": 833.8541259765625, + "learning_rate": 9.71599905782191e-06, + "loss": 91.1535, + "step": 176180 + }, + { + "epoch": 0.7118298945122962, + "grad_norm": 159.2884979248047, + "learning_rate": 9.713604090826598e-06, + "loss": 69.5258, + "step": 176190 + }, + { + "epoch": 0.7118702957776638, + "grad_norm": 871.5256958007812, + "learning_rate": 9.71120932436987e-06, + "loss": 63.7782, + "step": 176200 + }, + { + "epoch": 0.7119106970430313, + "grad_norm": 887.8225708007812, + "learning_rate": 9.708814758498422e-06, + "loss": 55.5052, + "step": 176210 + }, + { + "epoch": 0.711951098308399, + "grad_norm": 659.6270141601562, + "learning_rate": 9.706420393258919e-06, + "loss": 68.254, + "step": 176220 + }, + { + "epoch": 0.7119914995737666, + "grad_norm": 531.64111328125, + "learning_rate": 9.704026228698052e-06, + "loss": 59.423, + "step": 176230 + }, + { + "epoch": 0.7120319008391343, + "grad_norm": 747.3566284179688, + "learning_rate": 
9.701632264862493e-06, + "loss": 52.5738, + "step": 176240 + }, + { + "epoch": 0.7120723021045019, + "grad_norm": 1047.845458984375, + "learning_rate": 9.699238501798916e-06, + "loss": 92.2433, + "step": 176250 + }, + { + "epoch": 0.7121127033698695, + "grad_norm": 468.23309326171875, + "learning_rate": 9.696844939553987e-06, + "loss": 50.6691, + "step": 176260 + }, + { + "epoch": 0.7121531046352372, + "grad_norm": 709.6765747070312, + "learning_rate": 9.694451578174377e-06, + "loss": 79.9856, + "step": 176270 + }, + { + "epoch": 0.7121935059006048, + "grad_norm": 1746.3692626953125, + "learning_rate": 9.692058417706727e-06, + "loss": 117.6605, + "step": 176280 + }, + { + "epoch": 0.7122339071659725, + "grad_norm": 430.3566589355469, + "learning_rate": 9.689665458197717e-06, + "loss": 52.0436, + "step": 176290 + }, + { + "epoch": 0.7122743084313401, + "grad_norm": 458.0624084472656, + "learning_rate": 9.687272699693981e-06, + "loss": 66.547, + "step": 176300 + }, + { + "epoch": 0.7123147096967077, + "grad_norm": 286.7371520996094, + "learning_rate": 9.684880142242173e-06, + "loss": 101.0307, + "step": 176310 + }, + { + "epoch": 0.7123551109620754, + "grad_norm": 712.3006591796875, + "learning_rate": 9.682487785888943e-06, + "loss": 42.2338, + "step": 176320 + }, + { + "epoch": 0.712395512227443, + "grad_norm": 1098.064208984375, + "learning_rate": 9.680095630680912e-06, + "loss": 68.6039, + "step": 176330 + }, + { + "epoch": 0.7124359134928105, + "grad_norm": 513.7142944335938, + "learning_rate": 9.677703676664745e-06, + "loss": 63.058, + "step": 176340 + }, + { + "epoch": 0.7124763147581782, + "grad_norm": 779.77294921875, + "learning_rate": 9.675311923887052e-06, + "loss": 87.8023, + "step": 176350 + }, + { + "epoch": 0.7125167160235458, + "grad_norm": 763.1795043945312, + "learning_rate": 9.672920372394467e-06, + "loss": 75.7001, + "step": 176360 + }, + { + "epoch": 0.7125571172889135, + "grad_norm": 268.31890869140625, + "learning_rate": 9.670529022233616e-06, + "loss": 62.708, + "step": 176370 + }, + { + "epoch": 0.7125975185542811, + "grad_norm": 961.3299560546875, + "learning_rate": 9.668137873451128e-06, + "loss": 88.7879, + "step": 176380 + }, + { + "epoch": 0.7126379198196487, + "grad_norm": 488.5883483886719, + "learning_rate": 9.665746926093604e-06, + "loss": 56.3685, + "step": 176390 + }, + { + "epoch": 0.7126783210850164, + "grad_norm": 370.4557189941406, + "learning_rate": 9.663356180207663e-06, + "loss": 62.8707, + "step": 176400 + }, + { + "epoch": 0.712718722350384, + "grad_norm": 232.86314392089844, + "learning_rate": 9.660965635839919e-06, + "loss": 58.461, + "step": 176410 + }, + { + "epoch": 0.7127591236157517, + "grad_norm": 615.363525390625, + "learning_rate": 9.65857529303697e-06, + "loss": 52.3581, + "step": 176420 + }, + { + "epoch": 0.7127995248811193, + "grad_norm": 829.3931274414062, + "learning_rate": 9.656185151845424e-06, + "loss": 57.2616, + "step": 176430 + }, + { + "epoch": 0.712839926146487, + "grad_norm": 320.6863708496094, + "learning_rate": 9.653795212311863e-06, + "loss": 54.2649, + "step": 176440 + }, + { + "epoch": 0.7128803274118546, + "grad_norm": 438.32830810546875, + "learning_rate": 9.651405474482904e-06, + "loss": 99.7178, + "step": 176450 + }, + { + "epoch": 0.7129207286772221, + "grad_norm": 1191.050537109375, + "learning_rate": 9.649015938405117e-06, + "loss": 76.5189, + "step": 176460 + }, + { + "epoch": 0.7129611299425898, + "grad_norm": 1232.09130859375, + "learning_rate": 9.646626604125094e-06, + "loss": 98.1632, + "step": 176470 + }, 
+ { + "epoch": 0.7130015312079574, + "grad_norm": 621.3639526367188, + "learning_rate": 9.644237471689419e-06, + "loss": 78.7869, + "step": 176480 + }, + { + "epoch": 0.713041932473325, + "grad_norm": 1121.5333251953125, + "learning_rate": 9.641848541144653e-06, + "loss": 69.8053, + "step": 176490 + }, + { + "epoch": 0.7130823337386927, + "grad_norm": 615.60205078125, + "learning_rate": 9.6394598125374e-06, + "loss": 83.458, + "step": 176500 + }, + { + "epoch": 0.7131227350040603, + "grad_norm": 784.4923706054688, + "learning_rate": 9.637071285914203e-06, + "loss": 47.7706, + "step": 176510 + }, + { + "epoch": 0.713163136269428, + "grad_norm": 886.1889038085938, + "learning_rate": 9.634682961321636e-06, + "loss": 105.9259, + "step": 176520 + }, + { + "epoch": 0.7132035375347956, + "grad_norm": 793.4755249023438, + "learning_rate": 9.63229483880626e-06, + "loss": 112.2589, + "step": 176530 + }, + { + "epoch": 0.7132439388001632, + "grad_norm": 330.9516906738281, + "learning_rate": 9.629906918414644e-06, + "loss": 47.7534, + "step": 176540 + }, + { + "epoch": 0.7132843400655309, + "grad_norm": 845.9786376953125, + "learning_rate": 9.627519200193322e-06, + "loss": 75.5034, + "step": 176550 + }, + { + "epoch": 0.7133247413308985, + "grad_norm": 710.198974609375, + "learning_rate": 9.625131684188852e-06, + "loss": 104.4863, + "step": 176560 + }, + { + "epoch": 0.7133651425962662, + "grad_norm": 950.7787475585938, + "learning_rate": 9.622744370447785e-06, + "loss": 52.0737, + "step": 176570 + }, + { + "epoch": 0.7134055438616338, + "grad_norm": 1473.84521484375, + "learning_rate": 9.620357259016657e-06, + "loss": 93.012, + "step": 176580 + }, + { + "epoch": 0.7134459451270013, + "grad_norm": 415.7459411621094, + "learning_rate": 9.617970349942014e-06, + "loss": 56.4533, + "step": 176590 + }, + { + "epoch": 0.713486346392369, + "grad_norm": 450.0052795410156, + "learning_rate": 9.615583643270371e-06, + "loss": 67.2616, + "step": 176600 + }, + { + "epoch": 0.7135267476577366, + "grad_norm": 847.8720703125, + "learning_rate": 9.613197139048285e-06, + "loss": 62.4612, + "step": 176610 + }, + { + "epoch": 0.7135671489231042, + "grad_norm": 398.41455078125, + "learning_rate": 9.610810837322262e-06, + "loss": 67.5338, + "step": 176620 + }, + { + "epoch": 0.7136075501884719, + "grad_norm": 478.386962890625, + "learning_rate": 9.60842473813883e-06, + "loss": 82.881, + "step": 176630 + }, + { + "epoch": 0.7136479514538395, + "grad_norm": 531.3264770507812, + "learning_rate": 9.606038841544507e-06, + "loss": 67.0893, + "step": 176640 + }, + { + "epoch": 0.7136883527192072, + "grad_norm": 495.50469970703125, + "learning_rate": 9.60365314758581e-06, + "loss": 59.8076, + "step": 176650 + }, + { + "epoch": 0.7137287539845748, + "grad_norm": 758.1607055664062, + "learning_rate": 9.601267656309253e-06, + "loss": 71.1561, + "step": 176660 + }, + { + "epoch": 0.7137691552499424, + "grad_norm": 543.3732299804688, + "learning_rate": 9.598882367761329e-06, + "loss": 64.4097, + "step": 176670 + }, + { + "epoch": 0.7138095565153101, + "grad_norm": 450.6043395996094, + "learning_rate": 9.596497281988548e-06, + "loss": 71.1413, + "step": 176680 + }, + { + "epoch": 0.7138499577806777, + "grad_norm": 259.1203308105469, + "learning_rate": 9.59411239903741e-06, + "loss": 51.4825, + "step": 176690 + }, + { + "epoch": 0.7138903590460454, + "grad_norm": 742.5173950195312, + "learning_rate": 9.591727718954414e-06, + "loss": 67.5317, + "step": 176700 + }, + { + "epoch": 0.713930760311413, + "grad_norm": 1268.1644287109375, + 
"learning_rate": 9.589343241786032e-06, + "loss": 72.045, + "step": 176710 + }, + { + "epoch": 0.7139711615767805, + "grad_norm": 463.9710693359375, + "learning_rate": 9.586958967578775e-06, + "loss": 122.2048, + "step": 176720 + }, + { + "epoch": 0.7140115628421482, + "grad_norm": 305.2679443359375, + "learning_rate": 9.584574896379108e-06, + "loss": 38.8264, + "step": 176730 + }, + { + "epoch": 0.7140519641075158, + "grad_norm": 481.50140380859375, + "learning_rate": 9.582191028233516e-06, + "loss": 64.4636, + "step": 176740 + }, + { + "epoch": 0.7140923653728835, + "grad_norm": 766.5243530273438, + "learning_rate": 9.57980736318848e-06, + "loss": 66.4596, + "step": 176750 + }, + { + "epoch": 0.7141327666382511, + "grad_norm": 719.5803833007812, + "learning_rate": 9.57742390129045e-06, + "loss": 139.1673, + "step": 176760 + }, + { + "epoch": 0.7141731679036187, + "grad_norm": 994.8701171875, + "learning_rate": 9.575040642585923e-06, + "loss": 83.2412, + "step": 176770 + }, + { + "epoch": 0.7142135691689864, + "grad_norm": 780.5865478515625, + "learning_rate": 9.572657587121328e-06, + "loss": 71.8217, + "step": 176780 + }, + { + "epoch": 0.714253970434354, + "grad_norm": 1006.6753540039062, + "learning_rate": 9.570274734943159e-06, + "loss": 46.4225, + "step": 176790 + }, + { + "epoch": 0.7142943716997217, + "grad_norm": 505.00726318359375, + "learning_rate": 9.567892086097845e-06, + "loss": 49.6179, + "step": 176800 + }, + { + "epoch": 0.7143347729650893, + "grad_norm": 840.145751953125, + "learning_rate": 9.565509640631845e-06, + "loss": 64.1118, + "step": 176810 + }, + { + "epoch": 0.7143751742304569, + "grad_norm": 482.8485412597656, + "learning_rate": 9.563127398591617e-06, + "loss": 61.8544, + "step": 176820 + }, + { + "epoch": 0.7144155754958246, + "grad_norm": 1111.79296875, + "learning_rate": 9.560745360023581e-06, + "loss": 49.6911, + "step": 176830 + }, + { + "epoch": 0.7144559767611922, + "grad_norm": 681.2439575195312, + "learning_rate": 9.558363524974192e-06, + "loss": 50.3085, + "step": 176840 + }, + { + "epoch": 0.7144963780265597, + "grad_norm": 557.5352783203125, + "learning_rate": 9.555981893489883e-06, + "loss": 70.7203, + "step": 176850 + }, + { + "epoch": 0.7145367792919274, + "grad_norm": 866.8016357421875, + "learning_rate": 9.553600465617084e-06, + "loss": 97.6749, + "step": 176860 + }, + { + "epoch": 0.714577180557295, + "grad_norm": 659.822998046875, + "learning_rate": 9.55121924140222e-06, + "loss": 76.599, + "step": 176870 + }, + { + "epoch": 0.7146175818226627, + "grad_norm": 559.7158203125, + "learning_rate": 9.548838220891723e-06, + "loss": 87.7138, + "step": 176880 + }, + { + "epoch": 0.7146579830880303, + "grad_norm": 368.3558044433594, + "learning_rate": 9.546457404131999e-06, + "loss": 55.873, + "step": 176890 + }, + { + "epoch": 0.7146983843533979, + "grad_norm": 584.16845703125, + "learning_rate": 9.54407679116947e-06, + "loss": 76.1475, + "step": 176900 + }, + { + "epoch": 0.7147387856187656, + "grad_norm": 776.585205078125, + "learning_rate": 9.541696382050548e-06, + "loss": 65.4531, + "step": 176910 + }, + { + "epoch": 0.7147791868841332, + "grad_norm": 805.8932495117188, + "learning_rate": 9.539316176821639e-06, + "loss": 70.549, + "step": 176920 + }, + { + "epoch": 0.7148195881495009, + "grad_norm": 763.5477905273438, + "learning_rate": 9.536936175529152e-06, + "loss": 43.3509, + "step": 176930 + }, + { + "epoch": 0.7148599894148685, + "grad_norm": 877.72705078125, + "learning_rate": 9.534556378219469e-06, + "loss": 68.3388, + "step": 176940 + }, 
+ { + "epoch": 0.7149003906802361, + "grad_norm": 774.36376953125, + "learning_rate": 9.532176784939011e-06, + "loss": 70.0248, + "step": 176950 + }, + { + "epoch": 0.7149407919456038, + "grad_norm": 865.5239868164062, + "learning_rate": 9.529797395734149e-06, + "loss": 65.01, + "step": 176960 + }, + { + "epoch": 0.7149811932109714, + "grad_norm": 550.9984130859375, + "learning_rate": 9.527418210651275e-06, + "loss": 69.5419, + "step": 176970 + }, + { + "epoch": 0.715021594476339, + "grad_norm": 527.6162719726562, + "learning_rate": 9.525039229736777e-06, + "loss": 43.7608, + "step": 176980 + }, + { + "epoch": 0.7150619957417066, + "grad_norm": 420.148193359375, + "learning_rate": 9.522660453037039e-06, + "loss": 79.8898, + "step": 176990 + }, + { + "epoch": 0.7151023970070742, + "grad_norm": 3334.756103515625, + "learning_rate": 9.52028188059842e-06, + "loss": 59.2708, + "step": 177000 + }, + { + "epoch": 0.7151427982724419, + "grad_norm": 810.755859375, + "learning_rate": 9.517903512467304e-06, + "loss": 72.368, + "step": 177010 + }, + { + "epoch": 0.7151831995378095, + "grad_norm": 203.78079223632812, + "learning_rate": 9.515525348690053e-06, + "loss": 62.0946, + "step": 177020 + }, + { + "epoch": 0.7152236008031772, + "grad_norm": 162.84886169433594, + "learning_rate": 9.513147389313035e-06, + "loss": 81.2641, + "step": 177030 + }, + { + "epoch": 0.7152640020685448, + "grad_norm": 520.66650390625, + "learning_rate": 9.510769634382614e-06, + "loss": 60.3478, + "step": 177040 + }, + { + "epoch": 0.7153044033339124, + "grad_norm": 189.00433349609375, + "learning_rate": 9.508392083945126e-06, + "loss": 80.7681, + "step": 177050 + }, + { + "epoch": 0.7153448045992801, + "grad_norm": 1078.2235107421875, + "learning_rate": 9.50601473804695e-06, + "loss": 75.7939, + "step": 177060 + }, + { + "epoch": 0.7153852058646477, + "grad_norm": 1449.465087890625, + "learning_rate": 9.503637596734413e-06, + "loss": 56.5837, + "step": 177070 + }, + { + "epoch": 0.7154256071300154, + "grad_norm": 936.8643798828125, + "learning_rate": 9.501260660053862e-06, + "loss": 66.2122, + "step": 177080 + }, + { + "epoch": 0.715466008395383, + "grad_norm": 1931.105224609375, + "learning_rate": 9.498883928051647e-06, + "loss": 56.3298, + "step": 177090 + }, + { + "epoch": 0.7155064096607505, + "grad_norm": 1192.1268310546875, + "learning_rate": 9.496507400774085e-06, + "loss": 115.7615, + "step": 177100 + }, + { + "epoch": 0.7155468109261182, + "grad_norm": 177.53179931640625, + "learning_rate": 9.49413107826753e-06, + "loss": 37.8553, + "step": 177110 + }, + { + "epoch": 0.7155872121914858, + "grad_norm": 654.974853515625, + "learning_rate": 9.491754960578292e-06, + "loss": 63.6557, + "step": 177120 + }, + { + "epoch": 0.7156276134568534, + "grad_norm": 1014.482666015625, + "learning_rate": 9.489379047752702e-06, + "loss": 72.4637, + "step": 177130 + }, + { + "epoch": 0.7156680147222211, + "grad_norm": 1023.412841796875, + "learning_rate": 9.487003339837078e-06, + "loss": 70.5193, + "step": 177140 + }, + { + "epoch": 0.7157084159875887, + "grad_norm": 1256.193603515625, + "learning_rate": 9.484627836877745e-06, + "loss": 78.9554, + "step": 177150 + }, + { + "epoch": 0.7157488172529564, + "grad_norm": 497.5287780761719, + "learning_rate": 9.482252538920997e-06, + "loss": 49.1657, + "step": 177160 + }, + { + "epoch": 0.715789218518324, + "grad_norm": 1523.8216552734375, + "learning_rate": 9.479877446013151e-06, + "loss": 45.6081, + "step": 177170 + }, + { + "epoch": 0.7158296197836916, + "grad_norm": 972.072998046875, + 
"learning_rate": 9.477502558200508e-06, + "loss": 62.8283, + "step": 177180 + }, + { + "epoch": 0.7158700210490593, + "grad_norm": 1244.7247314453125, + "learning_rate": 9.475127875529372e-06, + "loss": 103.2778, + "step": 177190 + }, + { + "epoch": 0.7159104223144269, + "grad_norm": 974.709228515625, + "learning_rate": 9.472753398046045e-06, + "loss": 67.7894, + "step": 177200 + }, + { + "epoch": 0.7159508235797946, + "grad_norm": 1007.3297119140625, + "learning_rate": 9.470379125796796e-06, + "loss": 54.3299, + "step": 177210 + }, + { + "epoch": 0.7159912248451622, + "grad_norm": 490.16845703125, + "learning_rate": 9.468005058827941e-06, + "loss": 47.5834, + "step": 177220 + }, + { + "epoch": 0.7160316261105297, + "grad_norm": 516.9462280273438, + "learning_rate": 9.465631197185743e-06, + "loss": 58.8762, + "step": 177230 + }, + { + "epoch": 0.7160720273758974, + "grad_norm": 960.2066650390625, + "learning_rate": 9.463257540916486e-06, + "loss": 55.8481, + "step": 177240 + }, + { + "epoch": 0.716112428641265, + "grad_norm": 678.4588012695312, + "learning_rate": 9.460884090066449e-06, + "loss": 55.4485, + "step": 177250 + }, + { + "epoch": 0.7161528299066326, + "grad_norm": 2369.359375, + "learning_rate": 9.458510844681902e-06, + "loss": 79.356, + "step": 177260 + }, + { + "epoch": 0.7161932311720003, + "grad_norm": 621.841796875, + "learning_rate": 9.456137804809123e-06, + "loss": 47.6917, + "step": 177270 + }, + { + "epoch": 0.7162336324373679, + "grad_norm": 604.0316772460938, + "learning_rate": 9.453764970494354e-06, + "loss": 62.544, + "step": 177280 + }, + { + "epoch": 0.7162740337027356, + "grad_norm": 624.397705078125, + "learning_rate": 9.451392341783871e-06, + "loss": 63.1988, + "step": 177290 + }, + { + "epoch": 0.7163144349681032, + "grad_norm": 379.3200378417969, + "learning_rate": 9.449019918723923e-06, + "loss": 75.6695, + "step": 177300 + }, + { + "epoch": 0.7163548362334708, + "grad_norm": 601.7987060546875, + "learning_rate": 9.446647701360765e-06, + "loss": 39.5436, + "step": 177310 + }, + { + "epoch": 0.7163952374988385, + "grad_norm": 553.4331665039062, + "learning_rate": 9.44427568974064e-06, + "loss": 52.8883, + "step": 177320 + }, + { + "epoch": 0.7164356387642061, + "grad_norm": 1003.4904174804688, + "learning_rate": 9.441903883909806e-06, + "loss": 78.1867, + "step": 177330 + }, + { + "epoch": 0.7164760400295738, + "grad_norm": 635.7399291992188, + "learning_rate": 9.439532283914483e-06, + "loss": 55.5031, + "step": 177340 + }, + { + "epoch": 0.7165164412949414, + "grad_norm": 1143.7169189453125, + "learning_rate": 9.437160889800914e-06, + "loss": 56.7723, + "step": 177350 + }, + { + "epoch": 0.7165568425603089, + "grad_norm": 1099.380615234375, + "learning_rate": 9.434789701615339e-06, + "loss": 70.1295, + "step": 177360 + }, + { + "epoch": 0.7165972438256766, + "grad_norm": 756.1007690429688, + "learning_rate": 9.432418719403965e-06, + "loss": 67.4151, + "step": 177370 + }, + { + "epoch": 0.7166376450910442, + "grad_norm": 293.9726257324219, + "learning_rate": 9.430047943213042e-06, + "loss": 76.3054, + "step": 177380 + }, + { + "epoch": 0.7166780463564119, + "grad_norm": 1950.1439208984375, + "learning_rate": 9.427677373088762e-06, + "loss": 76.4267, + "step": 177390 + }, + { + "epoch": 0.7167184476217795, + "grad_norm": 1118.2529296875, + "learning_rate": 9.425307009077368e-06, + "loss": 70.9934, + "step": 177400 + }, + { + "epoch": 0.7167588488871471, + "grad_norm": 475.9242858886719, + "learning_rate": 9.422936851225052e-06, + "loss": 67.1251, + "step": 
177410 + }, + { + "epoch": 0.7167992501525148, + "grad_norm": 689.40478515625, + "learning_rate": 9.420566899578028e-06, + "loss": 91.8602, + "step": 177420 + }, + { + "epoch": 0.7168396514178824, + "grad_norm": 450.0085754394531, + "learning_rate": 9.418197154182505e-06, + "loss": 88.419, + "step": 177430 + }, + { + "epoch": 0.7168800526832501, + "grad_norm": 987.5018920898438, + "learning_rate": 9.41582761508467e-06, + "loss": 67.8902, + "step": 177440 + }, + { + "epoch": 0.7169204539486177, + "grad_norm": 1010.2935791015625, + "learning_rate": 9.413458282330724e-06, + "loss": 57.6901, + "step": 177450 + }, + { + "epoch": 0.7169608552139853, + "grad_norm": 859.7064208984375, + "learning_rate": 9.411089155966859e-06, + "loss": 61.4552, + "step": 177460 + }, + { + "epoch": 0.717001256479353, + "grad_norm": 270.70001220703125, + "learning_rate": 9.408720236039261e-06, + "loss": 44.2164, + "step": 177470 + }, + { + "epoch": 0.7170416577447206, + "grad_norm": 356.26409912109375, + "learning_rate": 9.406351522594117e-06, + "loss": 84.8645, + "step": 177480 + }, + { + "epoch": 0.7170820590100881, + "grad_norm": 328.6282653808594, + "learning_rate": 9.403983015677611e-06, + "loss": 64.7361, + "step": 177490 + }, + { + "epoch": 0.7171224602754558, + "grad_norm": 614.06787109375, + "learning_rate": 9.401614715335905e-06, + "loss": 66.4943, + "step": 177500 + }, + { + "epoch": 0.7171628615408234, + "grad_norm": 771.6884765625, + "learning_rate": 9.399246621615175e-06, + "loss": 81.3253, + "step": 177510 + }, + { + "epoch": 0.7172032628061911, + "grad_norm": 741.4154052734375, + "learning_rate": 9.39687873456159e-06, + "loss": 48.778, + "step": 177520 + }, + { + "epoch": 0.7172436640715587, + "grad_norm": 669.9552001953125, + "learning_rate": 9.394511054221313e-06, + "loss": 90.2534, + "step": 177530 + }, + { + "epoch": 0.7172840653369263, + "grad_norm": 440.0824279785156, + "learning_rate": 9.392143580640511e-06, + "loss": 46.1884, + "step": 177540 + }, + { + "epoch": 0.717324466602294, + "grad_norm": 1088.93115234375, + "learning_rate": 9.389776313865315e-06, + "loss": 63.4121, + "step": 177550 + }, + { + "epoch": 0.7173648678676616, + "grad_norm": 1121.9910888671875, + "learning_rate": 9.387409253941908e-06, + "loss": 96.8904, + "step": 177560 + }, + { + "epoch": 0.7174052691330293, + "grad_norm": 542.5570068359375, + "learning_rate": 9.385042400916416e-06, + "loss": 62.2497, + "step": 177570 + }, + { + "epoch": 0.7174456703983969, + "grad_norm": 722.9684448242188, + "learning_rate": 9.382675754834984e-06, + "loss": 71.5256, + "step": 177580 + }, + { + "epoch": 0.7174860716637645, + "grad_norm": 4675.45703125, + "learning_rate": 9.380309315743756e-06, + "loss": 55.1475, + "step": 177590 + }, + { + "epoch": 0.7175264729291322, + "grad_norm": 959.8240356445312, + "learning_rate": 9.377943083688873e-06, + "loss": 59.2894, + "step": 177600 + }, + { + "epoch": 0.7175668741944998, + "grad_norm": 531.5782470703125, + "learning_rate": 9.37557705871645e-06, + "loss": 108.1883, + "step": 177610 + }, + { + "epoch": 0.7176072754598674, + "grad_norm": 381.9957580566406, + "learning_rate": 9.373211240872621e-06, + "loss": 88.7133, + "step": 177620 + }, + { + "epoch": 0.717647676725235, + "grad_norm": 711.1021728515625, + "learning_rate": 9.37084563020351e-06, + "loss": 78.1624, + "step": 177630 + }, + { + "epoch": 0.7176880779906026, + "grad_norm": 901.7408447265625, + "learning_rate": 9.368480226755239e-06, + "loss": 54.5059, + "step": 177640 + }, + { + "epoch": 0.7177284792559703, + "grad_norm": 
1068.1778564453125, + "learning_rate": 9.366115030573923e-06, + "loss": 71.6056, + "step": 177650 + }, + { + "epoch": 0.7177688805213379, + "grad_norm": 711.0968627929688, + "learning_rate": 9.363750041705658e-06, + "loss": 123.3143, + "step": 177660 + }, + { + "epoch": 0.7178092817867056, + "grad_norm": 844.0927734375, + "learning_rate": 9.361385260196574e-06, + "loss": 65.5605, + "step": 177670 + }, + { + "epoch": 0.7178496830520732, + "grad_norm": 1024.0242919921875, + "learning_rate": 9.359020686092755e-06, + "loss": 69.184, + "step": 177680 + }, + { + "epoch": 0.7178900843174408, + "grad_norm": 854.4601440429688, + "learning_rate": 9.356656319440305e-06, + "loss": 74.0853, + "step": 177690 + }, + { + "epoch": 0.7179304855828085, + "grad_norm": 872.7996215820312, + "learning_rate": 9.354292160285328e-06, + "loss": 75.2658, + "step": 177700 + }, + { + "epoch": 0.7179708868481761, + "grad_norm": 285.60888671875, + "learning_rate": 9.351928208673891e-06, + "loss": 96.4457, + "step": 177710 + }, + { + "epoch": 0.7180112881135438, + "grad_norm": 1147.8115234375, + "learning_rate": 9.34956446465211e-06, + "loss": 66.6468, + "step": 177720 + }, + { + "epoch": 0.7180516893789114, + "grad_norm": 462.4715270996094, + "learning_rate": 9.347200928266047e-06, + "loss": 73.0679, + "step": 177730 + }, + { + "epoch": 0.7180920906442789, + "grad_norm": 1233.6014404296875, + "learning_rate": 9.344837599561784e-06, + "loss": 88.5246, + "step": 177740 + }, + { + "epoch": 0.7181324919096466, + "grad_norm": 586.175048828125, + "learning_rate": 9.342474478585399e-06, + "loss": 61.1885, + "step": 177750 + }, + { + "epoch": 0.7181728931750142, + "grad_norm": 428.0393981933594, + "learning_rate": 9.340111565382961e-06, + "loss": 55.9852, + "step": 177760 + }, + { + "epoch": 0.7182132944403818, + "grad_norm": 891.8316650390625, + "learning_rate": 9.337748860000544e-06, + "loss": 67.5769, + "step": 177770 + }, + { + "epoch": 0.7182536957057495, + "grad_norm": 891.379150390625, + "learning_rate": 9.335386362484196e-06, + "loss": 70.9174, + "step": 177780 + }, + { + "epoch": 0.7182940969711171, + "grad_norm": 690.211181640625, + "learning_rate": 9.33302407287998e-06, + "loss": 59.6796, + "step": 177790 + }, + { + "epoch": 0.7183344982364848, + "grad_norm": 382.752197265625, + "learning_rate": 9.330661991233949e-06, + "loss": 82.2801, + "step": 177800 + }, + { + "epoch": 0.7183748995018524, + "grad_norm": 514.2881469726562, + "learning_rate": 9.328300117592166e-06, + "loss": 66.6352, + "step": 177810 + }, + { + "epoch": 0.71841530076722, + "grad_norm": 463.48388671875, + "learning_rate": 9.325938452000649e-06, + "loss": 63.497, + "step": 177820 + }, + { + "epoch": 0.7184557020325877, + "grad_norm": 555.4263305664062, + "learning_rate": 9.323576994505474e-06, + "loss": 58.4557, + "step": 177830 + }, + { + "epoch": 0.7184961032979553, + "grad_norm": 687.364013671875, + "learning_rate": 9.321215745152654e-06, + "loss": 54.3189, + "step": 177840 + }, + { + "epoch": 0.718536504563323, + "grad_norm": 1168.851806640625, + "learning_rate": 9.318854703988231e-06, + "loss": 75.1864, + "step": 177850 + }, + { + "epoch": 0.7185769058286906, + "grad_norm": 381.9225158691406, + "learning_rate": 9.316493871058236e-06, + "loss": 58.6526, + "step": 177860 + }, + { + "epoch": 0.7186173070940581, + "grad_norm": 812.256103515625, + "learning_rate": 9.314133246408693e-06, + "loss": 98.4532, + "step": 177870 + }, + { + "epoch": 0.7186577083594258, + "grad_norm": 312.1396179199219, + "learning_rate": 9.311772830085629e-06, + "loss": 
60.6979, + "step": 177880 + }, + { + "epoch": 0.7186981096247934, + "grad_norm": 1056.2342529296875, + "learning_rate": 9.30941262213505e-06, + "loss": 71.2385, + "step": 177890 + }, + { + "epoch": 0.718738510890161, + "grad_norm": 499.1510009765625, + "learning_rate": 9.307052622602977e-06, + "loss": 81.9869, + "step": 177900 + }, + { + "epoch": 0.7187789121555287, + "grad_norm": 706.7220458984375, + "learning_rate": 9.304692831535416e-06, + "loss": 77.5879, + "step": 177910 + }, + { + "epoch": 0.7188193134208963, + "grad_norm": 996.89697265625, + "learning_rate": 9.302333248978375e-06, + "loss": 68.8067, + "step": 177920 + }, + { + "epoch": 0.718859714686264, + "grad_norm": 942.6900634765625, + "learning_rate": 9.299973874977857e-06, + "loss": 71.9032, + "step": 177930 + }, + { + "epoch": 0.7189001159516316, + "grad_norm": 731.5128173828125, + "learning_rate": 9.297614709579863e-06, + "loss": 81.2315, + "step": 177940 + }, + { + "epoch": 0.7189405172169993, + "grad_norm": 727.8933715820312, + "learning_rate": 9.295255752830372e-06, + "loss": 45.8408, + "step": 177950 + }, + { + "epoch": 0.7189809184823669, + "grad_norm": 940.5167236328125, + "learning_rate": 9.292897004775381e-06, + "loss": 83.8417, + "step": 177960 + }, + { + "epoch": 0.7190213197477345, + "grad_norm": 602.7026977539062, + "learning_rate": 9.290538465460885e-06, + "loss": 115.9614, + "step": 177970 + }, + { + "epoch": 0.7190617210131022, + "grad_norm": 315.2239074707031, + "learning_rate": 9.288180134932839e-06, + "loss": 63.1777, + "step": 177980 + }, + { + "epoch": 0.7191021222784698, + "grad_norm": 538.0882568359375, + "learning_rate": 9.285822013237252e-06, + "loss": 54.0047, + "step": 177990 + }, + { + "epoch": 0.7191425235438373, + "grad_norm": 415.70556640625, + "learning_rate": 9.283464100420064e-06, + "loss": 66.3718, + "step": 178000 + }, + { + "epoch": 0.719182924809205, + "grad_norm": 873.6517333984375, + "learning_rate": 9.281106396527278e-06, + "loss": 74.3739, + "step": 178010 + }, + { + "epoch": 0.7192233260745726, + "grad_norm": 2013.052001953125, + "learning_rate": 9.27874890160483e-06, + "loss": 85.5577, + "step": 178020 + }, + { + "epoch": 0.7192637273399403, + "grad_norm": 769.38330078125, + "learning_rate": 9.276391615698695e-06, + "loss": 81.862, + "step": 178030 + }, + { + "epoch": 0.7193041286053079, + "grad_norm": 729.7666015625, + "learning_rate": 9.274034538854832e-06, + "loss": 70.6524, + "step": 178040 + }, + { + "epoch": 0.7193445298706755, + "grad_norm": 593.6414794921875, + "learning_rate": 9.271677671119181e-06, + "loss": 66.2894, + "step": 178050 + }, + { + "epoch": 0.7193849311360432, + "grad_norm": 978.9708862304688, + "learning_rate": 9.269321012537697e-06, + "loss": 73.0464, + "step": 178060 + }, + { + "epoch": 0.7194253324014108, + "grad_norm": 679.9098510742188, + "learning_rate": 9.266964563156326e-06, + "loss": 81.2963, + "step": 178070 + }, + { + "epoch": 0.7194657336667785, + "grad_norm": 664.0359497070312, + "learning_rate": 9.264608323021005e-06, + "loss": 61.6717, + "step": 178080 + }, + { + "epoch": 0.7195061349321461, + "grad_norm": 458.09075927734375, + "learning_rate": 9.262252292177674e-06, + "loss": 52.9407, + "step": 178090 + }, + { + "epoch": 0.7195465361975137, + "grad_norm": 421.01849365234375, + "learning_rate": 9.259896470672267e-06, + "loss": 37.0598, + "step": 178100 + }, + { + "epoch": 0.7195869374628814, + "grad_norm": 679.7785034179688, + "learning_rate": 9.257540858550702e-06, + "loss": 117.833, + "step": 178110 + }, + { + "epoch": 0.719627338728249, + 
"grad_norm": 914.44140625, + "learning_rate": 9.255185455858909e-06, + "loss": 82.1931, + "step": 178120 + }, + { + "epoch": 0.7196677399936166, + "grad_norm": 845.9158935546875, + "learning_rate": 9.252830262642814e-06, + "loss": 57.0969, + "step": 178130 + }, + { + "epoch": 0.7197081412589842, + "grad_norm": 708.2632446289062, + "learning_rate": 9.25047527894831e-06, + "loss": 73.9386, + "step": 178140 + }, + { + "epoch": 0.7197485425243518, + "grad_norm": 736.4959106445312, + "learning_rate": 9.24812050482134e-06, + "loss": 67.6326, + "step": 178150 + }, + { + "epoch": 0.7197889437897195, + "grad_norm": 350.1199645996094, + "learning_rate": 9.245765940307783e-06, + "loss": 78.5523, + "step": 178160 + }, + { + "epoch": 0.7198293450550871, + "grad_norm": 610.5077514648438, + "learning_rate": 9.243411585453568e-06, + "loss": 73.9334, + "step": 178170 + }, + { + "epoch": 0.7198697463204548, + "grad_norm": 1373.80908203125, + "learning_rate": 9.241057440304578e-06, + "loss": 112.4053, + "step": 178180 + }, + { + "epoch": 0.7199101475858224, + "grad_norm": 567.7282104492188, + "learning_rate": 9.238703504906707e-06, + "loss": 43.2479, + "step": 178190 + }, + { + "epoch": 0.71995054885119, + "grad_norm": 347.75885009765625, + "learning_rate": 9.236349779305855e-06, + "loss": 79.9898, + "step": 178200 + }, + { + "epoch": 0.7199909501165577, + "grad_norm": 589.7095336914062, + "learning_rate": 9.233996263547912e-06, + "loss": 85.1535, + "step": 178210 + }, + { + "epoch": 0.7200313513819253, + "grad_norm": 667.8616943359375, + "learning_rate": 9.23164295767875e-06, + "loss": 37.4834, + "step": 178220 + }, + { + "epoch": 0.720071752647293, + "grad_norm": 1188.6444091796875, + "learning_rate": 9.22928986174425e-06, + "loss": 54.4332, + "step": 178230 + }, + { + "epoch": 0.7201121539126606, + "grad_norm": 1031.96337890625, + "learning_rate": 9.226936975790288e-06, + "loss": 70.0238, + "step": 178240 + }, + { + "epoch": 0.7201525551780282, + "grad_norm": 406.4094543457031, + "learning_rate": 9.224584299862737e-06, + "loss": 54.717, + "step": 178250 + }, + { + "epoch": 0.7201929564433958, + "grad_norm": 739.1002197265625, + "learning_rate": 9.222231834007474e-06, + "loss": 91.0845, + "step": 178260 + }, + { + "epoch": 0.7202333577087634, + "grad_norm": 1301.359619140625, + "learning_rate": 9.21987957827033e-06, + "loss": 53.4465, + "step": 178270 + }, + { + "epoch": 0.720273758974131, + "grad_norm": 1121.22216796875, + "learning_rate": 9.217527532697201e-06, + "loss": 54.2899, + "step": 178280 + }, + { + "epoch": 0.7203141602394987, + "grad_norm": 750.1578369140625, + "learning_rate": 9.215175697333915e-06, + "loss": 100.6965, + "step": 178290 + }, + { + "epoch": 0.7203545615048663, + "grad_norm": 642.3121948242188, + "learning_rate": 9.212824072226332e-06, + "loss": 60.3219, + "step": 178300 + }, + { + "epoch": 0.720394962770234, + "grad_norm": 704.5855712890625, + "learning_rate": 9.210472657420307e-06, + "loss": 81.8237, + "step": 178310 + }, + { + "epoch": 0.7204353640356016, + "grad_norm": 870.6272583007812, + "learning_rate": 9.208121452961653e-06, + "loss": 61.6433, + "step": 178320 + }, + { + "epoch": 0.7204757653009692, + "grad_norm": 1282.59912109375, + "learning_rate": 9.205770458896246e-06, + "loss": 59.2295, + "step": 178330 + }, + { + "epoch": 0.7205161665663369, + "grad_norm": 669.8829345703125, + "learning_rate": 9.203419675269891e-06, + "loss": 76.3381, + "step": 178340 + }, + { + "epoch": 0.7205565678317045, + "grad_norm": 841.8216552734375, + "learning_rate": 9.20106910212843e-06, + 
"loss": 56.8211, + "step": 178350 + }, + { + "epoch": 0.7205969690970722, + "grad_norm": 842.8999633789062, + "learning_rate": 9.198718739517685e-06, + "loss": 71.9941, + "step": 178360 + }, + { + "epoch": 0.7206373703624398, + "grad_norm": 2037.391357421875, + "learning_rate": 9.19636858748348e-06, + "loss": 93.2316, + "step": 178370 + }, + { + "epoch": 0.7206777716278073, + "grad_norm": 159.64715576171875, + "learning_rate": 9.194018646071639e-06, + "loss": 48.9418, + "step": 178380 + }, + { + "epoch": 0.720718172893175, + "grad_norm": 743.0030517578125, + "learning_rate": 9.19166891532796e-06, + "loss": 48.8543, + "step": 178390 + }, + { + "epoch": 0.7207585741585426, + "grad_norm": 508.32281494140625, + "learning_rate": 9.18931939529826e-06, + "loss": 80.9353, + "step": 178400 + }, + { + "epoch": 0.7207989754239102, + "grad_norm": 510.40582275390625, + "learning_rate": 9.186970086028344e-06, + "loss": 49.4643, + "step": 178410 + }, + { + "epoch": 0.7208393766892779, + "grad_norm": 653.2859497070312, + "learning_rate": 9.184620987564019e-06, + "loss": 62.0808, + "step": 178420 + }, + { + "epoch": 0.7208797779546455, + "grad_norm": 1402.91650390625, + "learning_rate": 9.182272099951064e-06, + "loss": 85.3353, + "step": 178430 + }, + { + "epoch": 0.7209201792200132, + "grad_norm": 718.594970703125, + "learning_rate": 9.179923423235297e-06, + "loss": 76.7785, + "step": 178440 + }, + { + "epoch": 0.7209605804853808, + "grad_norm": 2033.590087890625, + "learning_rate": 9.177574957462485e-06, + "loss": 122.3783, + "step": 178450 + }, + { + "epoch": 0.7210009817507484, + "grad_norm": 472.1727600097656, + "learning_rate": 9.175226702678424e-06, + "loss": 77.5439, + "step": 178460 + }, + { + "epoch": 0.7210413830161161, + "grad_norm": 363.4253845214844, + "learning_rate": 9.172878658928896e-06, + "loss": 54.176, + "step": 178470 + }, + { + "epoch": 0.7210817842814837, + "grad_norm": 469.11712646484375, + "learning_rate": 9.170530826259657e-06, + "loss": 62.607, + "step": 178480 + }, + { + "epoch": 0.7211221855468514, + "grad_norm": 423.5703430175781, + "learning_rate": 9.168183204716515e-06, + "loss": 88.9397, + "step": 178490 + }, + { + "epoch": 0.721162586812219, + "grad_norm": 1064.75927734375, + "learning_rate": 9.165835794345206e-06, + "loss": 89.6133, + "step": 178500 + }, + { + "epoch": 0.7212029880775865, + "grad_norm": 322.1258239746094, + "learning_rate": 9.163488595191507e-06, + "loss": 81.426, + "step": 178510 + }, + { + "epoch": 0.7212433893429542, + "grad_norm": 679.8613891601562, + "learning_rate": 9.161141607301178e-06, + "loss": 64.6093, + "step": 178520 + }, + { + "epoch": 0.7212837906083218, + "grad_norm": 816.7735595703125, + "learning_rate": 9.158794830719973e-06, + "loss": 83.5846, + "step": 178530 + }, + { + "epoch": 0.7213241918736895, + "grad_norm": 700.0399169921875, + "learning_rate": 9.156448265493643e-06, + "loss": 68.6239, + "step": 178540 + }, + { + "epoch": 0.7213645931390571, + "grad_norm": 655.7626953125, + "learning_rate": 9.154101911667945e-06, + "loss": 47.4746, + "step": 178550 + }, + { + "epoch": 0.7214049944044247, + "grad_norm": 1119.7322998046875, + "learning_rate": 9.151755769288605e-06, + "loss": 64.1706, + "step": 178560 + }, + { + "epoch": 0.7214453956697924, + "grad_norm": 612.4402465820312, + "learning_rate": 9.149409838401375e-06, + "loss": 69.208, + "step": 178570 + }, + { + "epoch": 0.72148579693516, + "grad_norm": 49.99311447143555, + "learning_rate": 9.147064119051991e-06, + "loss": 49.016, + "step": 178580 + }, + { + "epoch": 
0.7215261982005277, + "grad_norm": 396.8388671875, + "learning_rate": 9.144718611286168e-06, + "loss": 61.8908, + "step": 178590 + }, + { + "epoch": 0.7215665994658953, + "grad_norm": 299.02001953125, + "learning_rate": 9.142373315149655e-06, + "loss": 68.9501, + "step": 178600 + }, + { + "epoch": 0.7216070007312629, + "grad_norm": 518.5909423828125, + "learning_rate": 9.140028230688151e-06, + "loss": 62.8525, + "step": 178610 + }, + { + "epoch": 0.7216474019966306, + "grad_norm": 652.2169189453125, + "learning_rate": 9.137683357947402e-06, + "loss": 63.4455, + "step": 178620 + }, + { + "epoch": 0.7216878032619982, + "grad_norm": 568.4611206054688, + "learning_rate": 9.135338696973101e-06, + "loss": 70.4595, + "step": 178630 + }, + { + "epoch": 0.7217282045273657, + "grad_norm": 715.0235595703125, + "learning_rate": 9.132994247810967e-06, + "loss": 48.4329, + "step": 178640 + }, + { + "epoch": 0.7217686057927334, + "grad_norm": 856.7667846679688, + "learning_rate": 9.130650010506712e-06, + "loss": 71.8784, + "step": 178650 + }, + { + "epoch": 0.721809007058101, + "grad_norm": 534.0291748046875, + "learning_rate": 9.128305985106023e-06, + "loss": 60.7968, + "step": 178660 + }, + { + "epoch": 0.7218494083234687, + "grad_norm": 604.4874877929688, + "learning_rate": 9.125962171654606e-06, + "loss": 63.7536, + "step": 178670 + }, + { + "epoch": 0.7218898095888363, + "grad_norm": 456.695068359375, + "learning_rate": 9.123618570198154e-06, + "loss": 141.3931, + "step": 178680 + }, + { + "epoch": 0.721930210854204, + "grad_norm": 742.3792114257812, + "learning_rate": 9.121275180782356e-06, + "loss": 57.8361, + "step": 178690 + }, + { + "epoch": 0.7219706121195716, + "grad_norm": 366.27252197265625, + "learning_rate": 9.118932003452902e-06, + "loss": 85.5254, + "step": 178700 + }, + { + "epoch": 0.7220110133849392, + "grad_norm": 429.12591552734375, + "learning_rate": 9.116589038255477e-06, + "loss": 50.9915, + "step": 178710 + }, + { + "epoch": 0.7220514146503069, + "grad_norm": 2281.986328125, + "learning_rate": 9.114246285235745e-06, + "loss": 108.1215, + "step": 178720 + }, + { + "epoch": 0.7220918159156745, + "grad_norm": 783.9915161132812, + "learning_rate": 9.111903744439385e-06, + "loss": 61.3328, + "step": 178730 + }, + { + "epoch": 0.7221322171810421, + "grad_norm": 499.4376220703125, + "learning_rate": 9.109561415912076e-06, + "loss": 64.8468, + "step": 178740 + }, + { + "epoch": 0.7221726184464098, + "grad_norm": 395.06396484375, + "learning_rate": 9.107219299699459e-06, + "loss": 48.8986, + "step": 178750 + }, + { + "epoch": 0.7222130197117774, + "grad_norm": 784.8989868164062, + "learning_rate": 9.104877395847223e-06, + "loss": 77.7188, + "step": 178760 + }, + { + "epoch": 0.722253420977145, + "grad_norm": 800.2418212890625, + "learning_rate": 9.102535704400997e-06, + "loss": 72.249, + "step": 178770 + }, + { + "epoch": 0.7222938222425126, + "grad_norm": 986.5653076171875, + "learning_rate": 9.100194225406462e-06, + "loss": 93.5203, + "step": 178780 + }, + { + "epoch": 0.7223342235078802, + "grad_norm": 1959.8472900390625, + "learning_rate": 9.097852958909245e-06, + "loss": 97.405, + "step": 178790 + }, + { + "epoch": 0.7223746247732479, + "grad_norm": 486.9242858886719, + "learning_rate": 9.095511904955e-06, + "loss": 55.1571, + "step": 178800 + }, + { + "epoch": 0.7224150260386155, + "grad_norm": 510.7974548339844, + "learning_rate": 9.09317106358937e-06, + "loss": 74.2918, + "step": 178810 + }, + { + "epoch": 0.7224554273039832, + "grad_norm": 900.587646484375, + "learning_rate": 
9.090830434857972e-06, + "loss": 107.8847, + "step": 178820 + }, + { + "epoch": 0.7224958285693508, + "grad_norm": 678.684326171875, + "learning_rate": 9.088490018806461e-06, + "loss": 71.1948, + "step": 178830 + }, + { + "epoch": 0.7225362298347184, + "grad_norm": 501.3406677246094, + "learning_rate": 9.086149815480452e-06, + "loss": 50.436, + "step": 178840 + }, + { + "epoch": 0.7225766311000861, + "grad_norm": 664.7572021484375, + "learning_rate": 9.08380982492557e-06, + "loss": 97.2431, + "step": 178850 + }, + { + "epoch": 0.7226170323654537, + "grad_norm": 1178.9423828125, + "learning_rate": 9.081470047187435e-06, + "loss": 82.5202, + "step": 178860 + }, + { + "epoch": 0.7226574336308214, + "grad_norm": 405.91790771484375, + "learning_rate": 9.079130482311671e-06, + "loss": 57.183, + "step": 178870 + }, + { + "epoch": 0.722697834896189, + "grad_norm": 870.985107421875, + "learning_rate": 9.076791130343872e-06, + "loss": 40.8775, + "step": 178880 + }, + { + "epoch": 0.7227382361615566, + "grad_norm": 436.2530212402344, + "learning_rate": 9.074451991329653e-06, + "loss": 47.9224, + "step": 178890 + }, + { + "epoch": 0.7227786374269242, + "grad_norm": 629.8067016601562, + "learning_rate": 9.07211306531462e-06, + "loss": 53.4179, + "step": 178900 + }, + { + "epoch": 0.7228190386922918, + "grad_norm": 450.3126220703125, + "learning_rate": 9.069774352344367e-06, + "loss": 62.2102, + "step": 178910 + }, + { + "epoch": 0.7228594399576594, + "grad_norm": 619.9445190429688, + "learning_rate": 9.067435852464498e-06, + "loss": 58.4163, + "step": 178920 + }, + { + "epoch": 0.7228998412230271, + "grad_norm": 380.9347839355469, + "learning_rate": 9.065097565720582e-06, + "loss": 49.7463, + "step": 178930 + }, + { + "epoch": 0.7229402424883947, + "grad_norm": 1356.444091796875, + "learning_rate": 9.06275949215823e-06, + "loss": 122.8771, + "step": 178940 + }, + { + "epoch": 0.7229806437537624, + "grad_norm": 1800.9051513671875, + "learning_rate": 9.060421631823008e-06, + "loss": 103.7775, + "step": 178950 + }, + { + "epoch": 0.72302104501913, + "grad_norm": 500.61724853515625, + "learning_rate": 9.058083984760497e-06, + "loss": 86.077, + "step": 178960 + }, + { + "epoch": 0.7230614462844976, + "grad_norm": 631.016845703125, + "learning_rate": 9.055746551016274e-06, + "loss": 52.0124, + "step": 178970 + }, + { + "epoch": 0.7231018475498653, + "grad_norm": 1670.90234375, + "learning_rate": 9.053409330635904e-06, + "loss": 103.1398, + "step": 178980 + }, + { + "epoch": 0.7231422488152329, + "grad_norm": 650.10693359375, + "learning_rate": 9.051072323664964e-06, + "loss": 94.7056, + "step": 178990 + }, + { + "epoch": 0.7231826500806006, + "grad_norm": 512.10888671875, + "learning_rate": 9.048735530148998e-06, + "loss": 41.4662, + "step": 179000 + }, + { + "epoch": 0.7232230513459682, + "grad_norm": 518.0028076171875, + "learning_rate": 9.04639895013357e-06, + "loss": 67.8102, + "step": 179010 + }, + { + "epoch": 0.7232634526113357, + "grad_norm": 447.71661376953125, + "learning_rate": 9.044062583664235e-06, + "loss": 52.9089, + "step": 179020 + }, + { + "epoch": 0.7233038538767034, + "grad_norm": 651.3134765625, + "learning_rate": 9.041726430786549e-06, + "loss": 64.4857, + "step": 179030 + }, + { + "epoch": 0.723344255142071, + "grad_norm": 823.6974487304688, + "learning_rate": 9.039390491546031e-06, + "loss": 71.7375, + "step": 179040 + }, + { + "epoch": 0.7233846564074387, + "grad_norm": 508.65411376953125, + "learning_rate": 9.037054765988253e-06, + "loss": 49.0815, + "step": 179050 + }, + { + 
"epoch": 0.7234250576728063, + "grad_norm": 952.0470581054688, + "learning_rate": 9.03471925415873e-06, + "loss": 51.0136, + "step": 179060 + }, + { + "epoch": 0.7234654589381739, + "grad_norm": 327.0063171386719, + "learning_rate": 9.032383956102998e-06, + "loss": 41.4861, + "step": 179070 + }, + { + "epoch": 0.7235058602035416, + "grad_norm": 657.5863647460938, + "learning_rate": 9.0300488718666e-06, + "loss": 86.6084, + "step": 179080 + }, + { + "epoch": 0.7235462614689092, + "grad_norm": 589.230712890625, + "learning_rate": 9.027714001495027e-06, + "loss": 49.3835, + "step": 179090 + }, + { + "epoch": 0.7235866627342769, + "grad_norm": 1515.5777587890625, + "learning_rate": 9.025379345033836e-06, + "loss": 51.0094, + "step": 179100 + }, + { + "epoch": 0.7236270639996445, + "grad_norm": 1069.3218994140625, + "learning_rate": 9.023044902528517e-06, + "loss": 76.944, + "step": 179110 + }, + { + "epoch": 0.7236674652650121, + "grad_norm": 696.35498046875, + "learning_rate": 9.020710674024588e-06, + "loss": 55.1167, + "step": 179120 + }, + { + "epoch": 0.7237078665303798, + "grad_norm": 948.7158813476562, + "learning_rate": 9.018376659567559e-06, + "loss": 64.2804, + "step": 179130 + }, + { + "epoch": 0.7237482677957474, + "grad_norm": 951.3439331054688, + "learning_rate": 9.016042859202927e-06, + "loss": 84.1618, + "step": 179140 + }, + { + "epoch": 0.7237886690611149, + "grad_norm": 378.4971008300781, + "learning_rate": 9.013709272976206e-06, + "loss": 41.42, + "step": 179150 + }, + { + "epoch": 0.7238290703264826, + "grad_norm": 686.0518798828125, + "learning_rate": 9.011375900932869e-06, + "loss": 51.1954, + "step": 179160 + }, + { + "epoch": 0.7238694715918502, + "grad_norm": 763.6070556640625, + "learning_rate": 9.009042743118415e-06, + "loss": 64.143, + "step": 179170 + }, + { + "epoch": 0.7239098728572179, + "grad_norm": 723.9493408203125, + "learning_rate": 9.006709799578332e-06, + "loss": 79.1555, + "step": 179180 + }, + { + "epoch": 0.7239502741225855, + "grad_norm": 887.4378051757812, + "learning_rate": 9.004377070358111e-06, + "loss": 59.7995, + "step": 179190 + }, + { + "epoch": 0.7239906753879531, + "grad_norm": 562.9559326171875, + "learning_rate": 9.002044555503204e-06, + "loss": 55.0926, + "step": 179200 + }, + { + "epoch": 0.7240310766533208, + "grad_norm": 539.4136352539062, + "learning_rate": 8.999712255059114e-06, + "loss": 58.1087, + "step": 179210 + }, + { + "epoch": 0.7240714779186884, + "grad_norm": 816.045166015625, + "learning_rate": 8.99738016907129e-06, + "loss": 75.8669, + "step": 179220 + }, + { + "epoch": 0.7241118791840561, + "grad_norm": 335.1075439453125, + "learning_rate": 8.995048297585205e-06, + "loss": 68.2474, + "step": 179230 + }, + { + "epoch": 0.7241522804494237, + "grad_norm": 618.8425903320312, + "learning_rate": 8.992716640646319e-06, + "loss": 49.2674, + "step": 179240 + }, + { + "epoch": 0.7241926817147913, + "grad_norm": 1128.7371826171875, + "learning_rate": 8.99038519830009e-06, + "loss": 88.7021, + "step": 179250 + }, + { + "epoch": 0.724233082980159, + "grad_norm": 999.9204711914062, + "learning_rate": 8.988053970591975e-06, + "loss": 59.4317, + "step": 179260 + }, + { + "epoch": 0.7242734842455266, + "grad_norm": 531.2050170898438, + "learning_rate": 8.985722957567409e-06, + "loss": 73.6118, + "step": 179270 + }, + { + "epoch": 0.7243138855108942, + "grad_norm": 841.2200317382812, + "learning_rate": 8.983392159271846e-06, + "loss": 102.3337, + "step": 179280 + }, + { + "epoch": 0.7243542867762618, + "grad_norm": 829.595703125, + 
"learning_rate": 8.981061575750725e-06, + "loss": 74.8579, + "step": 179290 + }, + { + "epoch": 0.7243946880416294, + "grad_norm": 955.025634765625, + "learning_rate": 8.97873120704948e-06, + "loss": 74.2986, + "step": 179300 + }, + { + "epoch": 0.7244350893069971, + "grad_norm": 334.8311767578125, + "learning_rate": 8.976401053213546e-06, + "loss": 68.7878, + "step": 179310 + }, + { + "epoch": 0.7244754905723647, + "grad_norm": 484.7381286621094, + "learning_rate": 8.974071114288356e-06, + "loss": 82.849, + "step": 179320 + }, + { + "epoch": 0.7245158918377324, + "grad_norm": 251.02279663085938, + "learning_rate": 8.971741390319319e-06, + "loss": 29.6861, + "step": 179330 + }, + { + "epoch": 0.7245562931031, + "grad_norm": 457.145263671875, + "learning_rate": 8.969411881351859e-06, + "loss": 68.9994, + "step": 179340 + }, + { + "epoch": 0.7245966943684676, + "grad_norm": 451.2746276855469, + "learning_rate": 8.967082587431398e-06, + "loss": 63.6743, + "step": 179350 + }, + { + "epoch": 0.7246370956338353, + "grad_norm": 1037.2691650390625, + "learning_rate": 8.964753508603336e-06, + "loss": 98.737, + "step": 179360 + }, + { + "epoch": 0.7246774968992029, + "grad_norm": 202.5893096923828, + "learning_rate": 8.962424644913096e-06, + "loss": 60.0061, + "step": 179370 + }, + { + "epoch": 0.7247178981645706, + "grad_norm": 781.7704467773438, + "learning_rate": 8.960095996406058e-06, + "loss": 51.4762, + "step": 179380 + }, + { + "epoch": 0.7247582994299382, + "grad_norm": 830.0137939453125, + "learning_rate": 8.957767563127645e-06, + "loss": 51.2726, + "step": 179390 + }, + { + "epoch": 0.7247987006953058, + "grad_norm": 811.23681640625, + "learning_rate": 8.95543934512323e-06, + "loss": 79.8067, + "step": 179400 + }, + { + "epoch": 0.7248391019606734, + "grad_norm": 700.7471313476562, + "learning_rate": 8.95311134243821e-06, + "loss": 49.0242, + "step": 179410 + }, + { + "epoch": 0.724879503226041, + "grad_norm": 482.2694091796875, + "learning_rate": 8.950783555117981e-06, + "loss": 59.0057, + "step": 179420 + }, + { + "epoch": 0.7249199044914086, + "grad_norm": 1094.19921875, + "learning_rate": 8.9484559832079e-06, + "loss": 91.3032, + "step": 179430 + }, + { + "epoch": 0.7249603057567763, + "grad_norm": 440.04083251953125, + "learning_rate": 8.946128626753375e-06, + "loss": 120.4496, + "step": 179440 + }, + { + "epoch": 0.7250007070221439, + "grad_norm": 691.3696899414062, + "learning_rate": 8.943801485799752e-06, + "loss": 54.395, + "step": 179450 + }, + { + "epoch": 0.7250411082875116, + "grad_norm": 409.84820556640625, + "learning_rate": 8.941474560392415e-06, + "loss": 57.6676, + "step": 179460 + }, + { + "epoch": 0.7250815095528792, + "grad_norm": 832.8084716796875, + "learning_rate": 8.939147850576724e-06, + "loss": 59.0546, + "step": 179470 + }, + { + "epoch": 0.7251219108182468, + "grad_norm": 934.2299194335938, + "learning_rate": 8.936821356398047e-06, + "loss": 76.56, + "step": 179480 + }, + { + "epoch": 0.7251623120836145, + "grad_norm": 980.20703125, + "learning_rate": 8.934495077901725e-06, + "loss": 79.7855, + "step": 179490 + }, + { + "epoch": 0.7252027133489821, + "grad_norm": 458.7400207519531, + "learning_rate": 8.93216901513312e-06, + "loss": 58.9225, + "step": 179500 + }, + { + "epoch": 0.7252431146143498, + "grad_norm": 738.2205200195312, + "learning_rate": 8.92984316813758e-06, + "loss": 52.2725, + "step": 179510 + }, + { + "epoch": 0.7252835158797174, + "grad_norm": 833.9366455078125, + "learning_rate": 8.927517536960444e-06, + "loss": 73.5499, + "step": 179520 + }, + 
{ + "epoch": 0.7253239171450849, + "grad_norm": 574.342041015625, + "learning_rate": 8.925192121647062e-06, + "loss": 37.5426, + "step": 179530 + }, + { + "epoch": 0.7253643184104526, + "grad_norm": 679.7711181640625, + "learning_rate": 8.922866922242748e-06, + "loss": 81.1355, + "step": 179540 + }, + { + "epoch": 0.7254047196758202, + "grad_norm": 404.11383056640625, + "learning_rate": 8.920541938792859e-06, + "loss": 60.0099, + "step": 179550 + }, + { + "epoch": 0.7254451209411878, + "grad_norm": 1213.224609375, + "learning_rate": 8.918217171342702e-06, + "loss": 60.3312, + "step": 179560 + }, + { + "epoch": 0.7254855222065555, + "grad_norm": 645.4854125976562, + "learning_rate": 8.915892619937607e-06, + "loss": 129.2964, + "step": 179570 + }, + { + "epoch": 0.7255259234719231, + "grad_norm": 729.5332641601562, + "learning_rate": 8.91356828462289e-06, + "loss": 69.0625, + "step": 179580 + }, + { + "epoch": 0.7255663247372908, + "grad_norm": 976.0846557617188, + "learning_rate": 8.911244165443868e-06, + "loss": 74.7747, + "step": 179590 + }, + { + "epoch": 0.7256067260026584, + "grad_norm": 591.7907104492188, + "learning_rate": 8.908920262445859e-06, + "loss": 63.0021, + "step": 179600 + }, + { + "epoch": 0.725647127268026, + "grad_norm": 1141.09765625, + "learning_rate": 8.906596575674151e-06, + "loss": 68.7176, + "step": 179610 + }, + { + "epoch": 0.7256875285333937, + "grad_norm": 461.4086608886719, + "learning_rate": 8.904273105174055e-06, + "loss": 55.7562, + "step": 179620 + }, + { + "epoch": 0.7257279297987613, + "grad_norm": 673.3651733398438, + "learning_rate": 8.901949850990866e-06, + "loss": 57.4495, + "step": 179630 + }, + { + "epoch": 0.725768331064129, + "grad_norm": 453.03997802734375, + "learning_rate": 8.899626813169886e-06, + "loss": 94.2908, + "step": 179640 + }, + { + "epoch": 0.7258087323294966, + "grad_norm": 623.0118408203125, + "learning_rate": 8.89730399175638e-06, + "loss": 51.8041, + "step": 179650 + }, + { + "epoch": 0.7258491335948641, + "grad_norm": 451.2889099121094, + "learning_rate": 8.894981386795668e-06, + "loss": 50.8597, + "step": 179660 + }, + { + "epoch": 0.7258895348602318, + "grad_norm": 1125.2200927734375, + "learning_rate": 8.892658998332998e-06, + "loss": 68.6097, + "step": 179670 + }, + { + "epoch": 0.7259299361255994, + "grad_norm": 793.3580322265625, + "learning_rate": 8.890336826413664e-06, + "loss": 54.715, + "step": 179680 + }, + { + "epoch": 0.7259703373909671, + "grad_norm": 799.2090454101562, + "learning_rate": 8.888014871082937e-06, + "loss": 90.9344, + "step": 179690 + }, + { + "epoch": 0.7260107386563347, + "grad_norm": 783.2426147460938, + "learning_rate": 8.885693132386069e-06, + "loss": 73.4217, + "step": 179700 + }, + { + "epoch": 0.7260511399217023, + "grad_norm": 542.4451293945312, + "learning_rate": 8.883371610368348e-06, + "loss": 72.6817, + "step": 179710 + }, + { + "epoch": 0.72609154118707, + "grad_norm": 895.587158203125, + "learning_rate": 8.881050305075013e-06, + "loss": 56.5891, + "step": 179720 + }, + { + "epoch": 0.7261319424524376, + "grad_norm": 1021.8092041015625, + "learning_rate": 8.87872921655133e-06, + "loss": 70.8452, + "step": 179730 + }, + { + "epoch": 0.7261723437178053, + "grad_norm": 339.1346435546875, + "learning_rate": 8.876408344842541e-06, + "loss": 41.2228, + "step": 179740 + }, + { + "epoch": 0.7262127449831729, + "grad_norm": 863.810302734375, + "learning_rate": 8.874087689993904e-06, + "loss": 93.5623, + "step": 179750 + }, + { + "epoch": 0.7262531462485405, + "grad_norm": 467.46563720703125, + 
"learning_rate": 8.87176725205066e-06, + "loss": 76.7701, + "step": 179760 + }, + { + "epoch": 0.7262935475139082, + "grad_norm": 665.7024536132812, + "learning_rate": 8.869447031058035e-06, + "loss": 66.7535, + "step": 179770 + }, + { + "epoch": 0.7263339487792758, + "grad_norm": 696.1509399414062, + "learning_rate": 8.86712702706127e-06, + "loss": 65.5342, + "step": 179780 + }, + { + "epoch": 0.7263743500446433, + "grad_norm": 506.5912780761719, + "learning_rate": 8.864807240105598e-06, + "loss": 140.4293, + "step": 179790 + }, + { + "epoch": 0.726414751310011, + "grad_norm": 1073.2720947265625, + "learning_rate": 8.862487670236249e-06, + "loss": 65.7895, + "step": 179800 + }, + { + "epoch": 0.7264551525753786, + "grad_norm": 422.345458984375, + "learning_rate": 8.86016831749842e-06, + "loss": 44.571, + "step": 179810 + }, + { + "epoch": 0.7264955538407463, + "grad_norm": 624.7973022460938, + "learning_rate": 8.857849181937359e-06, + "loss": 75.8256, + "step": 179820 + }, + { + "epoch": 0.7265359551061139, + "grad_norm": 1243.210205078125, + "learning_rate": 8.855530263598258e-06, + "loss": 73.0754, + "step": 179830 + }, + { + "epoch": 0.7265763563714815, + "grad_norm": 604.524658203125, + "learning_rate": 8.853211562526332e-06, + "loss": 74.201, + "step": 179840 + }, + { + "epoch": 0.7266167576368492, + "grad_norm": 580.4423217773438, + "learning_rate": 8.850893078766788e-06, + "loss": 84.7496, + "step": 179850 + }, + { + "epoch": 0.7266571589022168, + "grad_norm": 1868.38427734375, + "learning_rate": 8.848574812364818e-06, + "loss": 77.7952, + "step": 179860 + }, + { + "epoch": 0.7266975601675845, + "grad_norm": 914.8170776367188, + "learning_rate": 8.846256763365635e-06, + "loss": 95.8372, + "step": 179870 + }, + { + "epoch": 0.7267379614329521, + "grad_norm": 659.1104125976562, + "learning_rate": 8.843938931814402e-06, + "loss": 48.5352, + "step": 179880 + }, + { + "epoch": 0.7267783626983197, + "grad_norm": 474.0215148925781, + "learning_rate": 8.841621317756339e-06, + "loss": 64.9721, + "step": 179890 + }, + { + "epoch": 0.7268187639636874, + "grad_norm": 760.419189453125, + "learning_rate": 8.839303921236605e-06, + "loss": 61.0, + "step": 179900 + }, + { + "epoch": 0.726859165229055, + "grad_norm": 1035.7755126953125, + "learning_rate": 8.836986742300386e-06, + "loss": 73.8864, + "step": 179910 + }, + { + "epoch": 0.7268995664944226, + "grad_norm": 710.3796997070312, + "learning_rate": 8.83466978099286e-06, + "loss": 60.4806, + "step": 179920 + }, + { + "epoch": 0.7269399677597902, + "grad_norm": 553.2732543945312, + "learning_rate": 8.8323530373592e-06, + "loss": 90.3882, + "step": 179930 + }, + { + "epoch": 0.7269803690251578, + "grad_norm": 793.3099975585938, + "learning_rate": 8.83003651144456e-06, + "loss": 54.1118, + "step": 179940 + }, + { + "epoch": 0.7270207702905255, + "grad_norm": 622.4246826171875, + "learning_rate": 8.82772020329411e-06, + "loss": 68.2674, + "step": 179950 + }, + { + "epoch": 0.7270611715558931, + "grad_norm": 576.824462890625, + "learning_rate": 8.825404112953007e-06, + "loss": 67.5397, + "step": 179960 + }, + { + "epoch": 0.7271015728212608, + "grad_norm": 868.1696166992188, + "learning_rate": 8.823088240466402e-06, + "loss": 60.1296, + "step": 179970 + }, + { + "epoch": 0.7271419740866284, + "grad_norm": 470.0470275878906, + "learning_rate": 8.820772585879454e-06, + "loss": 86.8104, + "step": 179980 + }, + { + "epoch": 0.727182375351996, + "grad_norm": 237.6008758544922, + "learning_rate": 8.818457149237284e-06, + "loss": 64.7473, + "step": 179990 
+ }, + { + "epoch": 0.7272227766173637, + "grad_norm": 826.0542602539062, + "learning_rate": 8.816141930585067e-06, + "loss": 63.5099, + "step": 180000 + }, + { + "epoch": 0.7272631778827313, + "grad_norm": 619.8072509765625, + "learning_rate": 8.81382692996791e-06, + "loss": 63.3079, + "step": 180010 + }, + { + "epoch": 0.727303579148099, + "grad_norm": 623.7779541015625, + "learning_rate": 8.811512147430958e-06, + "loss": 65.3005, + "step": 180020 + }, + { + "epoch": 0.7273439804134666, + "grad_norm": 806.67626953125, + "learning_rate": 8.809197583019344e-06, + "loss": 64.3786, + "step": 180030 + }, + { + "epoch": 0.7273843816788342, + "grad_norm": 321.6869201660156, + "learning_rate": 8.806883236778168e-06, + "loss": 80.2416, + "step": 180040 + }, + { + "epoch": 0.7274247829442018, + "grad_norm": 831.1265258789062, + "learning_rate": 8.804569108752583e-06, + "loss": 67.2177, + "step": 180050 + }, + { + "epoch": 0.7274651842095694, + "grad_norm": 1147.8966064453125, + "learning_rate": 8.802255198987678e-06, + "loss": 106.9756, + "step": 180060 + }, + { + "epoch": 0.727505585474937, + "grad_norm": 929.6754150390625, + "learning_rate": 8.799941507528573e-06, + "loss": 69.9646, + "step": 180070 + }, + { + "epoch": 0.7275459867403047, + "grad_norm": 812.6502685546875, + "learning_rate": 8.797628034420375e-06, + "loss": 52.8962, + "step": 180080 + }, + { + "epoch": 0.7275863880056723, + "grad_norm": 387.9434509277344, + "learning_rate": 8.795314779708192e-06, + "loss": 58.8869, + "step": 180090 + }, + { + "epoch": 0.72762678927104, + "grad_norm": 1486.0791015625, + "learning_rate": 8.793001743437111e-06, + "loss": 81.6035, + "step": 180100 + }, + { + "epoch": 0.7276671905364076, + "grad_norm": 431.376953125, + "learning_rate": 8.79068892565223e-06, + "loss": 55.6943, + "step": 180110 + }, + { + "epoch": 0.7277075918017752, + "grad_norm": 896.2058715820312, + "learning_rate": 8.788376326398637e-06, + "loss": 60.3276, + "step": 180120 + }, + { + "epoch": 0.7277479930671429, + "grad_norm": 764.2176513671875, + "learning_rate": 8.786063945721424e-06, + "loss": 72.1844, + "step": 180130 + }, + { + "epoch": 0.7277883943325105, + "grad_norm": 622.8843383789062, + "learning_rate": 8.783751783665672e-06, + "loss": 41.2299, + "step": 180140 + }, + { + "epoch": 0.7278287955978782, + "grad_norm": 824.318603515625, + "learning_rate": 8.78143984027644e-06, + "loss": 80.4631, + "step": 180150 + }, + { + "epoch": 0.7278691968632458, + "grad_norm": 1405.62353515625, + "learning_rate": 8.779128115598827e-06, + "loss": 53.1241, + "step": 180160 + }, + { + "epoch": 0.7279095981286133, + "grad_norm": 709.2952880859375, + "learning_rate": 8.776816609677882e-06, + "loss": 57.6457, + "step": 180170 + }, + { + "epoch": 0.727949999393981, + "grad_norm": 501.6903076171875, + "learning_rate": 8.774505322558675e-06, + "loss": 45.0454, + "step": 180180 + }, + { + "epoch": 0.7279904006593486, + "grad_norm": 438.5440673828125, + "learning_rate": 8.772194254286266e-06, + "loss": 90.8563, + "step": 180190 + }, + { + "epoch": 0.7280308019247163, + "grad_norm": 1212.14697265625, + "learning_rate": 8.769883404905712e-06, + "loss": 69.6589, + "step": 180200 + }, + { + "epoch": 0.7280712031900839, + "grad_norm": 1427.153564453125, + "learning_rate": 8.767572774462067e-06, + "loss": 118.1735, + "step": 180210 + }, + { + "epoch": 0.7281116044554515, + "grad_norm": 502.1898498535156, + "learning_rate": 8.765262363000369e-06, + "loss": 67.601, + "step": 180220 + }, + { + "epoch": 0.7281520057208192, + "grad_norm": 311.3202209472656, + 
"learning_rate": 8.762952170565664e-06, + "loss": 35.9511, + "step": 180230 + }, + { + "epoch": 0.7281924069861868, + "grad_norm": 724.260498046875, + "learning_rate": 8.760642197202992e-06, + "loss": 78.571, + "step": 180240 + }, + { + "epoch": 0.7282328082515545, + "grad_norm": 685.9864501953125, + "learning_rate": 8.758332442957394e-06, + "loss": 54.4146, + "step": 180250 + }, + { + "epoch": 0.7282732095169221, + "grad_norm": 843.0648803710938, + "learning_rate": 8.756022907873878e-06, + "loss": 91.44, + "step": 180260 + }, + { + "epoch": 0.7283136107822897, + "grad_norm": 602.7659912109375, + "learning_rate": 8.7537135919975e-06, + "loss": 49.9255, + "step": 180270 + }, + { + "epoch": 0.7283540120476574, + "grad_norm": 1090.334228515625, + "learning_rate": 8.751404495373257e-06, + "loss": 84.3592, + "step": 180280 + }, + { + "epoch": 0.728394413313025, + "grad_norm": 596.3447875976562, + "learning_rate": 8.749095618046176e-06, + "loss": 60.14, + "step": 180290 + }, + { + "epoch": 0.7284348145783925, + "grad_norm": 478.1618957519531, + "learning_rate": 8.746786960061273e-06, + "loss": 41.2122, + "step": 180300 + }, + { + "epoch": 0.7284752158437602, + "grad_norm": 1325.0712890625, + "learning_rate": 8.744478521463537e-06, + "loss": 57.9819, + "step": 180310 + }, + { + "epoch": 0.7285156171091278, + "grad_norm": 1014.0343627929688, + "learning_rate": 8.742170302298004e-06, + "loss": 55.5004, + "step": 180320 + }, + { + "epoch": 0.7285560183744955, + "grad_norm": 567.0553588867188, + "learning_rate": 8.739862302609648e-06, + "loss": 49.1532, + "step": 180330 + }, + { + "epoch": 0.7285964196398631, + "grad_norm": 540.2462768554688, + "learning_rate": 8.737554522443474e-06, + "loss": 52.9117, + "step": 180340 + }, + { + "epoch": 0.7286368209052307, + "grad_norm": 340.2773132324219, + "learning_rate": 8.735246961844472e-06, + "loss": 62.8812, + "step": 180350 + }, + { + "epoch": 0.7286772221705984, + "grad_norm": 1082.2159423828125, + "learning_rate": 8.732939620857633e-06, + "loss": 74.4289, + "step": 180360 + }, + { + "epoch": 0.728717623435966, + "grad_norm": 507.2069396972656, + "learning_rate": 8.73063249952794e-06, + "loss": 91.359, + "step": 180370 + }, + { + "epoch": 0.7287580247013337, + "grad_norm": 498.7413024902344, + "learning_rate": 8.728325597900362e-06, + "loss": 58.98, + "step": 180380 + }, + { + "epoch": 0.7287984259667013, + "grad_norm": 998.984375, + "learning_rate": 8.726018916019883e-06, + "loss": 52.3583, + "step": 180390 + }, + { + "epoch": 0.7288388272320689, + "grad_norm": 814.6353759765625, + "learning_rate": 8.723712453931465e-06, + "loss": 57.7162, + "step": 180400 + }, + { + "epoch": 0.7288792284974366, + "grad_norm": 605.412109375, + "learning_rate": 8.721406211680082e-06, + "loss": 75.733, + "step": 180410 + }, + { + "epoch": 0.7289196297628042, + "grad_norm": 902.1212768554688, + "learning_rate": 8.719100189310687e-06, + "loss": 77.4146, + "step": 180420 + }, + { + "epoch": 0.7289600310281718, + "grad_norm": 630.577880859375, + "learning_rate": 8.716794386868253e-06, + "loss": 83.0791, + "step": 180430 + }, + { + "epoch": 0.7290004322935394, + "grad_norm": 317.25091552734375, + "learning_rate": 8.714488804397711e-06, + "loss": 116.3311, + "step": 180440 + }, + { + "epoch": 0.729040833558907, + "grad_norm": 503.9173583984375, + "learning_rate": 8.712183441944022e-06, + "loss": 95.4872, + "step": 180450 + }, + { + "epoch": 0.7290812348242747, + "grad_norm": 765.6050415039062, + "learning_rate": 8.709878299552135e-06, + "loss": 61.3621, + "step": 180460 + }, + { 
+ "epoch": 0.7291216360896423, + "grad_norm": 1681.1304931640625, + "learning_rate": 8.70757337726697e-06, + "loss": 81.151, + "step": 180470 + }, + { + "epoch": 0.72916203735501, + "grad_norm": 355.28155517578125, + "learning_rate": 8.705268675133488e-06, + "loss": 66.6799, + "step": 180480 + }, + { + "epoch": 0.7292024386203776, + "grad_norm": 515.2119140625, + "learning_rate": 8.702964193196595e-06, + "loss": 38.9603, + "step": 180490 + }, + { + "epoch": 0.7292428398857452, + "grad_norm": 677.4627075195312, + "learning_rate": 8.700659931501244e-06, + "loss": 71.7919, + "step": 180500 + }, + { + "epoch": 0.7292832411511129, + "grad_norm": 485.6435852050781, + "learning_rate": 8.698355890092338e-06, + "loss": 54.2746, + "step": 180510 + }, + { + "epoch": 0.7293236424164805, + "grad_norm": 600.9970092773438, + "learning_rate": 8.696052069014805e-06, + "loss": 46.5223, + "step": 180520 + }, + { + "epoch": 0.7293640436818482, + "grad_norm": 406.7672424316406, + "learning_rate": 8.693748468313555e-06, + "loss": 60.0171, + "step": 180530 + }, + { + "epoch": 0.7294044449472158, + "grad_norm": 868.28515625, + "learning_rate": 8.691445088033506e-06, + "loss": 73.5636, + "step": 180540 + }, + { + "epoch": 0.7294448462125834, + "grad_norm": 489.1455993652344, + "learning_rate": 8.68914192821955e-06, + "loss": 58.2959, + "step": 180550 + }, + { + "epoch": 0.729485247477951, + "grad_norm": 593.87939453125, + "learning_rate": 8.686838988916595e-06, + "loss": 54.9377, + "step": 180560 + }, + { + "epoch": 0.7295256487433186, + "grad_norm": 784.5049438476562, + "learning_rate": 8.684536270169539e-06, + "loss": 58.3528, + "step": 180570 + }, + { + "epoch": 0.7295660500086862, + "grad_norm": 775.0606689453125, + "learning_rate": 8.682233772023272e-06, + "loss": 74.4088, + "step": 180580 + }, + { + "epoch": 0.7296064512740539, + "grad_norm": 588.4043579101562, + "learning_rate": 8.679931494522695e-06, + "loss": 59.3083, + "step": 180590 + }, + { + "epoch": 0.7296468525394215, + "grad_norm": 657.1256713867188, + "learning_rate": 8.677629437712665e-06, + "loss": 100.0195, + "step": 180600 + }, + { + "epoch": 0.7296872538047892, + "grad_norm": 575.1522216796875, + "learning_rate": 8.675327601638092e-06, + "loss": 50.7734, + "step": 180610 + }, + { + "epoch": 0.7297276550701568, + "grad_norm": 587.4725952148438, + "learning_rate": 8.673025986343833e-06, + "loss": 72.567, + "step": 180620 + }, + { + "epoch": 0.7297680563355244, + "grad_norm": 928.2684326171875, + "learning_rate": 8.670724591874762e-06, + "loss": 56.1089, + "step": 180630 + }, + { + "epoch": 0.7298084576008921, + "grad_norm": 2238.17578125, + "learning_rate": 8.668423418275755e-06, + "loss": 108.1139, + "step": 180640 + }, + { + "epoch": 0.7298488588662597, + "grad_norm": 640.6253051757812, + "learning_rate": 8.666122465591653e-06, + "loss": 58.2304, + "step": 180650 + }, + { + "epoch": 0.7298892601316274, + "grad_norm": 753.1292114257812, + "learning_rate": 8.663821733867346e-06, + "loss": 75.4676, + "step": 180660 + }, + { + "epoch": 0.729929661396995, + "grad_norm": 376.223388671875, + "learning_rate": 8.66152122314766e-06, + "loss": 48.8801, + "step": 180670 + }, + { + "epoch": 0.7299700626623626, + "grad_norm": 590.1094360351562, + "learning_rate": 8.659220933477455e-06, + "loss": 80.8054, + "step": 180680 + }, + { + "epoch": 0.7300104639277302, + "grad_norm": 516.4126586914062, + "learning_rate": 8.65692086490158e-06, + "loss": 76.1522, + "step": 180690 + }, + { + "epoch": 0.7300508651930978, + "grad_norm": 445.6968994140625, + 
"learning_rate": 8.654621017464875e-06, + "loss": 49.7985, + "step": 180700 + }, + { + "epoch": 0.7300912664584654, + "grad_norm": 656.9788208007812, + "learning_rate": 8.652321391212171e-06, + "loss": 73.1953, + "step": 180710 + }, + { + "epoch": 0.7301316677238331, + "grad_norm": 318.99395751953125, + "learning_rate": 8.650021986188301e-06, + "loss": 77.7871, + "step": 180720 + }, + { + "epoch": 0.7301720689892007, + "grad_norm": 783.4091186523438, + "learning_rate": 8.647722802438096e-06, + "loss": 51.3349, + "step": 180730 + }, + { + "epoch": 0.7302124702545684, + "grad_norm": 1061.826171875, + "learning_rate": 8.645423840006382e-06, + "loss": 88.0435, + "step": 180740 + }, + { + "epoch": 0.730252871519936, + "grad_norm": 654.4931030273438, + "learning_rate": 8.643125098937982e-06, + "loss": 62.2231, + "step": 180750 + }, + { + "epoch": 0.7302932727853036, + "grad_norm": 1021.0757446289062, + "learning_rate": 8.640826579277687e-06, + "loss": 67.5352, + "step": 180760 + }, + { + "epoch": 0.7303336740506713, + "grad_norm": 648.2791137695312, + "learning_rate": 8.638528281070347e-06, + "loss": 35.2776, + "step": 180770 + }, + { + "epoch": 0.7303740753160389, + "grad_norm": 1026.352294921875, + "learning_rate": 8.636230204360736e-06, + "loss": 80.4507, + "step": 180780 + }, + { + "epoch": 0.7304144765814066, + "grad_norm": 856.6405029296875, + "learning_rate": 8.633932349193667e-06, + "loss": 85.7861, + "step": 180790 + }, + { + "epoch": 0.7304548778467742, + "grad_norm": 519.2671508789062, + "learning_rate": 8.631634715613948e-06, + "loss": 92.2844, + "step": 180800 + }, + { + "epoch": 0.7304952791121417, + "grad_norm": 392.5109558105469, + "learning_rate": 8.629337303666348e-06, + "loss": 94.6236, + "step": 180810 + }, + { + "epoch": 0.7305356803775094, + "grad_norm": 415.673583984375, + "learning_rate": 8.627040113395685e-06, + "loss": 68.8476, + "step": 180820 + }, + { + "epoch": 0.730576081642877, + "grad_norm": 637.0103149414062, + "learning_rate": 8.624743144846723e-06, + "loss": 63.9645, + "step": 180830 + }, + { + "epoch": 0.7306164829082447, + "grad_norm": 505.2231140136719, + "learning_rate": 8.62244639806425e-06, + "loss": 52.3647, + "step": 180840 + }, + { + "epoch": 0.7306568841736123, + "grad_norm": 1023.375244140625, + "learning_rate": 8.620149873093042e-06, + "loss": 109.715, + "step": 180850 + }, + { + "epoch": 0.7306972854389799, + "grad_norm": 601.5147094726562, + "learning_rate": 8.617853569977871e-06, + "loss": 58.9005, + "step": 180860 + }, + { + "epoch": 0.7307376867043476, + "grad_norm": 445.46893310546875, + "learning_rate": 8.615557488763506e-06, + "loss": 57.4375, + "step": 180870 + }, + { + "epoch": 0.7307780879697152, + "grad_norm": 457.2873229980469, + "learning_rate": 8.613261629494716e-06, + "loss": 59.8843, + "step": 180880 + }, + { + "epoch": 0.7308184892350829, + "grad_norm": 1270.587890625, + "learning_rate": 8.610965992216243e-06, + "loss": 120.4748, + "step": 180890 + }, + { + "epoch": 0.7308588905004505, + "grad_norm": 675.197998046875, + "learning_rate": 8.608670576972852e-06, + "loss": 96.6521, + "step": 180900 + }, + { + "epoch": 0.7308992917658181, + "grad_norm": 735.2103271484375, + "learning_rate": 8.606375383809301e-06, + "loss": 42.5129, + "step": 180910 + }, + { + "epoch": 0.7309396930311858, + "grad_norm": 481.1005859375, + "learning_rate": 8.604080412770315e-06, + "loss": 34.0832, + "step": 180920 + }, + { + "epoch": 0.7309800942965534, + "grad_norm": 611.4954833984375, + "learning_rate": 8.601785663900658e-06, + "loss": 83.1082, + "step": 
180930 + }, + { + "epoch": 0.731020495561921, + "grad_norm": 1227.8760986328125, + "learning_rate": 8.599491137245048e-06, + "loss": 47.7063, + "step": 180940 + }, + { + "epoch": 0.7310608968272886, + "grad_norm": 232.61041259765625, + "learning_rate": 8.59719683284824e-06, + "loss": 87.6623, + "step": 180950 + }, + { + "epoch": 0.7311012980926562, + "grad_norm": 664.9630126953125, + "learning_rate": 8.594902750754942e-06, + "loss": 71.9654, + "step": 180960 + }, + { + "epoch": 0.7311416993580239, + "grad_norm": 627.6824340820312, + "learning_rate": 8.59260889100989e-06, + "loss": 50.8036, + "step": 180970 + }, + { + "epoch": 0.7311821006233915, + "grad_norm": 569.5469970703125, + "learning_rate": 8.590315253657807e-06, + "loss": 71.4901, + "step": 180980 + }, + { + "epoch": 0.7312225018887591, + "grad_norm": 610.1592407226562, + "learning_rate": 8.588021838743392e-06, + "loss": 59.4596, + "step": 180990 + }, + { + "epoch": 0.7312629031541268, + "grad_norm": 490.7856140136719, + "learning_rate": 8.585728646311368e-06, + "loss": 52.4683, + "step": 181000 + }, + { + "epoch": 0.7313033044194944, + "grad_norm": 468.3042297363281, + "learning_rate": 8.583435676406441e-06, + "loss": 83.9002, + "step": 181010 + }, + { + "epoch": 0.7313437056848621, + "grad_norm": 676.7130126953125, + "learning_rate": 8.581142929073314e-06, + "loss": 69.6591, + "step": 181020 + }, + { + "epoch": 0.7313841069502297, + "grad_norm": 348.9791259765625, + "learning_rate": 8.578850404356682e-06, + "loss": 97.0322, + "step": 181030 + }, + { + "epoch": 0.7314245082155973, + "grad_norm": 992.2329711914062, + "learning_rate": 8.576558102301252e-06, + "loss": 86.9441, + "step": 181040 + }, + { + "epoch": 0.731464909480965, + "grad_norm": 948.9354248046875, + "learning_rate": 8.574266022951693e-06, + "loss": 76.519, + "step": 181050 + }, + { + "epoch": 0.7315053107463326, + "grad_norm": 788.005615234375, + "learning_rate": 8.571974166352701e-06, + "loss": 70.4961, + "step": 181060 + }, + { + "epoch": 0.7315457120117002, + "grad_norm": 2551.769775390625, + "learning_rate": 8.569682532548965e-06, + "loss": 55.726, + "step": 181070 + }, + { + "epoch": 0.7315861132770678, + "grad_norm": 662.015625, + "learning_rate": 8.567391121585136e-06, + "loss": 40.6179, + "step": 181080 + }, + { + "epoch": 0.7316265145424354, + "grad_norm": 807.2650756835938, + "learning_rate": 8.565099933505917e-06, + "loss": 49.5346, + "step": 181090 + }, + { + "epoch": 0.7316669158078031, + "grad_norm": 5204.0654296875, + "learning_rate": 8.562808968355949e-06, + "loss": 79.4676, + "step": 181100 + }, + { + "epoch": 0.7317073170731707, + "grad_norm": 1287.424560546875, + "learning_rate": 8.560518226179921e-06, + "loss": 105.4606, + "step": 181110 + }, + { + "epoch": 0.7317477183385384, + "grad_norm": 344.0022277832031, + "learning_rate": 8.558227707022475e-06, + "loss": 94.4977, + "step": 181120 + }, + { + "epoch": 0.731788119603906, + "grad_norm": 307.97003173828125, + "learning_rate": 8.55593741092827e-06, + "loss": 54.031, + "step": 181130 + }, + { + "epoch": 0.7318285208692736, + "grad_norm": 758.092041015625, + "learning_rate": 8.553647337941962e-06, + "loss": 116.8244, + "step": 181140 + }, + { + "epoch": 0.7318689221346413, + "grad_norm": 739.5404663085938, + "learning_rate": 8.551357488108187e-06, + "loss": 61.4915, + "step": 181150 + }, + { + "epoch": 0.7319093234000089, + "grad_norm": 642.4363403320312, + "learning_rate": 8.549067861471592e-06, + "loss": 61.7979, + "step": 181160 + }, + { + "epoch": 0.7319497246653766, + "grad_norm": 
1228.2034912109375, + "learning_rate": 8.546778458076813e-06, + "loss": 80.8843, + "step": 181170 + }, + { + "epoch": 0.7319901259307442, + "grad_norm": 1487.498046875, + "learning_rate": 8.544489277968486e-06, + "loss": 117.1804, + "step": 181180 + }, + { + "epoch": 0.7320305271961118, + "grad_norm": 377.8185729980469, + "learning_rate": 8.542200321191237e-06, + "loss": 98.885, + "step": 181190 + }, + { + "epoch": 0.7320709284614794, + "grad_norm": 646.796630859375, + "learning_rate": 8.5399115877897e-06, + "loss": 48.6889, + "step": 181200 + }, + { + "epoch": 0.732111329726847, + "grad_norm": 352.8114013671875, + "learning_rate": 8.537623077808471e-06, + "loss": 67.7965, + "step": 181210 + }, + { + "epoch": 0.7321517309922146, + "grad_norm": 1871.1900634765625, + "learning_rate": 8.535334791292195e-06, + "loss": 93.5929, + "step": 181220 + }, + { + "epoch": 0.7321921322575823, + "grad_norm": 362.99609375, + "learning_rate": 8.533046728285464e-06, + "loss": 50.946, + "step": 181230 + }, + { + "epoch": 0.7322325335229499, + "grad_norm": 670.5611572265625, + "learning_rate": 8.530758888832891e-06, + "loss": 67.3179, + "step": 181240 + }, + { + "epoch": 0.7322729347883176, + "grad_norm": 646.5902099609375, + "learning_rate": 8.528471272979083e-06, + "loss": 71.6147, + "step": 181250 + }, + { + "epoch": 0.7323133360536852, + "grad_norm": 488.93963623046875, + "learning_rate": 8.526183880768621e-06, + "loss": 74.9505, + "step": 181260 + }, + { + "epoch": 0.7323537373190528, + "grad_norm": 1013.2658081054688, + "learning_rate": 8.523896712246126e-06, + "loss": 80.2418, + "step": 181270 + }, + { + "epoch": 0.7323941385844205, + "grad_norm": 318.415771484375, + "learning_rate": 8.521609767456165e-06, + "loss": 66.2052, + "step": 181280 + }, + { + "epoch": 0.7324345398497881, + "grad_norm": 918.9953002929688, + "learning_rate": 8.519323046443333e-06, + "loss": 86.1466, + "step": 181290 + }, + { + "epoch": 0.7324749411151558, + "grad_norm": 1339.4842529296875, + "learning_rate": 8.517036549252206e-06, + "loss": 74.2527, + "step": 181300 + }, + { + "epoch": 0.7325153423805234, + "grad_norm": 1104.5279541015625, + "learning_rate": 8.514750275927373e-06, + "loss": 74.8456, + "step": 181310 + }, + { + "epoch": 0.732555743645891, + "grad_norm": 739.8590087890625, + "learning_rate": 8.512464226513387e-06, + "loss": 89.7367, + "step": 181320 + }, + { + "epoch": 0.7325961449112586, + "grad_norm": 1128.3245849609375, + "learning_rate": 8.510178401054825e-06, + "loss": 66.4996, + "step": 181330 + }, + { + "epoch": 0.7326365461766262, + "grad_norm": 759.0236206054688, + "learning_rate": 8.507892799596251e-06, + "loss": 52.091, + "step": 181340 + }, + { + "epoch": 0.7326769474419939, + "grad_norm": 569.92626953125, + "learning_rate": 8.505607422182225e-06, + "loss": 73.0507, + "step": 181350 + }, + { + "epoch": 0.7327173487073615, + "grad_norm": 815.8599243164062, + "learning_rate": 8.503322268857306e-06, + "loss": 67.9318, + "step": 181360 + }, + { + "epoch": 0.7327577499727291, + "grad_norm": 714.9804077148438, + "learning_rate": 8.501037339666022e-06, + "loss": 66.5841, + "step": 181370 + }, + { + "epoch": 0.7327981512380968, + "grad_norm": 740.033935546875, + "learning_rate": 8.498752634652954e-06, + "loss": 89.5774, + "step": 181380 + }, + { + "epoch": 0.7328385525034644, + "grad_norm": 530.9480590820312, + "learning_rate": 8.496468153862613e-06, + "loss": 54.6891, + "step": 181390 + }, + { + "epoch": 0.732878953768832, + "grad_norm": 418.4931945800781, + "learning_rate": 8.49418389733955e-06, + "loss": 
87.8691, + "step": 181400 + }, + { + "epoch": 0.7329193550341997, + "grad_norm": 870.8177490234375, + "learning_rate": 8.491899865128302e-06, + "loss": 65.5883, + "step": 181410 + }, + { + "epoch": 0.7329597562995673, + "grad_norm": 564.4786987304688, + "learning_rate": 8.489616057273377e-06, + "loss": 62.6517, + "step": 181420 + }, + { + "epoch": 0.733000157564935, + "grad_norm": 533.9349975585938, + "learning_rate": 8.487332473819328e-06, + "loss": 62.8028, + "step": 181430 + }, + { + "epoch": 0.7330405588303026, + "grad_norm": 496.3848571777344, + "learning_rate": 8.48504911481065e-06, + "loss": 83.0822, + "step": 181440 + }, + { + "epoch": 0.7330809600956701, + "grad_norm": 1020.8622436523438, + "learning_rate": 8.482765980291869e-06, + "loss": 47.3372, + "step": 181450 + }, + { + "epoch": 0.7331213613610378, + "grad_norm": 357.13092041015625, + "learning_rate": 8.480483070307495e-06, + "loss": 62.6803, + "step": 181460 + }, + { + "epoch": 0.7331617626264054, + "grad_norm": 662.653564453125, + "learning_rate": 8.478200384902033e-06, + "loss": 47.3639, + "step": 181470 + }, + { + "epoch": 0.7332021638917731, + "grad_norm": 505.0613098144531, + "learning_rate": 8.475917924119996e-06, + "loss": 63.226, + "step": 181480 + }, + { + "epoch": 0.7332425651571407, + "grad_norm": 866.1542358398438, + "learning_rate": 8.473635688005863e-06, + "loss": 68.3971, + "step": 181490 + }, + { + "epoch": 0.7332829664225083, + "grad_norm": 1072.444091796875, + "learning_rate": 8.471353676604138e-06, + "loss": 73.8611, + "step": 181500 + }, + { + "epoch": 0.733323367687876, + "grad_norm": 350.84375, + "learning_rate": 8.469071889959305e-06, + "loss": 60.4794, + "step": 181510 + }, + { + "epoch": 0.7333637689532436, + "grad_norm": 122.44292449951172, + "learning_rate": 8.466790328115864e-06, + "loss": 57.0609, + "step": 181520 + }, + { + "epoch": 0.7334041702186113, + "grad_norm": 722.8484497070312, + "learning_rate": 8.464508991118265e-06, + "loss": 62.0047, + "step": 181530 + }, + { + "epoch": 0.7334445714839789, + "grad_norm": 716.9519653320312, + "learning_rate": 8.462227879011018e-06, + "loss": 101.6049, + "step": 181540 + }, + { + "epoch": 0.7334849727493465, + "grad_norm": 346.8747863769531, + "learning_rate": 8.459946991838572e-06, + "loss": 64.399, + "step": 181550 + }, + { + "epoch": 0.7335253740147142, + "grad_norm": 412.2267761230469, + "learning_rate": 8.4576663296454e-06, + "loss": 63.5247, + "step": 181560 + }, + { + "epoch": 0.7335657752800818, + "grad_norm": 551.333740234375, + "learning_rate": 8.455385892475964e-06, + "loss": 36.9888, + "step": 181570 + }, + { + "epoch": 0.7336061765454494, + "grad_norm": 699.9310913085938, + "learning_rate": 8.453105680374725e-06, + "loss": 82.4018, + "step": 181580 + }, + { + "epoch": 0.733646577810817, + "grad_norm": 936.860595703125, + "learning_rate": 8.450825693386142e-06, + "loss": 90.6526, + "step": 181590 + }, + { + "epoch": 0.7336869790761846, + "grad_norm": 934.2276611328125, + "learning_rate": 8.448545931554652e-06, + "loss": 101.8589, + "step": 181600 + }, + { + "epoch": 0.7337273803415523, + "grad_norm": 815.6109008789062, + "learning_rate": 8.446266394924705e-06, + "loss": 46.1619, + "step": 181610 + }, + { + "epoch": 0.7337677816069199, + "grad_norm": 361.1557312011719, + "learning_rate": 8.443987083540744e-06, + "loss": 66.5194, + "step": 181620 + }, + { + "epoch": 0.7338081828722876, + "grad_norm": 1017.4846801757812, + "learning_rate": 8.441707997447203e-06, + "loss": 74.0219, + "step": 181630 + }, + { + "epoch": 0.7338485841376552, + 
"grad_norm": 1515.513916015625, + "learning_rate": 8.439429136688516e-06, + "loss": 94.9583, + "step": 181640 + }, + { + "epoch": 0.7338889854030228, + "grad_norm": 490.3881530761719, + "learning_rate": 8.437150501309117e-06, + "loss": 63.7691, + "step": 181650 + }, + { + "epoch": 0.7339293866683905, + "grad_norm": 618.5309448242188, + "learning_rate": 8.434872091353414e-06, + "loss": 48.4508, + "step": 181660 + }, + { + "epoch": 0.7339697879337581, + "grad_norm": 327.8459167480469, + "learning_rate": 8.432593906865834e-06, + "loss": 52.0368, + "step": 181670 + }, + { + "epoch": 0.7340101891991258, + "grad_norm": 944.6820678710938, + "learning_rate": 8.430315947890799e-06, + "loss": 60.4806, + "step": 181680 + }, + { + "epoch": 0.7340505904644934, + "grad_norm": 490.24810791015625, + "learning_rate": 8.428038214472696e-06, + "loss": 79.1826, + "step": 181690 + }, + { + "epoch": 0.734090991729861, + "grad_norm": 450.2830810546875, + "learning_rate": 8.42576070665596e-06, + "loss": 70.3326, + "step": 181700 + }, + { + "epoch": 0.7341313929952286, + "grad_norm": 593.322998046875, + "learning_rate": 8.423483424484964e-06, + "loss": 52.2624, + "step": 181710 + }, + { + "epoch": 0.7341717942605962, + "grad_norm": 285.5361022949219, + "learning_rate": 8.421206368004131e-06, + "loss": 48.371, + "step": 181720 + }, + { + "epoch": 0.7342121955259638, + "grad_norm": 660.9585571289062, + "learning_rate": 8.418929537257836e-06, + "loss": 40.8531, + "step": 181730 + }, + { + "epoch": 0.7342525967913315, + "grad_norm": 784.2150268554688, + "learning_rate": 8.416652932290471e-06, + "loss": 87.4896, + "step": 181740 + }, + { + "epoch": 0.7342929980566991, + "grad_norm": 631.8790283203125, + "learning_rate": 8.414376553146428e-06, + "loss": 86.8905, + "step": 181750 + }, + { + "epoch": 0.7343333993220668, + "grad_norm": 428.8209228515625, + "learning_rate": 8.412100399870074e-06, + "loss": 39.2045, + "step": 181760 + }, + { + "epoch": 0.7343738005874344, + "grad_norm": 721.2139282226562, + "learning_rate": 8.409824472505786e-06, + "loss": 70.2129, + "step": 181770 + }, + { + "epoch": 0.734414201852802, + "grad_norm": 581.1078491210938, + "learning_rate": 8.407548771097938e-06, + "loss": 56.9139, + "step": 181780 + }, + { + "epoch": 0.7344546031181697, + "grad_norm": 269.2318115234375, + "learning_rate": 8.405273295690893e-06, + "loss": 59.2758, + "step": 181790 + }, + { + "epoch": 0.7344950043835373, + "grad_norm": 414.8443603515625, + "learning_rate": 8.402998046329016e-06, + "loss": 66.196, + "step": 181800 + }, + { + "epoch": 0.734535405648905, + "grad_norm": 535.7174682617188, + "learning_rate": 8.400723023056669e-06, + "loss": 61.3374, + "step": 181810 + }, + { + "epoch": 0.7345758069142726, + "grad_norm": 698.0447998046875, + "learning_rate": 8.398448225918192e-06, + "loss": 77.6528, + "step": 181820 + }, + { + "epoch": 0.7346162081796402, + "grad_norm": 969.572509765625, + "learning_rate": 8.39617365495794e-06, + "loss": 51.3609, + "step": 181830 + }, + { + "epoch": 0.7346566094450078, + "grad_norm": 870.1377563476562, + "learning_rate": 8.393899310220255e-06, + "loss": 102.2289, + "step": 181840 + }, + { + "epoch": 0.7346970107103754, + "grad_norm": 674.4017944335938, + "learning_rate": 8.39162519174948e-06, + "loss": 73.1914, + "step": 181850 + }, + { + "epoch": 0.734737411975743, + "grad_norm": 296.48870849609375, + "learning_rate": 8.389351299589954e-06, + "loss": 60.2958, + "step": 181860 + }, + { + "epoch": 0.7347778132411107, + "grad_norm": 843.11767578125, + "learning_rate": 
8.38707763378599e-06, + "loss": 80.881, + "step": 181870 + }, + { + "epoch": 0.7348182145064783, + "grad_norm": 738.2548828125, + "learning_rate": 8.384804194381938e-06, + "loss": 82.8814, + "step": 181880 + }, + { + "epoch": 0.734858615771846, + "grad_norm": 1068.1209716796875, + "learning_rate": 8.3825309814221e-06, + "loss": 62.4366, + "step": 181890 + }, + { + "epoch": 0.7348990170372136, + "grad_norm": 754.33544921875, + "learning_rate": 8.380257994950805e-06, + "loss": 68.9016, + "step": 181900 + }, + { + "epoch": 0.7349394183025812, + "grad_norm": 811.265869140625, + "learning_rate": 8.37798523501236e-06, + "loss": 68.9933, + "step": 181910 + }, + { + "epoch": 0.7349798195679489, + "grad_norm": 859.0814208984375, + "learning_rate": 8.375712701651078e-06, + "loss": 83.5776, + "step": 181920 + }, + { + "epoch": 0.7350202208333165, + "grad_norm": 413.77313232421875, + "learning_rate": 8.373440394911268e-06, + "loss": 42.2367, + "step": 181930 + }, + { + "epoch": 0.7350606220986842, + "grad_norm": 842.4229736328125, + "learning_rate": 8.371168314837216e-06, + "loss": 75.5724, + "step": 181940 + }, + { + "epoch": 0.7351010233640518, + "grad_norm": 725.393798828125, + "learning_rate": 8.368896461473226e-06, + "loss": 57.8626, + "step": 181950 + }, + { + "epoch": 0.7351414246294194, + "grad_norm": 110.3294906616211, + "learning_rate": 8.366624834863585e-06, + "loss": 55.1127, + "step": 181960 + }, + { + "epoch": 0.735181825894787, + "grad_norm": 787.1930541992188, + "learning_rate": 8.36435343505259e-06, + "loss": 98.6252, + "step": 181970 + }, + { + "epoch": 0.7352222271601546, + "grad_norm": 810.0784912109375, + "learning_rate": 8.362082262084503e-06, + "loss": 69.4066, + "step": 181980 + }, + { + "epoch": 0.7352626284255223, + "grad_norm": 666.9320068359375, + "learning_rate": 8.359811316003626e-06, + "loss": 72.6057, + "step": 181990 + }, + { + "epoch": 0.7353030296908899, + "grad_norm": 537.739013671875, + "learning_rate": 8.357540596854214e-06, + "loss": 87.924, + "step": 182000 + }, + { + "epoch": 0.7353434309562575, + "grad_norm": 1134.1998291015625, + "learning_rate": 8.355270104680542e-06, + "loss": 61.8731, + "step": 182010 + }, + { + "epoch": 0.7353838322216252, + "grad_norm": 267.2256164550781, + "learning_rate": 8.35299983952688e-06, + "loss": 64.5985, + "step": 182020 + }, + { + "epoch": 0.7354242334869928, + "grad_norm": 729.6862182617188, + "learning_rate": 8.35072980143747e-06, + "loss": 71.2801, + "step": 182030 + }, + { + "epoch": 0.7354646347523605, + "grad_norm": 370.4252624511719, + "learning_rate": 8.348459990456594e-06, + "loss": 63.0893, + "step": 182040 + }, + { + "epoch": 0.7355050360177281, + "grad_norm": 917.0546875, + "learning_rate": 8.346190406628482e-06, + "loss": 57.6922, + "step": 182050 + }, + { + "epoch": 0.7355454372830957, + "grad_norm": 1187.153076171875, + "learning_rate": 8.343921049997388e-06, + "loss": 76.9959, + "step": 182060 + }, + { + "epoch": 0.7355858385484634, + "grad_norm": 739.89013671875, + "learning_rate": 8.341651920607552e-06, + "loss": 43.6943, + "step": 182070 + }, + { + "epoch": 0.735626239813831, + "grad_norm": 371.128173828125, + "learning_rate": 8.339383018503215e-06, + "loss": 60.6426, + "step": 182080 + }, + { + "epoch": 0.7356666410791985, + "grad_norm": 506.60089111328125, + "learning_rate": 8.337114343728618e-06, + "loss": 104.8425, + "step": 182090 + }, + { + "epoch": 0.7357070423445662, + "grad_norm": 557.1185913085938, + "learning_rate": 8.334845896327973e-06, + "loss": 65.6524, + "step": 182100 + }, + { + "epoch": 
0.7357474436099338, + "grad_norm": 1006.3411865234375, + "learning_rate": 8.332577676345513e-06, + "loss": 76.2857, + "step": 182110 + }, + { + "epoch": 0.7357878448753015, + "grad_norm": 745.0953979492188, + "learning_rate": 8.330309683825457e-06, + "loss": 56.3611, + "step": 182120 + }, + { + "epoch": 0.7358282461406691, + "grad_norm": 603.4099731445312, + "learning_rate": 8.328041918812031e-06, + "loss": 64.471, + "step": 182130 + }, + { + "epoch": 0.7358686474060367, + "grad_norm": 1398.447265625, + "learning_rate": 8.325774381349422e-06, + "loss": 98.6797, + "step": 182140 + }, + { + "epoch": 0.7359090486714044, + "grad_norm": 690.031005859375, + "learning_rate": 8.323507071481864e-06, + "loss": 71.4838, + "step": 182150 + }, + { + "epoch": 0.735949449936772, + "grad_norm": 608.216796875, + "learning_rate": 8.321239989253543e-06, + "loss": 65.9738, + "step": 182160 + }, + { + "epoch": 0.7359898512021397, + "grad_norm": 678.5233764648438, + "learning_rate": 8.31897313470866e-06, + "loss": 71.6633, + "step": 182170 + }, + { + "epoch": 0.7360302524675073, + "grad_norm": 496.49285888671875, + "learning_rate": 8.316706507891408e-06, + "loss": 55.7583, + "step": 182180 + }, + { + "epoch": 0.736070653732875, + "grad_norm": 430.7520446777344, + "learning_rate": 8.314440108845977e-06, + "loss": 108.5646, + "step": 182190 + }, + { + "epoch": 0.7361110549982426, + "grad_norm": 602.2924194335938, + "learning_rate": 8.31217393761656e-06, + "loss": 49.9504, + "step": 182200 + }, + { + "epoch": 0.7361514562636102, + "grad_norm": 813.87353515625, + "learning_rate": 8.309907994247327e-06, + "loss": 44.4579, + "step": 182210 + }, + { + "epoch": 0.7361918575289778, + "grad_norm": 506.7007751464844, + "learning_rate": 8.307642278782451e-06, + "loss": 97.3961, + "step": 182220 + }, + { + "epoch": 0.7362322587943454, + "grad_norm": 1042.027099609375, + "learning_rate": 8.305376791266108e-06, + "loss": 78.5941, + "step": 182230 + }, + { + "epoch": 0.736272660059713, + "grad_norm": 854.4111328125, + "learning_rate": 8.30311153174247e-06, + "loss": 80.6895, + "step": 182240 + }, + { + "epoch": 0.7363130613250807, + "grad_norm": 770.261474609375, + "learning_rate": 8.300846500255691e-06, + "loss": 78.4169, + "step": 182250 + }, + { + "epoch": 0.7363534625904483, + "grad_norm": 673.6525268554688, + "learning_rate": 8.298581696849938e-06, + "loss": 61.2881, + "step": 182260 + }, + { + "epoch": 0.736393863855816, + "grad_norm": 1098.9906005859375, + "learning_rate": 8.296317121569355e-06, + "loss": 62.727, + "step": 182270 + }, + { + "epoch": 0.7364342651211836, + "grad_norm": 916.7950439453125, + "learning_rate": 8.294052774458095e-06, + "loss": 64.1543, + "step": 182280 + }, + { + "epoch": 0.7364746663865512, + "grad_norm": 534.0033569335938, + "learning_rate": 8.291788655560309e-06, + "loss": 82.3779, + "step": 182290 + }, + { + "epoch": 0.7365150676519189, + "grad_norm": 877.9806518554688, + "learning_rate": 8.289524764920118e-06, + "loss": 61.4271, + "step": 182300 + }, + { + "epoch": 0.7365554689172865, + "grad_norm": 704.4225463867188, + "learning_rate": 8.287261102581683e-06, + "loss": 74.1503, + "step": 182310 + }, + { + "epoch": 0.7365958701826542, + "grad_norm": 702.4299926757812, + "learning_rate": 8.28499766858911e-06, + "loss": 68.0738, + "step": 182320 + }, + { + "epoch": 0.7366362714480218, + "grad_norm": 527.9893798828125, + "learning_rate": 8.282734462986553e-06, + "loss": 49.4171, + "step": 182330 + }, + { + "epoch": 0.7366766727133894, + "grad_norm": 664.3424682617188, + "learning_rate": 
8.280471485818114e-06, + "loss": 60.8719, + "step": 182340 + }, + { + "epoch": 0.736717073978757, + "grad_norm": 195.6165008544922, + "learning_rate": 8.278208737127915e-06, + "loss": 68.4137, + "step": 182350 + }, + { + "epoch": 0.7367574752441246, + "grad_norm": 555.5744018554688, + "learning_rate": 8.275946216960079e-06, + "loss": 73.8717, + "step": 182360 + }, + { + "epoch": 0.7367978765094922, + "grad_norm": 468.318603515625, + "learning_rate": 8.273683925358699e-06, + "loss": 64.318, + "step": 182370 + }, + { + "epoch": 0.7368382777748599, + "grad_norm": 1034.2327880859375, + "learning_rate": 8.27142186236789e-06, + "loss": 58.5483, + "step": 182380 + }, + { + "epoch": 0.7368786790402275, + "grad_norm": 1152.9095458984375, + "learning_rate": 8.26916002803175e-06, + "loss": 69.2517, + "step": 182390 + }, + { + "epoch": 0.7369190803055952, + "grad_norm": 554.6244506835938, + "learning_rate": 8.266898422394377e-06, + "loss": 56.5054, + "step": 182400 + }, + { + "epoch": 0.7369594815709628, + "grad_norm": 643.0509033203125, + "learning_rate": 8.264637045499855e-06, + "loss": 64.5477, + "step": 182410 + }, + { + "epoch": 0.7369998828363304, + "grad_norm": 560.6483154296875, + "learning_rate": 8.262375897392286e-06, + "loss": 75.8512, + "step": 182420 + }, + { + "epoch": 0.7370402841016981, + "grad_norm": 904.4130249023438, + "learning_rate": 8.260114978115732e-06, + "loss": 61.5103, + "step": 182430 + }, + { + "epoch": 0.7370806853670657, + "grad_norm": 820.02978515625, + "learning_rate": 8.257854287714282e-06, + "loss": 68.2216, + "step": 182440 + }, + { + "epoch": 0.7371210866324334, + "grad_norm": 829.57861328125, + "learning_rate": 8.255593826232009e-06, + "loss": 68.5044, + "step": 182450 + }, + { + "epoch": 0.737161487897801, + "grad_norm": 983.1105346679688, + "learning_rate": 8.25333359371298e-06, + "loss": 80.7243, + "step": 182460 + }, + { + "epoch": 0.7372018891631686, + "grad_norm": 423.48309326171875, + "learning_rate": 8.251073590201266e-06, + "loss": 50.2384, + "step": 182470 + }, + { + "epoch": 0.7372422904285362, + "grad_norm": 642.2510986328125, + "learning_rate": 8.248813815740908e-06, + "loss": 58.1556, + "step": 182480 + }, + { + "epoch": 0.7372826916939038, + "grad_norm": 671.1317138671875, + "learning_rate": 8.24655427037599e-06, + "loss": 55.3348, + "step": 182490 + }, + { + "epoch": 0.7373230929592715, + "grad_norm": 608.0697631835938, + "learning_rate": 8.24429495415054e-06, + "loss": 82.9104, + "step": 182500 + }, + { + "epoch": 0.7373634942246391, + "grad_norm": 585.0857543945312, + "learning_rate": 8.242035867108611e-06, + "loss": 43.2551, + "step": 182510 + }, + { + "epoch": 0.7374038954900067, + "grad_norm": 344.54278564453125, + "learning_rate": 8.239777009294248e-06, + "loss": 47.3913, + "step": 182520 + }, + { + "epoch": 0.7374442967553744, + "grad_norm": 1010.5748291015625, + "learning_rate": 8.237518380751484e-06, + "loss": 80.6281, + "step": 182530 + }, + { + "epoch": 0.737484698020742, + "grad_norm": 363.15460205078125, + "learning_rate": 8.235259981524364e-06, + "loss": 57.5373, + "step": 182540 + }, + { + "epoch": 0.7375250992861097, + "grad_norm": 233.0504608154297, + "learning_rate": 8.233001811656899e-06, + "loss": 85.1943, + "step": 182550 + }, + { + "epoch": 0.7375655005514773, + "grad_norm": 217.8220672607422, + "learning_rate": 8.230743871193124e-06, + "loss": 64.1415, + "step": 182560 + }, + { + "epoch": 0.7376059018168449, + "grad_norm": 800.3905029296875, + "learning_rate": 8.228486160177053e-06, + "loss": 52.1225, + "step": 182570 + }, + 
{ + "epoch": 0.7376463030822126, + "grad_norm": 979.4124755859375, + "learning_rate": 8.226228678652715e-06, + "loss": 57.3725, + "step": 182580 + }, + { + "epoch": 0.7376867043475802, + "grad_norm": 777.706787109375, + "learning_rate": 8.223971426664094e-06, + "loss": 77.2386, + "step": 182590 + }, + { + "epoch": 0.7377271056129477, + "grad_norm": 664.9208374023438, + "learning_rate": 8.22171440425523e-06, + "loss": 101.2429, + "step": 182600 + }, + { + "epoch": 0.7377675068783154, + "grad_norm": 612.14013671875, + "learning_rate": 8.219457611470099e-06, + "loss": 82.5295, + "step": 182610 + }, + { + "epoch": 0.737807908143683, + "grad_norm": 570.2803344726562, + "learning_rate": 8.217201048352706e-06, + "loss": 53.0626, + "step": 182620 + }, + { + "epoch": 0.7378483094090507, + "grad_norm": 423.3118896484375, + "learning_rate": 8.21494471494705e-06, + "loss": 57.5231, + "step": 182630 + }, + { + "epoch": 0.7378887106744183, + "grad_norm": 617.469482421875, + "learning_rate": 8.212688611297104e-06, + "loss": 40.2864, + "step": 182640 + }, + { + "epoch": 0.7379291119397859, + "grad_norm": 1240.8238525390625, + "learning_rate": 8.210432737446873e-06, + "loss": 81.3918, + "step": 182650 + }, + { + "epoch": 0.7379695132051536, + "grad_norm": 604.07568359375, + "learning_rate": 8.208177093440322e-06, + "loss": 80.0339, + "step": 182660 + }, + { + "epoch": 0.7380099144705212, + "grad_norm": 480.34771728515625, + "learning_rate": 8.205921679321426e-06, + "loss": 76.2475, + "step": 182670 + }, + { + "epoch": 0.7380503157358889, + "grad_norm": 437.8207702636719, + "learning_rate": 8.203666495134159e-06, + "loss": 115.2487, + "step": 182680 + }, + { + "epoch": 0.7380907170012565, + "grad_norm": 668.2044067382812, + "learning_rate": 8.20141154092249e-06, + "loss": 89.7146, + "step": 182690 + }, + { + "epoch": 0.7381311182666241, + "grad_norm": 725.5908203125, + "learning_rate": 8.199156816730383e-06, + "loss": 61.1083, + "step": 182700 + }, + { + "epoch": 0.7381715195319918, + "grad_norm": 712.6251831054688, + "learning_rate": 8.196902322601781e-06, + "loss": 76.4101, + "step": 182710 + }, + { + "epoch": 0.7382119207973594, + "grad_norm": 868.2650146484375, + "learning_rate": 8.194648058580645e-06, + "loss": 94.8841, + "step": 182720 + }, + { + "epoch": 0.738252322062727, + "grad_norm": 800.4769897460938, + "learning_rate": 8.192394024710924e-06, + "loss": 50.6717, + "step": 182730 + }, + { + "epoch": 0.7382927233280946, + "grad_norm": 344.6494140625, + "learning_rate": 8.190140221036567e-06, + "loss": 84.464, + "step": 182740 + }, + { + "epoch": 0.7383331245934622, + "grad_norm": 691.3666381835938, + "learning_rate": 8.187886647601491e-06, + "loss": 48.2676, + "step": 182750 + }, + { + "epoch": 0.7383735258588299, + "grad_norm": 1069.7152099609375, + "learning_rate": 8.185633304449662e-06, + "loss": 70.6404, + "step": 182760 + }, + { + "epoch": 0.7384139271241975, + "grad_norm": 725.9649658203125, + "learning_rate": 8.183380191624986e-06, + "loss": 77.2677, + "step": 182770 + }, + { + "epoch": 0.7384543283895652, + "grad_norm": 748.8724975585938, + "learning_rate": 8.181127309171397e-06, + "loss": 56.7075, + "step": 182780 + }, + { + "epoch": 0.7384947296549328, + "grad_norm": 377.5096130371094, + "learning_rate": 8.178874657132825e-06, + "loss": 55.8461, + "step": 182790 + }, + { + "epoch": 0.7385351309203004, + "grad_norm": 808.2539672851562, + "learning_rate": 8.17662223555316e-06, + "loss": 67.7727, + "step": 182800 + }, + { + "epoch": 0.7385755321856681, + "grad_norm": 512.5366821289062, + 
"learning_rate": 8.174370044476347e-06, + "loss": 55.0187, + "step": 182810 + }, + { + "epoch": 0.7386159334510357, + "grad_norm": 295.6439514160156, + "learning_rate": 8.172118083946273e-06, + "loss": 72.1878, + "step": 182820 + }, + { + "epoch": 0.7386563347164034, + "grad_norm": 756.8067626953125, + "learning_rate": 8.169866354006846e-06, + "loss": 68.824, + "step": 182830 + }, + { + "epoch": 0.738696735981771, + "grad_norm": 314.791015625, + "learning_rate": 8.167614854701961e-06, + "loss": 62.1032, + "step": 182840 + }, + { + "epoch": 0.7387371372471386, + "grad_norm": 3421.14892578125, + "learning_rate": 8.16536358607552e-06, + "loss": 88.1209, + "step": 182850 + }, + { + "epoch": 0.7387775385125062, + "grad_norm": 587.5921630859375, + "learning_rate": 8.163112548171408e-06, + "loss": 82.4934, + "step": 182860 + }, + { + "epoch": 0.7388179397778738, + "grad_norm": 464.4368591308594, + "learning_rate": 8.16086174103352e-06, + "loss": 68.0541, + "step": 182870 + }, + { + "epoch": 0.7388583410432414, + "grad_norm": 665.91650390625, + "learning_rate": 8.158611164705716e-06, + "loss": 60.7443, + "step": 182880 + }, + { + "epoch": 0.7388987423086091, + "grad_norm": 1048.3070068359375, + "learning_rate": 8.156360819231887e-06, + "loss": 70.0202, + "step": 182890 + }, + { + "epoch": 0.7389391435739767, + "grad_norm": 489.06964111328125, + "learning_rate": 8.154110704655907e-06, + "loss": 52.6848, + "step": 182900 + }, + { + "epoch": 0.7389795448393444, + "grad_norm": 504.38421630859375, + "learning_rate": 8.151860821021624e-06, + "loss": 73.7209, + "step": 182910 + }, + { + "epoch": 0.739019946104712, + "grad_norm": 973.541015625, + "learning_rate": 8.149611168372929e-06, + "loss": 78.43, + "step": 182920 + }, + { + "epoch": 0.7390603473700796, + "grad_norm": 928.4710083007812, + "learning_rate": 8.147361746753648e-06, + "loss": 70.4271, + "step": 182930 + }, + { + "epoch": 0.7391007486354473, + "grad_norm": 542.09326171875, + "learning_rate": 8.145112556207668e-06, + "loss": 76.2647, + "step": 182940 + }, + { + "epoch": 0.7391411499008149, + "grad_norm": 772.9671630859375, + "learning_rate": 8.142863596778816e-06, + "loss": 83.0659, + "step": 182950 + }, + { + "epoch": 0.7391815511661826, + "grad_norm": 517.1825561523438, + "learning_rate": 8.140614868510941e-06, + "loss": 91.9754, + "step": 182960 + }, + { + "epoch": 0.7392219524315502, + "grad_norm": 1285.120849609375, + "learning_rate": 8.138366371447895e-06, + "loss": 83.0125, + "step": 182970 + }, + { + "epoch": 0.7392623536969178, + "grad_norm": 941.5364990234375, + "learning_rate": 8.136118105633486e-06, + "loss": 52.821, + "step": 182980 + }, + { + "epoch": 0.7393027549622854, + "grad_norm": 774.510009765625, + "learning_rate": 8.133870071111575e-06, + "loss": 88.4824, + "step": 182990 + }, + { + "epoch": 0.739343156227653, + "grad_norm": 724.082763671875, + "learning_rate": 8.131622267925974e-06, + "loss": 72.9662, + "step": 183000 + }, + { + "epoch": 0.7393835574930206, + "grad_norm": 882.099853515625, + "learning_rate": 8.129374696120505e-06, + "loss": 101.4292, + "step": 183010 + }, + { + "epoch": 0.7394239587583883, + "grad_norm": 405.85797119140625, + "learning_rate": 8.127127355738987e-06, + "loss": 54.8219, + "step": 183020 + }, + { + "epoch": 0.7394643600237559, + "grad_norm": 584.7989501953125, + "learning_rate": 8.12488024682524e-06, + "loss": 84.5583, + "step": 183030 + }, + { + "epoch": 0.7395047612891236, + "grad_norm": 566.4080810546875, + "learning_rate": 8.12263336942306e-06, + "loss": 59.634, + "step": 183040 + }, 
+ { + "epoch": 0.7395451625544912, + "grad_norm": 562.1102294921875, + "learning_rate": 8.12038672357626e-06, + "loss": 59.9797, + "step": 183050 + }, + { + "epoch": 0.7395855638198588, + "grad_norm": 819.831787109375, + "learning_rate": 8.118140309328633e-06, + "loss": 52.499, + "step": 183060 + }, + { + "epoch": 0.7396259650852265, + "grad_norm": 215.61453247070312, + "learning_rate": 8.115894126723979e-06, + "loss": 91.2271, + "step": 183070 + }, + { + "epoch": 0.7396663663505941, + "grad_norm": 709.7811889648438, + "learning_rate": 8.113648175806095e-06, + "loss": 58.4668, + "step": 183080 + }, + { + "epoch": 0.7397067676159618, + "grad_norm": 907.1926879882812, + "learning_rate": 8.111402456618744e-06, + "loss": 75.854, + "step": 183090 + }, + { + "epoch": 0.7397471688813294, + "grad_norm": 248.35986328125, + "learning_rate": 8.10915696920574e-06, + "loss": 49.5379, + "step": 183100 + }, + { + "epoch": 0.739787570146697, + "grad_norm": 536.0726928710938, + "learning_rate": 8.106911713610832e-06, + "loss": 50.9282, + "step": 183110 + }, + { + "epoch": 0.7398279714120646, + "grad_norm": 727.8610229492188, + "learning_rate": 8.104666689877804e-06, + "loss": 78.89, + "step": 183120 + }, + { + "epoch": 0.7398683726774322, + "grad_norm": 361.3336486816406, + "learning_rate": 8.102421898050432e-06, + "loss": 64.4138, + "step": 183130 + }, + { + "epoch": 0.7399087739427999, + "grad_norm": 1745.10693359375, + "learning_rate": 8.100177338172454e-06, + "loss": 110.2648, + "step": 183140 + }, + { + "epoch": 0.7399491752081675, + "grad_norm": 370.4470520019531, + "learning_rate": 8.097933010287662e-06, + "loss": 52.5206, + "step": 183150 + }, + { + "epoch": 0.7399895764735351, + "grad_norm": 579.6969604492188, + "learning_rate": 8.095688914439785e-06, + "loss": 104.2364, + "step": 183160 + }, + { + "epoch": 0.7400299777389028, + "grad_norm": 580.0109252929688, + "learning_rate": 8.093445050672581e-06, + "loss": 69.3589, + "step": 183170 + }, + { + "epoch": 0.7400703790042704, + "grad_norm": 575.5120849609375, + "learning_rate": 8.091201419029795e-06, + "loss": 86.6536, + "step": 183180 + }, + { + "epoch": 0.7401107802696381, + "grad_norm": 625.2686157226562, + "learning_rate": 8.088958019555177e-06, + "loss": 56.5799, + "step": 183190 + }, + { + "epoch": 0.7401511815350057, + "grad_norm": 945.1229858398438, + "learning_rate": 8.08671485229244e-06, + "loss": 69.1045, + "step": 183200 + }, + { + "epoch": 0.7401915828003733, + "grad_norm": 536.593017578125, + "learning_rate": 8.084471917285344e-06, + "loss": 81.1258, + "step": 183210 + }, + { + "epoch": 0.740231984065741, + "grad_norm": 604.305908203125, + "learning_rate": 8.082229214577597e-06, + "loss": 70.7882, + "step": 183220 + }, + { + "epoch": 0.7402723853311086, + "grad_norm": 633.371337890625, + "learning_rate": 8.079986744212925e-06, + "loss": 62.4519, + "step": 183230 + }, + { + "epoch": 0.7403127865964761, + "grad_norm": 189.2625274658203, + "learning_rate": 8.077744506235058e-06, + "loss": 46.0964, + "step": 183240 + }, + { + "epoch": 0.7403531878618438, + "grad_norm": 461.5438232421875, + "learning_rate": 8.075502500687682e-06, + "loss": 52.4147, + "step": 183250 + }, + { + "epoch": 0.7403935891272114, + "grad_norm": 536.3472900390625, + "learning_rate": 8.07326072761454e-06, + "loss": 74.0125, + "step": 183260 + }, + { + "epoch": 0.7404339903925791, + "grad_norm": 300.7894287109375, + "learning_rate": 8.071019187059313e-06, + "loss": 79.062, + "step": 183270 + }, + { + "epoch": 0.7404743916579467, + "grad_norm": 909.899169921875, + 
"learning_rate": 8.06877787906571e-06, + "loss": 52.2884, + "step": 183280 + }, + { + "epoch": 0.7405147929233143, + "grad_norm": 548.2403564453125, + "learning_rate": 8.066536803677423e-06, + "loss": 54.153, + "step": 183290 + }, + { + "epoch": 0.740555194188682, + "grad_norm": 717.961669921875, + "learning_rate": 8.064295960938145e-06, + "loss": 90.0561, + "step": 183300 + }, + { + "epoch": 0.7405955954540496, + "grad_norm": 599.14697265625, + "learning_rate": 8.06205535089157e-06, + "loss": 98.23, + "step": 183310 + }, + { + "epoch": 0.7406359967194173, + "grad_norm": 730.24365234375, + "learning_rate": 8.059814973581364e-06, + "loss": 56.7512, + "step": 183320 + }, + { + "epoch": 0.7406763979847849, + "grad_norm": 612.5025024414062, + "learning_rate": 8.057574829051213e-06, + "loss": 95.6204, + "step": 183330 + }, + { + "epoch": 0.7407167992501525, + "grad_norm": 911.7921142578125, + "learning_rate": 8.055334917344788e-06, + "loss": 78.0033, + "step": 183340 + }, + { + "epoch": 0.7407572005155202, + "grad_norm": 537.8583984375, + "learning_rate": 8.053095238505766e-06, + "loss": 42.3161, + "step": 183350 + }, + { + "epoch": 0.7407976017808878, + "grad_norm": 338.4739685058594, + "learning_rate": 8.05085579257779e-06, + "loss": 70.8385, + "step": 183360 + }, + { + "epoch": 0.7408380030462554, + "grad_norm": 1453.6307373046875, + "learning_rate": 8.048616579604544e-06, + "loss": 114.7424, + "step": 183370 + }, + { + "epoch": 0.740878404311623, + "grad_norm": 531.478759765625, + "learning_rate": 8.046377599629667e-06, + "loss": 70.9794, + "step": 183380 + }, + { + "epoch": 0.7409188055769906, + "grad_norm": 393.3597717285156, + "learning_rate": 8.04413885269681e-06, + "loss": 45.89, + "step": 183390 + }, + { + "epoch": 0.7409592068423583, + "grad_norm": 882.0484619140625, + "learning_rate": 8.04190033884963e-06, + "loss": 53.0944, + "step": 183400 + }, + { + "epoch": 0.7409996081077259, + "grad_norm": 628.6268920898438, + "learning_rate": 8.039662058131745e-06, + "loss": 89.358, + "step": 183410 + }, + { + "epoch": 0.7410400093730936, + "grad_norm": 439.8164978027344, + "learning_rate": 8.037424010586818e-06, + "loss": 56.9314, + "step": 183420 + }, + { + "epoch": 0.7410804106384612, + "grad_norm": 661.421875, + "learning_rate": 8.035186196258458e-06, + "loss": 70.4223, + "step": 183430 + }, + { + "epoch": 0.7411208119038288, + "grad_norm": 409.26593017578125, + "learning_rate": 8.032948615190314e-06, + "loss": 61.7101, + "step": 183440 + }, + { + "epoch": 0.7411612131691965, + "grad_norm": 1009.5635375976562, + "learning_rate": 8.030711267425992e-06, + "loss": 70.6459, + "step": 183450 + }, + { + "epoch": 0.7412016144345641, + "grad_norm": 545.3409423828125, + "learning_rate": 8.028474153009116e-06, + "loss": 92.8223, + "step": 183460 + }, + { + "epoch": 0.7412420156999318, + "grad_norm": 321.1078186035156, + "learning_rate": 8.026237271983307e-06, + "loss": 97.9381, + "step": 183470 + }, + { + "epoch": 0.7412824169652994, + "grad_norm": 1390.2987060546875, + "learning_rate": 8.024000624392158e-06, + "loss": 66.3337, + "step": 183480 + }, + { + "epoch": 0.741322818230667, + "grad_norm": 719.2715454101562, + "learning_rate": 8.021764210279287e-06, + "loss": 97.6379, + "step": 183490 + }, + { + "epoch": 0.7413632194960346, + "grad_norm": 1233.0706787109375, + "learning_rate": 8.019528029688286e-06, + "loss": 71.0488, + "step": 183500 + }, + { + "epoch": 0.7414036207614022, + "grad_norm": 1695.03515625, + "learning_rate": 8.017292082662753e-06, + "loss": 82.1348, + "step": 183510 + }, + { + 
"epoch": 0.7414440220267698, + "grad_norm": 529.8947143554688, + "learning_rate": 8.015056369246282e-06, + "loss": 84.8331, + "step": 183520 + }, + { + "epoch": 0.7414844232921375, + "grad_norm": 614.3245849609375, + "learning_rate": 8.012820889482464e-06, + "loss": 71.756, + "step": 183530 + }, + { + "epoch": 0.7415248245575051, + "grad_norm": 562.1209716796875, + "learning_rate": 8.010585643414861e-06, + "loss": 58.7432, + "step": 183540 + }, + { + "epoch": 0.7415652258228728, + "grad_norm": 324.7161865234375, + "learning_rate": 8.008350631087075e-06, + "loss": 59.3577, + "step": 183550 + }, + { + "epoch": 0.7416056270882404, + "grad_norm": 427.8248291015625, + "learning_rate": 8.006115852542662e-06, + "loss": 67.169, + "step": 183560 + }, + { + "epoch": 0.741646028353608, + "grad_norm": 427.1392822265625, + "learning_rate": 8.003881307825196e-06, + "loss": 56.6587, + "step": 183570 + }, + { + "epoch": 0.7416864296189757, + "grad_norm": 458.95013427734375, + "learning_rate": 8.001646996978246e-06, + "loss": 107.1103, + "step": 183580 + }, + { + "epoch": 0.7417268308843433, + "grad_norm": 682.3165283203125, + "learning_rate": 7.99941292004535e-06, + "loss": 88.9655, + "step": 183590 + }, + { + "epoch": 0.741767232149711, + "grad_norm": 617.7960815429688, + "learning_rate": 7.997179077070092e-06, + "loss": 64.1381, + "step": 183600 + }, + { + "epoch": 0.7418076334150786, + "grad_norm": 1087.084716796875, + "learning_rate": 7.994945468096002e-06, + "loss": 92.3241, + "step": 183610 + }, + { + "epoch": 0.7418480346804462, + "grad_norm": 523.921142578125, + "learning_rate": 7.992712093166628e-06, + "loss": 52.2599, + "step": 183620 + }, + { + "epoch": 0.7418884359458138, + "grad_norm": 1282.765380859375, + "learning_rate": 7.990478952325516e-06, + "loss": 58.1339, + "step": 183630 + }, + { + "epoch": 0.7419288372111814, + "grad_norm": 905.3845825195312, + "learning_rate": 7.988246045616206e-06, + "loss": 61.941, + "step": 183640 + }, + { + "epoch": 0.741969238476549, + "grad_norm": 513.900390625, + "learning_rate": 7.986013373082216e-06, + "loss": 51.1565, + "step": 183650 + }, + { + "epoch": 0.7420096397419167, + "grad_norm": 840.2051391601562, + "learning_rate": 7.98378093476708e-06, + "loss": 82.9454, + "step": 183660 + }, + { + "epoch": 0.7420500410072843, + "grad_norm": 1300.5845947265625, + "learning_rate": 7.981548730714322e-06, + "loss": 77.3632, + "step": 183670 + }, + { + "epoch": 0.742090442272652, + "grad_norm": 641.65478515625, + "learning_rate": 7.97931676096746e-06, + "loss": 53.0731, + "step": 183680 + }, + { + "epoch": 0.7421308435380196, + "grad_norm": 600.939453125, + "learning_rate": 7.977085025570011e-06, + "loss": 43.9203, + "step": 183690 + }, + { + "epoch": 0.7421712448033873, + "grad_norm": 524.879150390625, + "learning_rate": 7.974853524565467e-06, + "loss": 65.9293, + "step": 183700 + }, + { + "epoch": 0.7422116460687549, + "grad_norm": 554.756103515625, + "learning_rate": 7.972622257997358e-06, + "loss": 72.1009, + "step": 183710 + }, + { + "epoch": 0.7422520473341225, + "grad_norm": 796.11279296875, + "learning_rate": 7.970391225909162e-06, + "loss": 84.7408, + "step": 183720 + }, + { + "epoch": 0.7422924485994902, + "grad_norm": 378.04315185546875, + "learning_rate": 7.968160428344382e-06, + "loss": 51.3648, + "step": 183730 + }, + { + "epoch": 0.7423328498648578, + "grad_norm": 862.2138061523438, + "learning_rate": 7.965929865346518e-06, + "loss": 59.9737, + "step": 183740 + }, + { + "epoch": 0.7423732511302255, + "grad_norm": 760.834228515625, + 
"learning_rate": 7.963699536959032e-06, + "loss": 69.3422, + "step": 183750 + }, + { + "epoch": 0.742413652395593, + "grad_norm": 580.6527709960938, + "learning_rate": 7.961469443225436e-06, + "loss": 59.3926, + "step": 183760 + }, + { + "epoch": 0.7424540536609606, + "grad_norm": 1223.2869873046875, + "learning_rate": 7.95923958418918e-06, + "loss": 79.9713, + "step": 183770 + }, + { + "epoch": 0.7424944549263283, + "grad_norm": 744.7950439453125, + "learning_rate": 7.957009959893752e-06, + "loss": 70.5377, + "step": 183780 + }, + { + "epoch": 0.7425348561916959, + "grad_norm": 436.2528991699219, + "learning_rate": 7.954780570382612e-06, + "loss": 58.6069, + "step": 183790 + }, + { + "epoch": 0.7425752574570635, + "grad_norm": 720.4943237304688, + "learning_rate": 7.952551415699232e-06, + "loss": 74.7065, + "step": 183800 + }, + { + "epoch": 0.7426156587224312, + "grad_norm": 698.4285278320312, + "learning_rate": 7.95032249588706e-06, + "loss": 96.3963, + "step": 183810 + }, + { + "epoch": 0.7426560599877988, + "grad_norm": 517.6858520507812, + "learning_rate": 7.948093810989554e-06, + "loss": 89.1975, + "step": 183820 + }, + { + "epoch": 0.7426964612531665, + "grad_norm": 1134.5260009765625, + "learning_rate": 7.945865361050164e-06, + "loss": 85.5781, + "step": 183830 + }, + { + "epoch": 0.7427368625185341, + "grad_norm": 1554.962646484375, + "learning_rate": 7.943637146112336e-06, + "loss": 95.2075, + "step": 183840 + }, + { + "epoch": 0.7427772637839017, + "grad_norm": 507.0695495605469, + "learning_rate": 7.94140916621951e-06, + "loss": 70.3432, + "step": 183850 + }, + { + "epoch": 0.7428176650492694, + "grad_norm": 408.655029296875, + "learning_rate": 7.939181421415112e-06, + "loss": 104.9384, + "step": 183860 + }, + { + "epoch": 0.742858066314637, + "grad_norm": 828.5603637695312, + "learning_rate": 7.936953911742595e-06, + "loss": 76.7543, + "step": 183870 + }, + { + "epoch": 0.7428984675800046, + "grad_norm": 670.4786987304688, + "learning_rate": 7.934726637245365e-06, + "loss": 74.159, + "step": 183880 + }, + { + "epoch": 0.7429388688453722, + "grad_norm": 770.8397827148438, + "learning_rate": 7.932499597966849e-06, + "loss": 70.0464, + "step": 183890 + }, + { + "epoch": 0.7429792701107398, + "grad_norm": 690.974853515625, + "learning_rate": 7.93027279395047e-06, + "loss": 54.824, + "step": 183900 + }, + { + "epoch": 0.7430196713761075, + "grad_norm": 360.72039794921875, + "learning_rate": 7.928046225239632e-06, + "loss": 69.1464, + "step": 183910 + }, + { + "epoch": 0.7430600726414751, + "grad_norm": 806.6488037109375, + "learning_rate": 7.925819891877758e-06, + "loss": 67.7171, + "step": 183920 + }, + { + "epoch": 0.7431004739068428, + "grad_norm": 897.4036865234375, + "learning_rate": 7.92359379390823e-06, + "loss": 97.9078, + "step": 183930 + }, + { + "epoch": 0.7431408751722104, + "grad_norm": 722.1820068359375, + "learning_rate": 7.921367931374464e-06, + "loss": 61.8954, + "step": 183940 + }, + { + "epoch": 0.743181276437578, + "grad_norm": 774.7388305664062, + "learning_rate": 7.919142304319844e-06, + "loss": 44.5491, + "step": 183950 + }, + { + "epoch": 0.7432216777029457, + "grad_norm": 601.5081787109375, + "learning_rate": 7.916916912787768e-06, + "loss": 62.6809, + "step": 183960 + }, + { + "epoch": 0.7432620789683133, + "grad_norm": 486.7084655761719, + "learning_rate": 7.914691756821617e-06, + "loss": 48.3855, + "step": 183970 + }, + { + "epoch": 0.743302480233681, + "grad_norm": 605.1259155273438, + "learning_rate": 7.912466836464778e-06, + "loss": 66.9295, + 
"step": 183980 + }, + { + "epoch": 0.7433428814990486, + "grad_norm": 444.86016845703125, + "learning_rate": 7.910242151760614e-06, + "loss": 88.1204, + "step": 183990 + }, + { + "epoch": 0.7433832827644162, + "grad_norm": 477.2845764160156, + "learning_rate": 7.908017702752504e-06, + "loss": 56.6898, + "step": 184000 + }, + { + "epoch": 0.7434236840297838, + "grad_norm": 894.4862060546875, + "learning_rate": 7.905793489483822e-06, + "loss": 57.7254, + "step": 184010 + }, + { + "epoch": 0.7434640852951514, + "grad_norm": 334.32440185546875, + "learning_rate": 7.903569511997908e-06, + "loss": 41.9375, + "step": 184020 + }, + { + "epoch": 0.743504486560519, + "grad_norm": 615.7723388671875, + "learning_rate": 7.901345770338147e-06, + "loss": 59.8482, + "step": 184030 + }, + { + "epoch": 0.7435448878258867, + "grad_norm": 797.382568359375, + "learning_rate": 7.899122264547865e-06, + "loss": 54.9186, + "step": 184040 + }, + { + "epoch": 0.7435852890912543, + "grad_norm": 621.2847900390625, + "learning_rate": 7.89689899467044e-06, + "loss": 83.8085, + "step": 184050 + }, + { + "epoch": 0.743625690356622, + "grad_norm": 439.89300537109375, + "learning_rate": 7.894675960749191e-06, + "loss": 65.9234, + "step": 184060 + }, + { + "epoch": 0.7436660916219896, + "grad_norm": 657.0142822265625, + "learning_rate": 7.892453162827469e-06, + "loss": 54.6101, + "step": 184070 + }, + { + "epoch": 0.7437064928873572, + "grad_norm": 465.6173095703125, + "learning_rate": 7.890230600948612e-06, + "loss": 38.5862, + "step": 184080 + }, + { + "epoch": 0.7437468941527249, + "grad_norm": 738.6668090820312, + "learning_rate": 7.888008275155936e-06, + "loss": 53.4723, + "step": 184090 + }, + { + "epoch": 0.7437872954180925, + "grad_norm": 637.0023803710938, + "learning_rate": 7.885786185492773e-06, + "loss": 79.4498, + "step": 184100 + }, + { + "epoch": 0.7438276966834602, + "grad_norm": 2877.622314453125, + "learning_rate": 7.88356433200245e-06, + "loss": 92.2443, + "step": 184110 + }, + { + "epoch": 0.7438680979488278, + "grad_norm": 590.3246459960938, + "learning_rate": 7.881342714728275e-06, + "loss": 53.6198, + "step": 184120 + }, + { + "epoch": 0.7439084992141954, + "grad_norm": 1078.56982421875, + "learning_rate": 7.879121333713562e-06, + "loss": 65.5379, + "step": 184130 + }, + { + "epoch": 0.743948900479563, + "grad_norm": 552.90283203125, + "learning_rate": 7.876900189001628e-06, + "loss": 57.8678, + "step": 184140 + }, + { + "epoch": 0.7439893017449306, + "grad_norm": 501.085693359375, + "learning_rate": 7.874679280635758e-06, + "loss": 59.4736, + "step": 184150 + }, + { + "epoch": 0.7440297030102982, + "grad_norm": 376.67584228515625, + "learning_rate": 7.87245860865926e-06, + "loss": 48.0784, + "step": 184160 + }, + { + "epoch": 0.7440701042756659, + "grad_norm": 407.5163269042969, + "learning_rate": 7.870238173115423e-06, + "loss": 57.1172, + "step": 184170 + }, + { + "epoch": 0.7441105055410335, + "grad_norm": 865.1954956054688, + "learning_rate": 7.868017974047536e-06, + "loss": 81.6435, + "step": 184180 + }, + { + "epoch": 0.7441509068064012, + "grad_norm": 392.0999755859375, + "learning_rate": 7.865798011498895e-06, + "loss": 42.7363, + "step": 184190 + }, + { + "epoch": 0.7441913080717688, + "grad_norm": 1019.3994140625, + "learning_rate": 7.863578285512753e-06, + "loss": 81.6558, + "step": 184200 + }, + { + "epoch": 0.7442317093371364, + "grad_norm": 1389.0880126953125, + "learning_rate": 7.861358796132417e-06, + "loss": 114.0711, + "step": 184210 + }, + { + "epoch": 0.7442721106025041, + 
"grad_norm": 918.1036987304688, + "learning_rate": 7.85913954340113e-06, + "loss": 97.3565, + "step": 184220 + }, + { + "epoch": 0.7443125118678717, + "grad_norm": 269.21746826171875, + "learning_rate": 7.856920527362172e-06, + "loss": 80.7685, + "step": 184230 + }, + { + "epoch": 0.7443529131332394, + "grad_norm": 598.959716796875, + "learning_rate": 7.854701748058795e-06, + "loss": 47.9695, + "step": 184240 + }, + { + "epoch": 0.744393314398607, + "grad_norm": 485.137939453125, + "learning_rate": 7.852483205534271e-06, + "loss": 51.201, + "step": 184250 + }, + { + "epoch": 0.7444337156639746, + "grad_norm": 1174.1719970703125, + "learning_rate": 7.850264899831835e-06, + "loss": 53.0962, + "step": 184260 + }, + { + "epoch": 0.7444741169293422, + "grad_norm": 623.9575805664062, + "learning_rate": 7.848046830994735e-06, + "loss": 51.8478, + "step": 184270 + }, + { + "epoch": 0.7445145181947098, + "grad_norm": 2110.2958984375, + "learning_rate": 7.845828999066222e-06, + "loss": 109.3903, + "step": 184280 + }, + { + "epoch": 0.7445549194600775, + "grad_norm": 1219.45556640625, + "learning_rate": 7.843611404089528e-06, + "loss": 69.7097, + "step": 184290 + }, + { + "epoch": 0.7445953207254451, + "grad_norm": 1086.445068359375, + "learning_rate": 7.841394046107897e-06, + "loss": 85.7592, + "step": 184300 + }, + { + "epoch": 0.7446357219908127, + "grad_norm": 716.8109741210938, + "learning_rate": 7.839176925164536e-06, + "loss": 55.6263, + "step": 184310 + }, + { + "epoch": 0.7446761232561804, + "grad_norm": 1357.3536376953125, + "learning_rate": 7.836960041302692e-06, + "loss": 67.6504, + "step": 184320 + }, + { + "epoch": 0.744716524521548, + "grad_norm": 682.3057861328125, + "learning_rate": 7.83474339456557e-06, + "loss": 64.8481, + "step": 184330 + }, + { + "epoch": 0.7447569257869157, + "grad_norm": 457.460205078125, + "learning_rate": 7.832526984996387e-06, + "loss": 82.1978, + "step": 184340 + }, + { + "epoch": 0.7447973270522833, + "grad_norm": 918.8685302734375, + "learning_rate": 7.830310812638362e-06, + "loss": 73.7281, + "step": 184350 + }, + { + "epoch": 0.7448377283176509, + "grad_norm": 1393.2366943359375, + "learning_rate": 7.82809487753468e-06, + "loss": 59.8516, + "step": 184360 + }, + { + "epoch": 0.7448781295830186, + "grad_norm": 1296.72705078125, + "learning_rate": 7.825879179728567e-06, + "loss": 53.8771, + "step": 184370 + }, + { + "epoch": 0.7449185308483862, + "grad_norm": 878.5518798828125, + "learning_rate": 7.8236637192632e-06, + "loss": 118.9947, + "step": 184380 + }, + { + "epoch": 0.7449589321137539, + "grad_norm": 1256.3421630859375, + "learning_rate": 7.82144849618178e-06, + "loss": 69.041, + "step": 184390 + }, + { + "epoch": 0.7449993333791214, + "grad_norm": 619.653564453125, + "learning_rate": 7.819233510527491e-06, + "loss": 63.7749, + "step": 184400 + }, + { + "epoch": 0.745039734644489, + "grad_norm": 498.7259521484375, + "learning_rate": 7.81701876234352e-06, + "loss": 59.0167, + "step": 184410 + }, + { + "epoch": 0.7450801359098567, + "grad_norm": 677.7562866210938, + "learning_rate": 7.814804251673036e-06, + "loss": 92.173, + "step": 184420 + }, + { + "epoch": 0.7451205371752243, + "grad_norm": 364.2186584472656, + "learning_rate": 7.812589978559216e-06, + "loss": 82.9776, + "step": 184430 + }, + { + "epoch": 0.745160938440592, + "grad_norm": 312.45025634765625, + "learning_rate": 7.81037594304523e-06, + "loss": 85.9244, + "step": 184440 + }, + { + "epoch": 0.7452013397059596, + "grad_norm": 337.4607849121094, + "learning_rate": 7.80816214517424e-06, 
+ "loss": 60.027, + "step": 184450 + }, + { + "epoch": 0.7452417409713272, + "grad_norm": 729.3280029296875, + "learning_rate": 7.805948584989411e-06, + "loss": 55.1952, + "step": 184460 + }, + { + "epoch": 0.7452821422366949, + "grad_norm": 1363.7520751953125, + "learning_rate": 7.803735262533878e-06, + "loss": 77.294, + "step": 184470 + }, + { + "epoch": 0.7453225435020625, + "grad_norm": 1042.7598876953125, + "learning_rate": 7.801522177850822e-06, + "loss": 77.8694, + "step": 184480 + }, + { + "epoch": 0.7453629447674301, + "grad_norm": 505.0323181152344, + "learning_rate": 7.799309330983362e-06, + "loss": 87.5813, + "step": 184490 + }, + { + "epoch": 0.7454033460327978, + "grad_norm": 446.5401916503906, + "learning_rate": 7.79709672197465e-06, + "loss": 50.1102, + "step": 184500 + }, + { + "epoch": 0.7454437472981654, + "grad_norm": 842.9430541992188, + "learning_rate": 7.794884350867818e-06, + "loss": 75.9116, + "step": 184510 + }, + { + "epoch": 0.745484148563533, + "grad_norm": 823.4229125976562, + "learning_rate": 7.792672217705998e-06, + "loss": 70.4222, + "step": 184520 + }, + { + "epoch": 0.7455245498289006, + "grad_norm": 664.3987426757812, + "learning_rate": 7.790460322532326e-06, + "loss": 63.2024, + "step": 184530 + }, + { + "epoch": 0.7455649510942682, + "grad_norm": 732.060546875, + "learning_rate": 7.788248665389912e-06, + "loss": 81.798, + "step": 184540 + }, + { + "epoch": 0.7456053523596359, + "grad_norm": 571.9454345703125, + "learning_rate": 7.786037246321876e-06, + "loss": 72.3409, + "step": 184550 + }, + { + "epoch": 0.7456457536250035, + "grad_norm": 465.03997802734375, + "learning_rate": 7.783826065371329e-06, + "loss": 71.6405, + "step": 184560 + }, + { + "epoch": 0.7456861548903712, + "grad_norm": 222.5946044921875, + "learning_rate": 7.781615122581385e-06, + "loss": 47.7831, + "step": 184570 + }, + { + "epoch": 0.7457265561557388, + "grad_norm": 686.212158203125, + "learning_rate": 7.779404417995145e-06, + "loss": 66.3703, + "step": 184580 + }, + { + "epoch": 0.7457669574211064, + "grad_norm": 735.1443481445312, + "learning_rate": 7.777193951655712e-06, + "loss": 129.8799, + "step": 184590 + }, + { + "epoch": 0.7458073586864741, + "grad_norm": 313.9092712402344, + "learning_rate": 7.77498372360617e-06, + "loss": 53.8346, + "step": 184600 + }, + { + "epoch": 0.7458477599518417, + "grad_norm": 509.513671875, + "learning_rate": 7.772773733889614e-06, + "loss": 95.8408, + "step": 184610 + }, + { + "epoch": 0.7458881612172094, + "grad_norm": 927.1572875976562, + "learning_rate": 7.770563982549135e-06, + "loss": 75.3765, + "step": 184620 + }, + { + "epoch": 0.745928562482577, + "grad_norm": 936.2891235351562, + "learning_rate": 7.768354469627794e-06, + "loss": 75.15, + "step": 184630 + }, + { + "epoch": 0.7459689637479446, + "grad_norm": 676.0682373046875, + "learning_rate": 7.766145195168695e-06, + "loss": 53.1223, + "step": 184640 + }, + { + "epoch": 0.7460093650133122, + "grad_norm": 397.4790954589844, + "learning_rate": 7.76393615921488e-06, + "loss": 71.8923, + "step": 184650 + }, + { + "epoch": 0.7460497662786798, + "grad_norm": 576.7398071289062, + "learning_rate": 7.761727361809442e-06, + "loss": 49.4314, + "step": 184660 + }, + { + "epoch": 0.7460901675440474, + "grad_norm": 5897.49755859375, + "learning_rate": 7.759518802995423e-06, + "loss": 83.685, + "step": 184670 + }, + { + "epoch": 0.7461305688094151, + "grad_norm": 563.11328125, + "learning_rate": 7.757310482815887e-06, + "loss": 122.6776, + "step": 184680 + }, + { + "epoch": 0.7461709700747827, + 
"grad_norm": 984.9046630859375, + "learning_rate": 7.755102401313892e-06, + "loss": 62.3702, + "step": 184690 + }, + { + "epoch": 0.7462113713401504, + "grad_norm": 288.82745361328125, + "learning_rate": 7.752894558532475e-06, + "loss": 98.4008, + "step": 184700 + }, + { + "epoch": 0.746251772605518, + "grad_norm": 709.7749633789062, + "learning_rate": 7.750686954514681e-06, + "loss": 54.9889, + "step": 184710 + }, + { + "epoch": 0.7462921738708856, + "grad_norm": 891.9237060546875, + "learning_rate": 7.748479589303555e-06, + "loss": 55.9712, + "step": 184720 + }, + { + "epoch": 0.7463325751362533, + "grad_norm": 517.9219970703125, + "learning_rate": 7.746272462942124e-06, + "loss": 90.4149, + "step": 184730 + }, + { + "epoch": 0.7463729764016209, + "grad_norm": 826.6241455078125, + "learning_rate": 7.74406557547342e-06, + "loss": 88.2902, + "step": 184740 + }, + { + "epoch": 0.7464133776669886, + "grad_norm": 294.9768981933594, + "learning_rate": 7.741858926940475e-06, + "loss": 48.977, + "step": 184750 + }, + { + "epoch": 0.7464537789323562, + "grad_norm": 537.7915649414062, + "learning_rate": 7.739652517386293e-06, + "loss": 60.2574, + "step": 184760 + }, + { + "epoch": 0.7464941801977238, + "grad_norm": 609.2705688476562, + "learning_rate": 7.737446346853899e-06, + "loss": 110.3924, + "step": 184770 + }, + { + "epoch": 0.7465345814630914, + "grad_norm": 796.8223876953125, + "learning_rate": 7.7352404153863e-06, + "loss": 84.3193, + "step": 184780 + }, + { + "epoch": 0.746574982728459, + "grad_norm": 465.95477294921875, + "learning_rate": 7.733034723026507e-06, + "loss": 69.0084, + "step": 184790 + }, + { + "epoch": 0.7466153839938267, + "grad_norm": 850.6217041015625, + "learning_rate": 7.73082926981752e-06, + "loss": 72.8136, + "step": 184800 + }, + { + "epoch": 0.7466557852591943, + "grad_norm": 771.73681640625, + "learning_rate": 7.72862405580232e-06, + "loss": 56.9186, + "step": 184810 + }, + { + "epoch": 0.7466961865245619, + "grad_norm": 1299.7508544921875, + "learning_rate": 7.726419081023927e-06, + "loss": 69.5203, + "step": 184820 + }, + { + "epoch": 0.7467365877899296, + "grad_norm": 1028.259033203125, + "learning_rate": 7.724214345525306e-06, + "loss": 65.2572, + "step": 184830 + }, + { + "epoch": 0.7467769890552972, + "grad_norm": 374.9214782714844, + "learning_rate": 7.722009849349448e-06, + "loss": 64.0459, + "step": 184840 + }, + { + "epoch": 0.7468173903206649, + "grad_norm": 726.1256103515625, + "learning_rate": 7.719805592539327e-06, + "loss": 70.9617, + "step": 184850 + }, + { + "epoch": 0.7468577915860325, + "grad_norm": 802.1351318359375, + "learning_rate": 7.717601575137922e-06, + "loss": 58.939, + "step": 184860 + }, + { + "epoch": 0.7468981928514001, + "grad_norm": 1169.5992431640625, + "learning_rate": 7.715397797188195e-06, + "loss": 62.5593, + "step": 184870 + }, + { + "epoch": 0.7469385941167678, + "grad_norm": 226.7778778076172, + "learning_rate": 7.713194258733111e-06, + "loss": 45.4634, + "step": 184880 + }, + { + "epoch": 0.7469789953821354, + "grad_norm": 535.3946533203125, + "learning_rate": 7.710990959815632e-06, + "loss": 52.9346, + "step": 184890 + }, + { + "epoch": 0.747019396647503, + "grad_norm": 616.666015625, + "learning_rate": 7.708787900478711e-06, + "loss": 37.1966, + "step": 184900 + }, + { + "epoch": 0.7470597979128706, + "grad_norm": 691.33154296875, + "learning_rate": 7.706585080765304e-06, + "loss": 64.6496, + "step": 184910 + }, + { + "epoch": 0.7471001991782382, + "grad_norm": 3638.22265625, + "learning_rate": 7.704382500718336e-06, + 
"loss": 104.5615, + "step": 184920 + }, + { + "epoch": 0.7471406004436059, + "grad_norm": 1020.41845703125, + "learning_rate": 7.702180160380775e-06, + "loss": 72.8404, + "step": 184930 + }, + { + "epoch": 0.7471810017089735, + "grad_norm": 474.7359313964844, + "learning_rate": 7.699978059795536e-06, + "loss": 33.5991, + "step": 184940 + }, + { + "epoch": 0.7472214029743411, + "grad_norm": 631.0577392578125, + "learning_rate": 7.697776199005558e-06, + "loss": 59.171, + "step": 184950 + }, + { + "epoch": 0.7472618042397088, + "grad_norm": 915.826904296875, + "learning_rate": 7.695574578053773e-06, + "loss": 70.7886, + "step": 184960 + }, + { + "epoch": 0.7473022055050764, + "grad_norm": 551.6441650390625, + "learning_rate": 7.693373196983083e-06, + "loss": 63.5125, + "step": 184970 + }, + { + "epoch": 0.7473426067704441, + "grad_norm": 1601.6119384765625, + "learning_rate": 7.69117205583643e-06, + "loss": 87.7549, + "step": 184980 + }, + { + "epoch": 0.7473830080358117, + "grad_norm": 598.809326171875, + "learning_rate": 7.68897115465671e-06, + "loss": 62.4191, + "step": 184990 + }, + { + "epoch": 0.7474234093011793, + "grad_norm": 589.251220703125, + "learning_rate": 7.686770493486835e-06, + "loss": 87.1173, + "step": 185000 + }, + { + "epoch": 0.747463810566547, + "grad_norm": 644.1456298828125, + "learning_rate": 7.68457007236971e-06, + "loss": 55.2548, + "step": 185010 + }, + { + "epoch": 0.7475042118319146, + "grad_norm": 787.8197631835938, + "learning_rate": 7.682369891348228e-06, + "loss": 62.7979, + "step": 185020 + }, + { + "epoch": 0.7475446130972823, + "grad_norm": 510.9496765136719, + "learning_rate": 7.680169950465297e-06, + "loss": 60.1715, + "step": 185030 + }, + { + "epoch": 0.7475850143626498, + "grad_norm": 670.4130249023438, + "learning_rate": 7.677970249763788e-06, + "loss": 53.3051, + "step": 185040 + }, + { + "epoch": 0.7476254156280174, + "grad_norm": 401.10491943359375, + "learning_rate": 7.675770789286591e-06, + "loss": 87.3505, + "step": 185050 + }, + { + "epoch": 0.7476658168933851, + "grad_norm": 477.5834045410156, + "learning_rate": 7.67357156907659e-06, + "loss": 89.8461, + "step": 185060 + }, + { + "epoch": 0.7477062181587527, + "grad_norm": 126.58316802978516, + "learning_rate": 7.671372589176664e-06, + "loss": 46.9072, + "step": 185070 + }, + { + "epoch": 0.7477466194241204, + "grad_norm": 1218.6884765625, + "learning_rate": 7.669173849629661e-06, + "loss": 84.1749, + "step": 185080 + }, + { + "epoch": 0.747787020689488, + "grad_norm": 994.7040405273438, + "learning_rate": 7.666975350478476e-06, + "loss": 57.5238, + "step": 185090 + }, + { + "epoch": 0.7478274219548556, + "grad_norm": 153.82437133789062, + "learning_rate": 7.66477709176595e-06, + "loss": 91.2618, + "step": 185100 + }, + { + "epoch": 0.7478678232202233, + "grad_norm": 457.44256591796875, + "learning_rate": 7.662579073534946e-06, + "loss": 63.7266, + "step": 185110 + }, + { + "epoch": 0.7479082244855909, + "grad_norm": 508.24920654296875, + "learning_rate": 7.660381295828313e-06, + "loss": 64.7061, + "step": 185120 + }, + { + "epoch": 0.7479486257509586, + "grad_norm": 821.879638671875, + "learning_rate": 7.658183758688899e-06, + "loss": 48.8094, + "step": 185130 + }, + { + "epoch": 0.7479890270163262, + "grad_norm": 619.922607421875, + "learning_rate": 7.655986462159558e-06, + "loss": 56.0755, + "step": 185140 + }, + { + "epoch": 0.7480294282816938, + "grad_norm": 1449.849853515625, + "learning_rate": 7.653789406283103e-06, + "loss": 73.1636, + "step": 185150 + }, + { + "epoch": 
0.7480698295470614, + "grad_norm": 577.6577758789062, + "learning_rate": 7.651592591102384e-06, + "loss": 59.0854, + "step": 185160 + }, + { + "epoch": 0.748110230812429, + "grad_norm": 617.166748046875, + "learning_rate": 7.649396016660222e-06, + "loss": 58.0473, + "step": 185170 + }, + { + "epoch": 0.7481506320777966, + "grad_norm": 593.1734008789062, + "learning_rate": 7.647199682999444e-06, + "loss": 62.1127, + "step": 185180 + }, + { + "epoch": 0.7481910333431643, + "grad_norm": 557.178466796875, + "learning_rate": 7.64500359016287e-06, + "loss": 100.884, + "step": 185190 + }, + { + "epoch": 0.7482314346085319, + "grad_norm": 630.3068237304688, + "learning_rate": 7.642807738193316e-06, + "loss": 59.0134, + "step": 185200 + }, + { + "epoch": 0.7482718358738996, + "grad_norm": 408.6548767089844, + "learning_rate": 7.640612127133583e-06, + "loss": 65.505, + "step": 185210 + }, + { + "epoch": 0.7483122371392672, + "grad_norm": 912.1123046875, + "learning_rate": 7.638416757026476e-06, + "loss": 94.1881, + "step": 185220 + }, + { + "epoch": 0.7483526384046348, + "grad_norm": 455.6291198730469, + "learning_rate": 7.636221627914808e-06, + "loss": 52.2129, + "step": 185230 + }, + { + "epoch": 0.7483930396700025, + "grad_norm": 579.6227416992188, + "learning_rate": 7.63402673984135e-06, + "loss": 75.0452, + "step": 185240 + }, + { + "epoch": 0.7484334409353701, + "grad_norm": 827.2658081054688, + "learning_rate": 7.63183209284892e-06, + "loss": 80.2517, + "step": 185250 + }, + { + "epoch": 0.7484738422007378, + "grad_norm": 758.6801147460938, + "learning_rate": 7.62963768698028e-06, + "loss": 70.9974, + "step": 185260 + }, + { + "epoch": 0.7485142434661054, + "grad_norm": 877.3491821289062, + "learning_rate": 7.627443522278235e-06, + "loss": 53.996, + "step": 185270 + }, + { + "epoch": 0.748554644731473, + "grad_norm": 1146.6033935546875, + "learning_rate": 7.62524959878554e-06, + "loss": 76.1481, + "step": 185280 + }, + { + "epoch": 0.7485950459968406, + "grad_norm": 443.43328857421875, + "learning_rate": 7.623055916544975e-06, + "loss": 70.0487, + "step": 185290 + }, + { + "epoch": 0.7486354472622082, + "grad_norm": 399.1827392578125, + "learning_rate": 7.620862475599314e-06, + "loss": 53.9186, + "step": 185300 + }, + { + "epoch": 0.7486758485275758, + "grad_norm": 675.5008544921875, + "learning_rate": 7.618669275991306e-06, + "loss": 83.6794, + "step": 185310 + }, + { + "epoch": 0.7487162497929435, + "grad_norm": 601.1447143554688, + "learning_rate": 7.616476317763715e-06, + "loss": 66.7645, + "step": 185320 + }, + { + "epoch": 0.7487566510583111, + "grad_norm": 3296.31396484375, + "learning_rate": 7.6142836009592935e-06, + "loss": 76.711, + "step": 185330 + }, + { + "epoch": 0.7487970523236788, + "grad_norm": 436.2550048828125, + "learning_rate": 7.612091125620788e-06, + "loss": 48.8194, + "step": 185340 + }, + { + "epoch": 0.7488374535890464, + "grad_norm": 949.6910400390625, + "learning_rate": 7.609898891790946e-06, + "loss": 65.0167, + "step": 185350 + }, + { + "epoch": 0.748877854854414, + "grad_norm": 906.5824584960938, + "learning_rate": 7.607706899512511e-06, + "loss": 65.9967, + "step": 185360 + }, + { + "epoch": 0.7489182561197817, + "grad_norm": 958.782958984375, + "learning_rate": 7.605515148828202e-06, + "loss": 83.5025, + "step": 185370 + }, + { + "epoch": 0.7489586573851493, + "grad_norm": 637.6248779296875, + "learning_rate": 7.603323639780756e-06, + "loss": 99.1808, + "step": 185380 + }, + { + "epoch": 0.748999058650517, + "grad_norm": 1088.28125, + "learning_rate": 
7.601132372412905e-06, + "loss": 56.7852, + "step": 185390 + }, + { + "epoch": 0.7490394599158846, + "grad_norm": 373.6999206542969, + "learning_rate": 7.5989413467673475e-06, + "loss": 72.8694, + "step": 185400 + }, + { + "epoch": 0.7490798611812522, + "grad_norm": 441.5162353515625, + "learning_rate": 7.596750562886827e-06, + "loss": 55.1033, + "step": 185410 + }, + { + "epoch": 0.7491202624466198, + "grad_norm": 1093.3133544921875, + "learning_rate": 7.594560020814028e-06, + "loss": 105.055, + "step": 185420 + }, + { + "epoch": 0.7491606637119874, + "grad_norm": 678.2324829101562, + "learning_rate": 7.592369720591679e-06, + "loss": 60.9649, + "step": 185430 + }, + { + "epoch": 0.7492010649773551, + "grad_norm": 648.845947265625, + "learning_rate": 7.590179662262465e-06, + "loss": 72.6507, + "step": 185440 + }, + { + "epoch": 0.7492414662427227, + "grad_norm": 1225.8375244140625, + "learning_rate": 7.587989845869088e-06, + "loss": 64.9724, + "step": 185450 + }, + { + "epoch": 0.7492818675080903, + "grad_norm": 607.2803955078125, + "learning_rate": 7.585800271454244e-06, + "loss": 44.5027, + "step": 185460 + }, + { + "epoch": 0.749322268773458, + "grad_norm": 707.4487915039062, + "learning_rate": 7.583610939060611e-06, + "loss": 44.7966, + "step": 185470 + }, + { + "epoch": 0.7493626700388256, + "grad_norm": 542.288330078125, + "learning_rate": 7.581421848730872e-06, + "loss": 62.7702, + "step": 185480 + }, + { + "epoch": 0.7494030713041933, + "grad_norm": 1247.7786865234375, + "learning_rate": 7.579233000507709e-06, + "loss": 66.931, + "step": 185490 + }, + { + "epoch": 0.7494434725695609, + "grad_norm": 644.3566284179688, + "learning_rate": 7.577044394433795e-06, + "loss": 61.8914, + "step": 185500 + }, + { + "epoch": 0.7494838738349285, + "grad_norm": 449.9168395996094, + "learning_rate": 7.574856030551794e-06, + "loss": 89.2268, + "step": 185510 + }, + { + "epoch": 0.7495242751002962, + "grad_norm": 1148.7255859375, + "learning_rate": 7.572667908904378e-06, + "loss": 57.4781, + "step": 185520 + }, + { + "epoch": 0.7495646763656638, + "grad_norm": 676.1135864257812, + "learning_rate": 7.5704800295341865e-06, + "loss": 86.2839, + "step": 185530 + }, + { + "epoch": 0.7496050776310315, + "grad_norm": 616.903076171875, + "learning_rate": 7.5682923924839e-06, + "loss": 56.7806, + "step": 185540 + }, + { + "epoch": 0.749645478896399, + "grad_norm": 587.1575927734375, + "learning_rate": 7.566104997796146e-06, + "loss": 86.0238, + "step": 185550 + }, + { + "epoch": 0.7496858801617666, + "grad_norm": 696.4818725585938, + "learning_rate": 7.563917845513577e-06, + "loss": 100.369, + "step": 185560 + }, + { + "epoch": 0.7497262814271343, + "grad_norm": 1805.377685546875, + "learning_rate": 7.561730935678839e-06, + "loss": 51.2772, + "step": 185570 + }, + { + "epoch": 0.7497666826925019, + "grad_norm": 630.5001831054688, + "learning_rate": 7.559544268334547e-06, + "loss": 74.6144, + "step": 185580 + }, + { + "epoch": 0.7498070839578695, + "grad_norm": 1200.9794921875, + "learning_rate": 7.557357843523357e-06, + "loss": 100.0312, + "step": 185590 + }, + { + "epoch": 0.7498474852232372, + "grad_norm": 985.9768676757812, + "learning_rate": 7.555171661287875e-06, + "loss": 69.3873, + "step": 185600 + }, + { + "epoch": 0.7498878864886048, + "grad_norm": 747.0347290039062, + "learning_rate": 7.552985721670727e-06, + "loss": 83.8823, + "step": 185610 + }, + { + "epoch": 0.7499282877539725, + "grad_norm": 1246.4488525390625, + "learning_rate": 7.550800024714533e-06, + "loss": 84.2764, + "step": 185620 + }, 
+ { + "epoch": 0.7499686890193401, + "grad_norm": 581.9185180664062, + "learning_rate": 7.548614570461901e-06, + "loss": 53.2251, + "step": 185630 + }, + { + "epoch": 0.7500090902847077, + "grad_norm": 855.5613403320312, + "learning_rate": 7.546429358955445e-06, + "loss": 66.2292, + "step": 185640 + }, + { + "epoch": 0.7500494915500754, + "grad_norm": 1095.954833984375, + "learning_rate": 7.544244390237754e-06, + "loss": 51.0427, + "step": 185650 + }, + { + "epoch": 0.750089892815443, + "grad_norm": 682.3202514648438, + "learning_rate": 7.5420596643514284e-06, + "loss": 55.4672, + "step": 185660 + }, + { + "epoch": 0.7501302940808106, + "grad_norm": 707.7089233398438, + "learning_rate": 7.5398751813390665e-06, + "loss": 61.9817, + "step": 185670 + }, + { + "epoch": 0.7501706953461782, + "grad_norm": 369.9849853515625, + "learning_rate": 7.5376909412432585e-06, + "loss": 64.0308, + "step": 185680 + }, + { + "epoch": 0.7502110966115458, + "grad_norm": 885.5504150390625, + "learning_rate": 7.53550694410657e-06, + "loss": 76.4685, + "step": 185690 + }, + { + "epoch": 0.7502514978769135, + "grad_norm": 371.1095275878906, + "learning_rate": 7.533323189971602e-06, + "loss": 61.3489, + "step": 185700 + }, + { + "epoch": 0.7502918991422811, + "grad_norm": 487.19232177734375, + "learning_rate": 7.53113967888091e-06, + "loss": 111.7204, + "step": 185710 + }, + { + "epoch": 0.7503323004076488, + "grad_norm": 371.8272705078125, + "learning_rate": 7.528956410877071e-06, + "loss": 66.394, + "step": 185720 + }, + { + "epoch": 0.7503727016730164, + "grad_norm": 1002.5442504882812, + "learning_rate": 7.526773386002652e-06, + "loss": 82.1275, + "step": 185730 + }, + { + "epoch": 0.750413102938384, + "grad_norm": 970.8106079101562, + "learning_rate": 7.5245906043001945e-06, + "loss": 78.5628, + "step": 185740 + }, + { + "epoch": 0.7504535042037517, + "grad_norm": 657.2499389648438, + "learning_rate": 7.522408065812281e-06, + "loss": 71.0084, + "step": 185750 + }, + { + "epoch": 0.7504939054691193, + "grad_norm": 977.8369140625, + "learning_rate": 7.520225770581438e-06, + "loss": 114.9148, + "step": 185760 + }, + { + "epoch": 0.750534306734487, + "grad_norm": 244.71861267089844, + "learning_rate": 7.518043718650218e-06, + "loss": 35.6113, + "step": 185770 + }, + { + "epoch": 0.7505747079998546, + "grad_norm": 895.7169189453125, + "learning_rate": 7.515861910061164e-06, + "loss": 93.3502, + "step": 185780 + }, + { + "epoch": 0.7506151092652222, + "grad_norm": 392.0560302734375, + "learning_rate": 7.513680344856808e-06, + "loss": 59.2461, + "step": 185790 + }, + { + "epoch": 0.7506555105305898, + "grad_norm": 868.563232421875, + "learning_rate": 7.511499023079689e-06, + "loss": 78.248, + "step": 185800 + }, + { + "epoch": 0.7506959117959574, + "grad_norm": 693.932861328125, + "learning_rate": 7.509317944772319e-06, + "loss": 66.2822, + "step": 185810 + }, + { + "epoch": 0.750736313061325, + "grad_norm": 664.475830078125, + "learning_rate": 7.507137109977227e-06, + "loss": 57.4018, + "step": 185820 + }, + { + "epoch": 0.7507767143266927, + "grad_norm": 439.4372863769531, + "learning_rate": 7.504956518736928e-06, + "loss": 38.8307, + "step": 185830 + }, + { + "epoch": 0.7508171155920603, + "grad_norm": 976.6162109375, + "learning_rate": 7.5027761710939415e-06, + "loss": 61.0791, + "step": 185840 + }, + { + "epoch": 0.750857516857428, + "grad_norm": 510.29638671875, + "learning_rate": 7.500596067090755e-06, + "loss": 96.7738, + "step": 185850 + }, + { + "epoch": 0.7508979181227956, + "grad_norm": 687.0966796875, + 
"learning_rate": 7.498416206769896e-06, + "loss": 68.8034, + "step": 185860 + }, + { + "epoch": 0.7509383193881632, + "grad_norm": 1163.836669921875, + "learning_rate": 7.496236590173835e-06, + "loss": 53.0676, + "step": 185870 + }, + { + "epoch": 0.7509787206535309, + "grad_norm": 483.62506103515625, + "learning_rate": 7.494057217345094e-06, + "loss": 67.945, + "step": 185880 + }, + { + "epoch": 0.7510191219188985, + "grad_norm": 698.1952514648438, + "learning_rate": 7.4918780883261385e-06, + "loss": 71.7334, + "step": 185890 + }, + { + "epoch": 0.7510595231842662, + "grad_norm": 382.50592041015625, + "learning_rate": 7.489699203159459e-06, + "loss": 77.0994, + "step": 185900 + }, + { + "epoch": 0.7510999244496338, + "grad_norm": 605.9642944335938, + "learning_rate": 7.4875205618875425e-06, + "loss": 47.6432, + "step": 185910 + }, + { + "epoch": 0.7511403257150014, + "grad_norm": 1200.9190673828125, + "learning_rate": 7.485342164552847e-06, + "loss": 81.6545, + "step": 185920 + }, + { + "epoch": 0.751180726980369, + "grad_norm": 692.8749389648438, + "learning_rate": 7.483164011197848e-06, + "loss": 59.6191, + "step": 185930 + }, + { + "epoch": 0.7512211282457366, + "grad_norm": 527.755126953125, + "learning_rate": 7.480986101865011e-06, + "loss": 60.777, + "step": 185940 + }, + { + "epoch": 0.7512615295111043, + "grad_norm": 1329.9962158203125, + "learning_rate": 7.4788084365967965e-06, + "loss": 75.873, + "step": 185950 + }, + { + "epoch": 0.7513019307764719, + "grad_norm": 524.0508422851562, + "learning_rate": 7.476631015435656e-06, + "loss": 74.3218, + "step": 185960 + }, + { + "epoch": 0.7513423320418395, + "grad_norm": 849.1664428710938, + "learning_rate": 7.474453838424049e-06, + "loss": 54.9409, + "step": 185970 + }, + { + "epoch": 0.7513827333072072, + "grad_norm": 1997.8948974609375, + "learning_rate": 7.4722769056044054e-06, + "loss": 74.7346, + "step": 185980 + }, + { + "epoch": 0.7514231345725748, + "grad_norm": 476.829345703125, + "learning_rate": 7.470100217019176e-06, + "loss": 41.9007, + "step": 185990 + }, + { + "epoch": 0.7514635358379425, + "grad_norm": 872.91845703125, + "learning_rate": 7.4679237727107965e-06, + "loss": 95.3273, + "step": 186000 + }, + { + "epoch": 0.7515039371033101, + "grad_norm": 574.6484985351562, + "learning_rate": 7.4657475727216845e-06, + "loss": 41.313, + "step": 186010 + }, + { + "epoch": 0.7515443383686777, + "grad_norm": 684.7257080078125, + "learning_rate": 7.46357161709429e-06, + "loss": 68.1427, + "step": 186020 + }, + { + "epoch": 0.7515847396340454, + "grad_norm": 849.9849853515625, + "learning_rate": 7.461395905871005e-06, + "loss": 61.4186, + "step": 186030 + }, + { + "epoch": 0.751625140899413, + "grad_norm": 964.2676391601562, + "learning_rate": 7.459220439094279e-06, + "loss": 60.0716, + "step": 186040 + }, + { + "epoch": 0.7516655421647807, + "grad_norm": 889.2099609375, + "learning_rate": 7.457045216806498e-06, + "loss": 63.213, + "step": 186050 + }, + { + "epoch": 0.7517059434301482, + "grad_norm": 588.1858520507812, + "learning_rate": 7.454870239050078e-06, + "loss": 65.2314, + "step": 186060 + }, + { + "epoch": 0.7517463446955158, + "grad_norm": 628.9429931640625, + "learning_rate": 7.4526955058674286e-06, + "loss": 48.239, + "step": 186070 + }, + { + "epoch": 0.7517867459608835, + "grad_norm": 516.0821533203125, + "learning_rate": 7.450521017300927e-06, + "loss": 48.9228, + "step": 186080 + }, + { + "epoch": 0.7518271472262511, + "grad_norm": 717.8193359375, + "learning_rate": 7.448346773392992e-06, + "loss": 69.1269, + 
"step": 186090 + }, + { + "epoch": 0.7518675484916187, + "grad_norm": 412.3203125, + "learning_rate": 7.446172774185994e-06, + "loss": 45.425, + "step": 186100 + }, + { + "epoch": 0.7519079497569864, + "grad_norm": 647.871826171875, + "learning_rate": 7.443999019722317e-06, + "loss": 43.843, + "step": 186110 + }, + { + "epoch": 0.751948351022354, + "grad_norm": 1167.092529296875, + "learning_rate": 7.441825510044347e-06, + "loss": 80.6497, + "step": 186120 + }, + { + "epoch": 0.7519887522877217, + "grad_norm": 539.0654296875, + "learning_rate": 7.43965224519446e-06, + "loss": 59.4568, + "step": 186130 + }, + { + "epoch": 0.7520291535530893, + "grad_norm": 924.3494262695312, + "learning_rate": 7.43747922521501e-06, + "loss": 60.816, + "step": 186140 + }, + { + "epoch": 0.7520695548184569, + "grad_norm": 675.2388916015625, + "learning_rate": 7.4353064501483715e-06, + "loss": 84.8799, + "step": 186150 + }, + { + "epoch": 0.7521099560838246, + "grad_norm": 595.9772338867188, + "learning_rate": 7.433133920036904e-06, + "loss": 60.8141, + "step": 186160 + }, + { + "epoch": 0.7521503573491922, + "grad_norm": 520.6526489257812, + "learning_rate": 7.430961634922958e-06, + "loss": 57.4502, + "step": 186170 + }, + { + "epoch": 0.7521907586145599, + "grad_norm": 919.0918579101562, + "learning_rate": 7.428789594848893e-06, + "loss": 55.0135, + "step": 186180 + }, + { + "epoch": 0.7522311598799274, + "grad_norm": 804.1625366210938, + "learning_rate": 7.426617799857035e-06, + "loss": 67.9286, + "step": 186190 + }, + { + "epoch": 0.752271561145295, + "grad_norm": 642.2439575195312, + "learning_rate": 7.4244462499897495e-06, + "loss": 74.0812, + "step": 186200 + }, + { + "epoch": 0.7523119624106627, + "grad_norm": 1109.81884765625, + "learning_rate": 7.422274945289352e-06, + "loss": 82.3767, + "step": 186210 + }, + { + "epoch": 0.7523523636760303, + "grad_norm": 892.4169921875, + "learning_rate": 7.420103885798178e-06, + "loss": 98.07, + "step": 186220 + }, + { + "epoch": 0.752392764941398, + "grad_norm": 653.829833984375, + "learning_rate": 7.417933071558556e-06, + "loss": 45.3525, + "step": 186230 + }, + { + "epoch": 0.7524331662067656, + "grad_norm": 908.9268798828125, + "learning_rate": 7.415762502612809e-06, + "loss": 55.8247, + "step": 186240 + }, + { + "epoch": 0.7524735674721332, + "grad_norm": 847.855224609375, + "learning_rate": 7.413592179003255e-06, + "loss": 100.6661, + "step": 186250 + }, + { + "epoch": 0.7525139687375009, + "grad_norm": 684.947509765625, + "learning_rate": 7.411422100772197e-06, + "loss": 58.9498, + "step": 186260 + }, + { + "epoch": 0.7525543700028685, + "grad_norm": 1428.1446533203125, + "learning_rate": 7.409252267961944e-06, + "loss": 57.0716, + "step": 186270 + }, + { + "epoch": 0.7525947712682362, + "grad_norm": 655.5177612304688, + "learning_rate": 7.407082680614799e-06, + "loss": 50.9605, + "step": 186280 + }, + { + "epoch": 0.7526351725336038, + "grad_norm": 423.8834228515625, + "learning_rate": 7.40491333877307e-06, + "loss": 56.0619, + "step": 186290 + }, + { + "epoch": 0.7526755737989714, + "grad_norm": 320.5574645996094, + "learning_rate": 7.4027442424790256e-06, + "loss": 81.6615, + "step": 186300 + }, + { + "epoch": 0.752715975064339, + "grad_norm": 1357.1239013671875, + "learning_rate": 7.40057539177498e-06, + "loss": 85.402, + "step": 186310 + }, + { + "epoch": 0.7527563763297066, + "grad_norm": 3454.65625, + "learning_rate": 7.398406786703198e-06, + "loss": 116.3718, + "step": 186320 + }, + { + "epoch": 0.7527967775950742, + "grad_norm": 412.3581237792969, 
+ "learning_rate": 7.396238427305964e-06, + "loss": 45.5975, + "step": 186330 + }, + { + "epoch": 0.7528371788604419, + "grad_norm": 549.17578125, + "learning_rate": 7.394070313625556e-06, + "loss": 77.625, + "step": 186340 + }, + { + "epoch": 0.7528775801258095, + "grad_norm": 844.4102783203125, + "learning_rate": 7.391902445704225e-06, + "loss": 64.0046, + "step": 186350 + }, + { + "epoch": 0.7529179813911772, + "grad_norm": 803.2444458007812, + "learning_rate": 7.389734823584258e-06, + "loss": 69.0954, + "step": 186360 + }, + { + "epoch": 0.7529583826565448, + "grad_norm": 1263.1317138671875, + "learning_rate": 7.387567447307899e-06, + "loss": 53.3439, + "step": 186370 + }, + { + "epoch": 0.7529987839219124, + "grad_norm": 755.9862060546875, + "learning_rate": 7.385400316917404e-06, + "loss": 57.9107, + "step": 186380 + }, + { + "epoch": 0.7530391851872801, + "grad_norm": 197.36988830566406, + "learning_rate": 7.383233432455026e-06, + "loss": 101.5039, + "step": 186390 + }, + { + "epoch": 0.7530795864526477, + "grad_norm": 728.79541015625, + "learning_rate": 7.3810667939630075e-06, + "loss": 57.6558, + "step": 186400 + }, + { + "epoch": 0.7531199877180154, + "grad_norm": 476.35052490234375, + "learning_rate": 7.378900401483593e-06, + "loss": 81.0766, + "step": 186410 + }, + { + "epoch": 0.753160388983383, + "grad_norm": 367.59234619140625, + "learning_rate": 7.376734255059008e-06, + "loss": 64.3991, + "step": 186420 + }, + { + "epoch": 0.7532007902487506, + "grad_norm": 327.35089111328125, + "learning_rate": 7.374568354731488e-06, + "loss": 76.8256, + "step": 186430 + }, + { + "epoch": 0.7532411915141182, + "grad_norm": 351.67822265625, + "learning_rate": 7.372402700543257e-06, + "loss": 72.3075, + "step": 186440 + }, + { + "epoch": 0.7532815927794858, + "grad_norm": 515.5404663085938, + "learning_rate": 7.370237292536544e-06, + "loss": 54.5645, + "step": 186450 + }, + { + "epoch": 0.7533219940448534, + "grad_norm": 510.3112487792969, + "learning_rate": 7.368072130753543e-06, + "loss": 96.3001, + "step": 186460 + }, + { + "epoch": 0.7533623953102211, + "grad_norm": 870.7503662109375, + "learning_rate": 7.365907215236494e-06, + "loss": 60.5959, + "step": 186470 + }, + { + "epoch": 0.7534027965755887, + "grad_norm": 917.4067993164062, + "learning_rate": 7.363742546027579e-06, + "loss": 94.3442, + "step": 186480 + }, + { + "epoch": 0.7534431978409564, + "grad_norm": 314.6139831542969, + "learning_rate": 7.3615781231690105e-06, + "loss": 51.1758, + "step": 186490 + }, + { + "epoch": 0.753483599106324, + "grad_norm": 528.3482666015625, + "learning_rate": 7.359413946702982e-06, + "loss": 66.8784, + "step": 186500 + }, + { + "epoch": 0.7535240003716916, + "grad_norm": 651.52294921875, + "learning_rate": 7.3572500166716885e-06, + "loss": 60.6835, + "step": 186510 + }, + { + "epoch": 0.7535644016370593, + "grad_norm": 234.10211181640625, + "learning_rate": 7.35508633311732e-06, + "loss": 37.3811, + "step": 186520 + }, + { + "epoch": 0.7536048029024269, + "grad_norm": 683.1952514648438, + "learning_rate": 7.35292289608204e-06, + "loss": 67.0356, + "step": 186530 + }, + { + "epoch": 0.7536452041677946, + "grad_norm": 419.66131591796875, + "learning_rate": 7.350759705608055e-06, + "loss": 72.2181, + "step": 186540 + }, + { + "epoch": 0.7536856054331622, + "grad_norm": 528.3739013671875, + "learning_rate": 7.348596761737512e-06, + "loss": 67.8656, + "step": 186550 + }, + { + "epoch": 0.7537260066985298, + "grad_norm": 1115.04736328125, + "learning_rate": 7.346434064512591e-06, + "loss": 91.9254, + 
"step": 186560 + }, + { + "epoch": 0.7537664079638974, + "grad_norm": 269.6934509277344, + "learning_rate": 7.344271613975451e-06, + "loss": 60.2003, + "step": 186570 + }, + { + "epoch": 0.753806809229265, + "grad_norm": 745.8848876953125, + "learning_rate": 7.342109410168257e-06, + "loss": 93.0315, + "step": 186580 + }, + { + "epoch": 0.7538472104946327, + "grad_norm": 790.6614990234375, + "learning_rate": 7.339947453133152e-06, + "loss": 58.434, + "step": 186590 + }, + { + "epoch": 0.7538876117600003, + "grad_norm": 786.9722290039062, + "learning_rate": 7.337785742912289e-06, + "loss": 59.0047, + "step": 186600 + }, + { + "epoch": 0.7539280130253679, + "grad_norm": 390.84228515625, + "learning_rate": 7.33562427954781e-06, + "loss": 68.8836, + "step": 186610 + }, + { + "epoch": 0.7539684142907356, + "grad_norm": 1102.3074951171875, + "learning_rate": 7.333463063081858e-06, + "loss": 52.1942, + "step": 186620 + }, + { + "epoch": 0.7540088155561032, + "grad_norm": 923.0013427734375, + "learning_rate": 7.331302093556571e-06, + "loss": 59.9686, + "step": 186630 + }, + { + "epoch": 0.7540492168214709, + "grad_norm": 646.8573608398438, + "learning_rate": 7.329141371014059e-06, + "loss": 81.1634, + "step": 186640 + }, + { + "epoch": 0.7540896180868385, + "grad_norm": 147.6384735107422, + "learning_rate": 7.326980895496472e-06, + "loss": 40.4456, + "step": 186650 + }, + { + "epoch": 0.7541300193522061, + "grad_norm": 1011.257080078125, + "learning_rate": 7.324820667045909e-06, + "loss": 110.3125, + "step": 186660 + }, + { + "epoch": 0.7541704206175738, + "grad_norm": 415.6960754394531, + "learning_rate": 7.322660685704495e-06, + "loss": 77.9561, + "step": 186670 + }, + { + "epoch": 0.7542108218829414, + "grad_norm": 507.41192626953125, + "learning_rate": 7.320500951514342e-06, + "loss": 53.871, + "step": 186680 + }, + { + "epoch": 0.7542512231483091, + "grad_norm": 496.76898193359375, + "learning_rate": 7.31834146451754e-06, + "loss": 86.3407, + "step": 186690 + }, + { + "epoch": 0.7542916244136766, + "grad_norm": 587.3931274414062, + "learning_rate": 7.316182224756212e-06, + "loss": 84.1901, + "step": 186700 + }, + { + "epoch": 0.7543320256790442, + "grad_norm": 464.81439208984375, + "learning_rate": 7.314023232272436e-06, + "loss": 80.449, + "step": 186710 + }, + { + "epoch": 0.7543724269444119, + "grad_norm": 500.1733703613281, + "learning_rate": 7.3118644871083066e-06, + "loss": 38.2765, + "step": 186720 + }, + { + "epoch": 0.7544128282097795, + "grad_norm": 433.20330810546875, + "learning_rate": 7.309705989305913e-06, + "loss": 65.7371, + "step": 186730 + }, + { + "epoch": 0.7544532294751471, + "grad_norm": 407.8216552734375, + "learning_rate": 7.307547738907339e-06, + "loss": 62.138, + "step": 186740 + }, + { + "epoch": 0.7544936307405148, + "grad_norm": 645.6759033203125, + "learning_rate": 7.305389735954651e-06, + "loss": 75.5475, + "step": 186750 + }, + { + "epoch": 0.7545340320058824, + "grad_norm": 601.2525024414062, + "learning_rate": 7.303231980489927e-06, + "loss": 42.2078, + "step": 186760 + }, + { + "epoch": 0.7545744332712501, + "grad_norm": 1091.3134765625, + "learning_rate": 7.301074472555232e-06, + "loss": 60.8248, + "step": 186770 + }, + { + "epoch": 0.7546148345366177, + "grad_norm": 704.0828247070312, + "learning_rate": 7.298917212192627e-06, + "loss": 74.7673, + "step": 186780 + }, + { + "epoch": 0.7546552358019853, + "grad_norm": 849.5340576171875, + "learning_rate": 7.296760199444175e-06, + "loss": 84.683, + "step": 186790 + }, + { + "epoch": 0.754695637067353, + 
"grad_norm": 324.5838623046875, + "learning_rate": 7.2946034343519125e-06, + "loss": 59.0935, + "step": 186800 + }, + { + "epoch": 0.7547360383327206, + "grad_norm": 494.446044921875, + "learning_rate": 7.292446916957909e-06, + "loss": 54.0565, + "step": 186810 + }, + { + "epoch": 0.7547764395980883, + "grad_norm": 861.8260498046875, + "learning_rate": 7.290290647304188e-06, + "loss": 69.584, + "step": 186820 + }, + { + "epoch": 0.7548168408634558, + "grad_norm": 1001.5390625, + "learning_rate": 7.288134625432794e-06, + "loss": 52.162, + "step": 186830 + }, + { + "epoch": 0.7548572421288234, + "grad_norm": 584.9155883789062, + "learning_rate": 7.285978851385762e-06, + "loss": 80.6509, + "step": 186840 + }, + { + "epoch": 0.7548976433941911, + "grad_norm": 368.9531555175781, + "learning_rate": 7.283823325205117e-06, + "loss": 86.0007, + "step": 186850 + }, + { + "epoch": 0.7549380446595587, + "grad_norm": 807.7420654296875, + "learning_rate": 7.28166804693289e-06, + "loss": 60.9218, + "step": 186860 + }, + { + "epoch": 0.7549784459249264, + "grad_norm": 782.8348999023438, + "learning_rate": 7.279513016611086e-06, + "loss": 78.4514, + "step": 186870 + }, + { + "epoch": 0.755018847190294, + "grad_norm": 634.0875244140625, + "learning_rate": 7.277358234281724e-06, + "loss": 49.4069, + "step": 186880 + }, + { + "epoch": 0.7550592484556616, + "grad_norm": 977.0324096679688, + "learning_rate": 7.275203699986812e-06, + "loss": 83.5947, + "step": 186890 + }, + { + "epoch": 0.7550996497210293, + "grad_norm": 336.1335754394531, + "learning_rate": 7.273049413768362e-06, + "loss": 97.0551, + "step": 186900 + }, + { + "epoch": 0.7551400509863969, + "grad_norm": 713.4405517578125, + "learning_rate": 7.270895375668354e-06, + "loss": 30.4371, + "step": 186910 + }, + { + "epoch": 0.7551804522517646, + "grad_norm": 510.8126220703125, + "learning_rate": 7.2687415857288066e-06, + "loss": 45.5792, + "step": 186920 + }, + { + "epoch": 0.7552208535171322, + "grad_norm": 892.2973022460938, + "learning_rate": 7.266588043991692e-06, + "loss": 61.2122, + "step": 186930 + }, + { + "epoch": 0.7552612547824998, + "grad_norm": 818.02880859375, + "learning_rate": 7.264434750498997e-06, + "loss": 85.17, + "step": 186940 + }, + { + "epoch": 0.7553016560478674, + "grad_norm": 914.3629760742188, + "learning_rate": 7.262281705292711e-06, + "loss": 91.9488, + "step": 186950 + }, + { + "epoch": 0.755342057313235, + "grad_norm": 726.5889282226562, + "learning_rate": 7.260128908414785e-06, + "loss": 108.4085, + "step": 186960 + }, + { + "epoch": 0.7553824585786026, + "grad_norm": 633.0655517578125, + "learning_rate": 7.25797635990722e-06, + "loss": 73.1963, + "step": 186970 + }, + { + "epoch": 0.7554228598439703, + "grad_norm": 819.8031616210938, + "learning_rate": 7.25582405981196e-06, + "loss": 62.3483, + "step": 186980 + }, + { + "epoch": 0.7554632611093379, + "grad_norm": 779.9705200195312, + "learning_rate": 7.253672008170969e-06, + "loss": 93.5519, + "step": 186990 + }, + { + "epoch": 0.7555036623747056, + "grad_norm": 262.1471252441406, + "learning_rate": 7.251520205026206e-06, + "loss": 53.9868, + "step": 187000 + }, + { + "epoch": 0.7555440636400732, + "grad_norm": 795.322509765625, + "learning_rate": 7.249368650419619e-06, + "loss": 45.7649, + "step": 187010 + }, + { + "epoch": 0.7555844649054408, + "grad_norm": 511.7061462402344, + "learning_rate": 7.247217344393162e-06, + "loss": 60.976, + "step": 187020 + }, + { + "epoch": 0.7556248661708085, + "grad_norm": 1992.8133544921875, + "learning_rate": 7.245066286988762e-06, 
+ "loss": 102.3952, + "step": 187030 + }, + { + "epoch": 0.7556652674361761, + "grad_norm": 911.9163208007812, + "learning_rate": 7.242915478248362e-06, + "loss": 80.5683, + "step": 187040 + }, + { + "epoch": 0.7557056687015438, + "grad_norm": 702.101318359375, + "learning_rate": 7.240764918213892e-06, + "loss": 80.0796, + "step": 187050 + }, + { + "epoch": 0.7557460699669114, + "grad_norm": 506.9548034667969, + "learning_rate": 7.238614606927279e-06, + "loss": 78.0258, + "step": 187060 + }, + { + "epoch": 0.755786471232279, + "grad_norm": 1097.616943359375, + "learning_rate": 7.236464544430444e-06, + "loss": 55.6902, + "step": 187070 + }, + { + "epoch": 0.7558268724976466, + "grad_norm": 261.63140869140625, + "learning_rate": 7.234314730765308e-06, + "loss": 78.5947, + "step": 187080 + }, + { + "epoch": 0.7558672737630142, + "grad_norm": 605.481689453125, + "learning_rate": 7.232165165973774e-06, + "loss": 90.4454, + "step": 187090 + }, + { + "epoch": 0.7559076750283819, + "grad_norm": 468.44000244140625, + "learning_rate": 7.230015850097756e-06, + "loss": 44.3215, + "step": 187100 + }, + { + "epoch": 0.7559480762937495, + "grad_norm": 648.1309204101562, + "learning_rate": 7.227866783179149e-06, + "loss": 58.3528, + "step": 187110 + }, + { + "epoch": 0.7559884775591171, + "grad_norm": 158.54656982421875, + "learning_rate": 7.225717965259858e-06, + "loss": 55.4438, + "step": 187120 + }, + { + "epoch": 0.7560288788244848, + "grad_norm": 830.6312866210938, + "learning_rate": 7.223569396381777e-06, + "loss": 47.6865, + "step": 187130 + }, + { + "epoch": 0.7560692800898524, + "grad_norm": 706.6644287109375, + "learning_rate": 7.221421076586774e-06, + "loss": 64.3564, + "step": 187140 + }, + { + "epoch": 0.75610968135522, + "grad_norm": 1130.078369140625, + "learning_rate": 7.2192730059167606e-06, + "loss": 66.3563, + "step": 187150 + }, + { + "epoch": 0.7561500826205877, + "grad_norm": 544.8982543945312, + "learning_rate": 7.217125184413593e-06, + "loss": 67.5316, + "step": 187160 + }, + { + "epoch": 0.7561904838859553, + "grad_norm": 644.00439453125, + "learning_rate": 7.214977612119151e-06, + "loss": 68.1023, + "step": 187170 + }, + { + "epoch": 0.756230885151323, + "grad_norm": 834.6742553710938, + "learning_rate": 7.212830289075304e-06, + "loss": 52.2599, + "step": 187180 + }, + { + "epoch": 0.7562712864166906, + "grad_norm": 1644.79736328125, + "learning_rate": 7.210683215323919e-06, + "loss": 70.6794, + "step": 187190 + }, + { + "epoch": 0.7563116876820583, + "grad_norm": 441.7187194824219, + "learning_rate": 7.208536390906842e-06, + "loss": 43.5381, + "step": 187200 + }, + { + "epoch": 0.7563520889474258, + "grad_norm": 551.8931884765625, + "learning_rate": 7.2063898158659355e-06, + "loss": 100.2674, + "step": 187210 + }, + { + "epoch": 0.7563924902127934, + "grad_norm": 497.831787109375, + "learning_rate": 7.204243490243044e-06, + "loss": 56.0785, + "step": 187220 + }, + { + "epoch": 0.7564328914781611, + "grad_norm": 713.273193359375, + "learning_rate": 7.2020974140800135e-06, + "loss": 60.0587, + "step": 187230 + }, + { + "epoch": 0.7564732927435287, + "grad_norm": 479.0289611816406, + "learning_rate": 7.19995158741869e-06, + "loss": 50.2842, + "step": 187240 + }, + { + "epoch": 0.7565136940088963, + "grad_norm": 345.1194152832031, + "learning_rate": 7.197806010300888e-06, + "loss": 46.4268, + "step": 187250 + }, + { + "epoch": 0.756554095274264, + "grad_norm": 578.0177612304688, + "learning_rate": 7.195660682768462e-06, + "loss": 60.4412, + "step": 187260 + }, + { + "epoch": 
0.7565944965396316, + "grad_norm": 1153.9326171875, + "learning_rate": 7.193515604863215e-06, + "loss": 59.2004, + "step": 187270 + }, + { + "epoch": 0.7566348978049993, + "grad_norm": 369.4091491699219, + "learning_rate": 7.191370776626974e-06, + "loss": 52.2628, + "step": 187280 + }, + { + "epoch": 0.7566752990703669, + "grad_norm": 614.7246704101562, + "learning_rate": 7.189226198101562e-06, + "loss": 58.4758, + "step": 187290 + }, + { + "epoch": 0.7567157003357345, + "grad_norm": 353.8721923828125, + "learning_rate": 7.187081869328767e-06, + "loss": 75.7032, + "step": 187300 + }, + { + "epoch": 0.7567561016011022, + "grad_norm": 651.6696166992188, + "learning_rate": 7.1849377903504214e-06, + "loss": 45.5545, + "step": 187310 + }, + { + "epoch": 0.7567965028664698, + "grad_norm": 631.0756225585938, + "learning_rate": 7.182793961208303e-06, + "loss": 73.9404, + "step": 187320 + }, + { + "epoch": 0.7568369041318375, + "grad_norm": 602.0848388671875, + "learning_rate": 7.1806503819442165e-06, + "loss": 90.0933, + "step": 187330 + }, + { + "epoch": 0.756877305397205, + "grad_norm": 1306.8358154296875, + "learning_rate": 7.17850705259995e-06, + "loss": 106.2228, + "step": 187340 + }, + { + "epoch": 0.7569177066625726, + "grad_norm": 426.4610595703125, + "learning_rate": 7.176363973217297e-06, + "loss": 72.8111, + "step": 187350 + }, + { + "epoch": 0.7569581079279403, + "grad_norm": 1264.8819580078125, + "learning_rate": 7.174221143838023e-06, + "loss": 71.8451, + "step": 187360 + }, + { + "epoch": 0.7569985091933079, + "grad_norm": 798.4447631835938, + "learning_rate": 7.172078564503913e-06, + "loss": 84.8772, + "step": 187370 + }, + { + "epoch": 0.7570389104586756, + "grad_norm": 679.2728271484375, + "learning_rate": 7.1699362352567315e-06, + "loss": 51.2573, + "step": 187380 + }, + { + "epoch": 0.7570793117240432, + "grad_norm": 641.6388549804688, + "learning_rate": 7.1677941561382525e-06, + "loss": 59.5785, + "step": 187390 + }, + { + "epoch": 0.7571197129894108, + "grad_norm": 600.2508544921875, + "learning_rate": 7.165652327190238e-06, + "loss": 49.7564, + "step": 187400 + }, + { + "epoch": 0.7571601142547785, + "grad_norm": 207.33245849609375, + "learning_rate": 7.163510748454425e-06, + "loss": 108.6819, + "step": 187410 + }, + { + "epoch": 0.7572005155201461, + "grad_norm": 208.03323364257812, + "learning_rate": 7.161369419972593e-06, + "loss": 61.8933, + "step": 187420 + }, + { + "epoch": 0.7572409167855138, + "grad_norm": 758.22412109375, + "learning_rate": 7.159228341786466e-06, + "loss": 97.4494, + "step": 187430 + }, + { + "epoch": 0.7572813180508814, + "grad_norm": 846.6593017578125, + "learning_rate": 7.157087513937793e-06, + "loss": 79.1611, + "step": 187440 + }, + { + "epoch": 0.757321719316249, + "grad_norm": 867.4005737304688, + "learning_rate": 7.1549469364683126e-06, + "loss": 85.378, + "step": 187450 + }, + { + "epoch": 0.7573621205816167, + "grad_norm": 173.42578125, + "learning_rate": 7.152806609419753e-06, + "loss": 54.4735, + "step": 187460 + }, + { + "epoch": 0.7574025218469842, + "grad_norm": 734.9559936523438, + "learning_rate": 7.150666532833852e-06, + "loss": 70.7557, + "step": 187470 + }, + { + "epoch": 0.7574429231123518, + "grad_norm": 606.5060424804688, + "learning_rate": 7.148526706752312e-06, + "loss": 99.2862, + "step": 187480 + }, + { + "epoch": 0.7574833243777195, + "grad_norm": 315.1516418457031, + "learning_rate": 7.146387131216863e-06, + "loss": 85.7325, + "step": 187490 + }, + { + "epoch": 0.7575237256430871, + "grad_norm": 375.21112060546875, + 
"learning_rate": 7.1442478062692135e-06, + "loss": 73.4434, + "step": 187500 + }, + { + "epoch": 0.7575641269084548, + "grad_norm": 771.897705078125, + "learning_rate": 7.1421087319510784e-06, + "loss": 56.6956, + "step": 187510 + }, + { + "epoch": 0.7576045281738224, + "grad_norm": 1243.237548828125, + "learning_rate": 7.139969908304141e-06, + "loss": 69.9951, + "step": 187520 + }, + { + "epoch": 0.75764492943919, + "grad_norm": 443.31427001953125, + "learning_rate": 7.137831335370122e-06, + "loss": 45.4006, + "step": 187530 + }, + { + "epoch": 0.7576853307045577, + "grad_norm": 1332.189208984375, + "learning_rate": 7.1356930131906986e-06, + "loss": 76.0582, + "step": 187540 + }, + { + "epoch": 0.7577257319699253, + "grad_norm": 489.630859375, + "learning_rate": 7.133554941807561e-06, + "loss": 53.4473, + "step": 187550 + }, + { + "epoch": 0.757766133235293, + "grad_norm": 744.7509765625, + "learning_rate": 7.131417121262403e-06, + "loss": 50.5869, + "step": 187560 + }, + { + "epoch": 0.7578065345006606, + "grad_norm": 609.1389770507812, + "learning_rate": 7.129279551596877e-06, + "loss": 49.9825, + "step": 187570 + }, + { + "epoch": 0.7578469357660282, + "grad_norm": 792.8884887695312, + "learning_rate": 7.127142232852688e-06, + "loss": 75.4147, + "step": 187580 + }, + { + "epoch": 0.7578873370313958, + "grad_norm": 638.085205078125, + "learning_rate": 7.125005165071475e-06, + "loss": 71.2408, + "step": 187590 + }, + { + "epoch": 0.7579277382967634, + "grad_norm": 580.7232055664062, + "learning_rate": 7.122868348294927e-06, + "loss": 48.23, + "step": 187600 + }, + { + "epoch": 0.757968139562131, + "grad_norm": 703.93115234375, + "learning_rate": 7.120731782564685e-06, + "loss": 40.898, + "step": 187610 + }, + { + "epoch": 0.7580085408274987, + "grad_norm": 482.88592529296875, + "learning_rate": 7.118595467922404e-06, + "loss": 71.2567, + "step": 187620 + }, + { + "epoch": 0.7580489420928663, + "grad_norm": 1030.909912109375, + "learning_rate": 7.116459404409748e-06, + "loss": 95.2096, + "step": 187630 + }, + { + "epoch": 0.758089343358234, + "grad_norm": 337.6048889160156, + "learning_rate": 7.114323592068339e-06, + "loss": 41.1759, + "step": 187640 + }, + { + "epoch": 0.7581297446236016, + "grad_norm": 1044.1971435546875, + "learning_rate": 7.1121880309398265e-06, + "loss": 86.9021, + "step": 187650 + }, + { + "epoch": 0.7581701458889692, + "grad_norm": 788.5702514648438, + "learning_rate": 7.110052721065843e-06, + "loss": 55.7955, + "step": 187660 + }, + { + "epoch": 0.7582105471543369, + "grad_norm": 584.1162109375, + "learning_rate": 7.107917662488017e-06, + "loss": 122.5632, + "step": 187670 + }, + { + "epoch": 0.7582509484197045, + "grad_norm": 599.9359741210938, + "learning_rate": 7.105782855247976e-06, + "loss": 76.2705, + "step": 187680 + }, + { + "epoch": 0.7582913496850722, + "grad_norm": 903.2108154296875, + "learning_rate": 7.103648299387342e-06, + "loss": 71.8739, + "step": 187690 + }, + { + "epoch": 0.7583317509504398, + "grad_norm": 478.894775390625, + "learning_rate": 7.101513994947715e-06, + "loss": 83.6879, + "step": 187700 + }, + { + "epoch": 0.7583721522158074, + "grad_norm": 1673.816650390625, + "learning_rate": 7.099379941970717e-06, + "loss": 81.946, + "step": 187710 + }, + { + "epoch": 0.758412553481175, + "grad_norm": 639.7545166015625, + "learning_rate": 7.097246140497953e-06, + "loss": 52.3916, + "step": 187720 + }, + { + "epoch": 0.7584529547465426, + "grad_norm": 667.9055786132812, + "learning_rate": 7.095112590571005e-06, + "loss": 85.7179, + "step": 187730 
+ }, + { + "epoch": 0.7584933560119103, + "grad_norm": 323.40728759765625, + "learning_rate": 7.092979292231497e-06, + "loss": 57.4681, + "step": 187740 + }, + { + "epoch": 0.7585337572772779, + "grad_norm": 713.4212036132812, + "learning_rate": 7.090846245520986e-06, + "loss": 83.526, + "step": 187750 + }, + { + "epoch": 0.7585741585426455, + "grad_norm": 1440.0552978515625, + "learning_rate": 7.088713450481088e-06, + "loss": 68.4316, + "step": 187760 + }, + { + "epoch": 0.7586145598080132, + "grad_norm": 1288.300048828125, + "learning_rate": 7.086580907153362e-06, + "loss": 104.0582, + "step": 187770 + }, + { + "epoch": 0.7586549610733808, + "grad_norm": 474.1349182128906, + "learning_rate": 7.084448615579389e-06, + "loss": 87.1047, + "step": 187780 + }, + { + "epoch": 0.7586953623387485, + "grad_norm": 697.9580078125, + "learning_rate": 7.0823165758007405e-06, + "loss": 72.8818, + "step": 187790 + }, + { + "epoch": 0.7587357636041161, + "grad_norm": 1103.5340576171875, + "learning_rate": 7.080184787858988e-06, + "loss": 88.0819, + "step": 187800 + }, + { + "epoch": 0.7587761648694837, + "grad_norm": 644.75, + "learning_rate": 7.078053251795676e-06, + "loss": 66.1286, + "step": 187810 + }, + { + "epoch": 0.7588165661348514, + "grad_norm": 481.65863037109375, + "learning_rate": 7.075921967652368e-06, + "loss": 50.334, + "step": 187820 + }, + { + "epoch": 0.758856967400219, + "grad_norm": 544.4684448242188, + "learning_rate": 7.073790935470617e-06, + "loss": 56.9801, + "step": 187830 + }, + { + "epoch": 0.7588973686655867, + "grad_norm": 774.4539184570312, + "learning_rate": 7.071660155291966e-06, + "loss": 54.8377, + "step": 187840 + }, + { + "epoch": 0.7589377699309542, + "grad_norm": 489.9494934082031, + "learning_rate": 7.069529627157965e-06, + "loss": 55.8695, + "step": 187850 + }, + { + "epoch": 0.7589781711963218, + "grad_norm": 1083.471435546875, + "learning_rate": 7.067399351110125e-06, + "loss": 59.2225, + "step": 187860 + }, + { + "epoch": 0.7590185724616895, + "grad_norm": 327.3133239746094, + "learning_rate": 7.065269327190005e-06, + "loss": 35.9081, + "step": 187870 + }, + { + "epoch": 0.7590589737270571, + "grad_norm": 757.127685546875, + "learning_rate": 7.063139555439116e-06, + "loss": 94.2587, + "step": 187880 + }, + { + "epoch": 0.7590993749924247, + "grad_norm": 1086.137939453125, + "learning_rate": 7.061010035898979e-06, + "loss": 82.9389, + "step": 187890 + }, + { + "epoch": 0.7591397762577924, + "grad_norm": 548.812744140625, + "learning_rate": 7.058880768611121e-06, + "loss": 73.5107, + "step": 187900 + }, + { + "epoch": 0.75918017752316, + "grad_norm": 2256.38623046875, + "learning_rate": 7.0567517536170285e-06, + "loss": 67.5114, + "step": 187910 + }, + { + "epoch": 0.7592205787885277, + "grad_norm": 1506.3675537109375, + "learning_rate": 7.05462299095824e-06, + "loss": 112.0183, + "step": 187920 + }, + { + "epoch": 0.7592609800538953, + "grad_norm": 606.0617065429688, + "learning_rate": 7.052494480676233e-06, + "loss": 47.2476, + "step": 187930 + }, + { + "epoch": 0.759301381319263, + "grad_norm": 849.75439453125, + "learning_rate": 7.050366222812515e-06, + "loss": 84.2169, + "step": 187940 + }, + { + "epoch": 0.7593417825846306, + "grad_norm": 1162.701171875, + "learning_rate": 7.048238217408572e-06, + "loss": 80.0798, + "step": 187950 + }, + { + "epoch": 0.7593821838499982, + "grad_norm": 1014.5158081054688, + "learning_rate": 7.0461104645059e-06, + "loss": 76.2899, + "step": 187960 + }, + { + "epoch": 0.7594225851153659, + "grad_norm": 532.9381713867188, + 
"learning_rate": 7.043982964145965e-06, + "loss": 56.5015, + "step": 187970 + }, + { + "epoch": 0.7594629863807334, + "grad_norm": 517.0951538085938, + "learning_rate": 7.041855716370256e-06, + "loss": 54.024, + "step": 187980 + }, + { + "epoch": 0.759503387646101, + "grad_norm": 356.8224182128906, + "learning_rate": 7.039728721220238e-06, + "loss": 59.89, + "step": 187990 + }, + { + "epoch": 0.7595437889114687, + "grad_norm": 475.2518615722656, + "learning_rate": 7.037601978737383e-06, + "loss": 72.8852, + "step": 188000 + }, + { + "epoch": 0.7595841901768363, + "grad_norm": 581.750244140625, + "learning_rate": 7.035475488963157e-06, + "loss": 63.5307, + "step": 188010 + }, + { + "epoch": 0.759624591442204, + "grad_norm": 1996.45361328125, + "learning_rate": 7.033349251938997e-06, + "loss": 96.7829, + "step": 188020 + }, + { + "epoch": 0.7596649927075716, + "grad_norm": 762.0792846679688, + "learning_rate": 7.031223267706382e-06, + "loss": 103.4701, + "step": 188030 + }, + { + "epoch": 0.7597053939729392, + "grad_norm": 444.5074462890625, + "learning_rate": 7.029097536306742e-06, + "loss": 65.3874, + "step": 188040 + }, + { + "epoch": 0.7597457952383069, + "grad_norm": 605.592041015625, + "learning_rate": 7.0269720577815204e-06, + "loss": 54.0769, + "step": 188050 + }, + { + "epoch": 0.7597861965036745, + "grad_norm": 1011.8488159179688, + "learning_rate": 7.024846832172165e-06, + "loss": 94.3701, + "step": 188060 + }, + { + "epoch": 0.7598265977690422, + "grad_norm": 503.2260437011719, + "learning_rate": 7.022721859520088e-06, + "loss": 42.527, + "step": 188070 + }, + { + "epoch": 0.7598669990344098, + "grad_norm": 387.4369812011719, + "learning_rate": 7.020597139866743e-06, + "loss": 67.2731, + "step": 188080 + }, + { + "epoch": 0.7599074002997774, + "grad_norm": 567.8618774414062, + "learning_rate": 7.0184726732535315e-06, + "loss": 69.8029, + "step": 188090 + }, + { + "epoch": 0.7599478015651451, + "grad_norm": 491.1247253417969, + "learning_rate": 7.01634845972188e-06, + "loss": 74.537, + "step": 188100 + }, + { + "epoch": 0.7599882028305126, + "grad_norm": 705.9019775390625, + "learning_rate": 7.014224499313198e-06, + "loss": 51.4494, + "step": 188110 + }, + { + "epoch": 0.7600286040958802, + "grad_norm": 1553.544921875, + "learning_rate": 7.0121007920688965e-06, + "loss": 94.5206, + "step": 188120 + }, + { + "epoch": 0.7600690053612479, + "grad_norm": 564.6571044921875, + "learning_rate": 7.009977338030383e-06, + "loss": 44.0296, + "step": 188130 + }, + { + "epoch": 0.7601094066266155, + "grad_norm": 1143.1024169921875, + "learning_rate": 7.0078541372390406e-06, + "loss": 56.3338, + "step": 188140 + }, + { + "epoch": 0.7601498078919832, + "grad_norm": 369.5805358886719, + "learning_rate": 7.005731189736272e-06, + "loss": 76.2499, + "step": 188150 + }, + { + "epoch": 0.7601902091573508, + "grad_norm": 480.5211181640625, + "learning_rate": 7.0036084955634634e-06, + "loss": 65.9414, + "step": 188160 + }, + { + "epoch": 0.7602306104227184, + "grad_norm": 615.050537109375, + "learning_rate": 7.0014860547620055e-06, + "loss": 48.5045, + "step": 188170 + }, + { + "epoch": 0.7602710116880861, + "grad_norm": 430.948974609375, + "learning_rate": 6.999363867373254e-06, + "loss": 39.4676, + "step": 188180 + }, + { + "epoch": 0.7603114129534537, + "grad_norm": 429.5802001953125, + "learning_rate": 6.997241933438612e-06, + "loss": 54.5492, + "step": 188190 + }, + { + "epoch": 0.7603518142188214, + "grad_norm": 997.848388671875, + "learning_rate": 6.995120252999419e-06, + "loss": 66.0152, + 
"step": 188200 + }, + { + "epoch": 0.760392215484189, + "grad_norm": 1127.0921630859375, + "learning_rate": 6.992998826097066e-06, + "loss": 93.0352, + "step": 188210 + }, + { + "epoch": 0.7604326167495566, + "grad_norm": 878.4893798828125, + "learning_rate": 6.990877652772891e-06, + "loss": 110.6094, + "step": 188220 + }, + { + "epoch": 0.7604730180149242, + "grad_norm": 477.27239990234375, + "learning_rate": 6.988756733068252e-06, + "loss": 60.381, + "step": 188230 + }, + { + "epoch": 0.7605134192802918, + "grad_norm": 886.4388427734375, + "learning_rate": 6.986636067024508e-06, + "loss": 67.9152, + "step": 188240 + }, + { + "epoch": 0.7605538205456595, + "grad_norm": 658.3610229492188, + "learning_rate": 6.984515654682984e-06, + "loss": 49.5363, + "step": 188250 + }, + { + "epoch": 0.7605942218110271, + "grad_norm": 1000.6824340820312, + "learning_rate": 6.98239549608503e-06, + "loss": 63.5303, + "step": 188260 + }, + { + "epoch": 0.7606346230763947, + "grad_norm": 612.42529296875, + "learning_rate": 6.980275591271979e-06, + "loss": 78.1147, + "step": 188270 + }, + { + "epoch": 0.7606750243417624, + "grad_norm": 577.2488403320312, + "learning_rate": 6.978155940285156e-06, + "loss": 77.1028, + "step": 188280 + }, + { + "epoch": 0.76071542560713, + "grad_norm": 978.0053100585938, + "learning_rate": 6.976036543165887e-06, + "loss": 67.3579, + "step": 188290 + }, + { + "epoch": 0.7607558268724977, + "grad_norm": 1001.4780883789062, + "learning_rate": 6.9739173999554984e-06, + "loss": 54.5986, + "step": 188300 + }, + { + "epoch": 0.7607962281378653, + "grad_norm": 227.4033966064453, + "learning_rate": 6.971798510695289e-06, + "loss": 62.2775, + "step": 188310 + }, + { + "epoch": 0.7608366294032329, + "grad_norm": 951.342529296875, + "learning_rate": 6.969679875426576e-06, + "loss": 71.7306, + "step": 188320 + }, + { + "epoch": 0.7608770306686006, + "grad_norm": 415.0693359375, + "learning_rate": 6.967561494190669e-06, + "loss": 76.9005, + "step": 188330 + }, + { + "epoch": 0.7609174319339682, + "grad_norm": 636.7401733398438, + "learning_rate": 6.965443367028845e-06, + "loss": 58.6315, + "step": 188340 + }, + { + "epoch": 0.7609578331993359, + "grad_norm": 1277.81689453125, + "learning_rate": 6.963325493982429e-06, + "loss": 82.6955, + "step": 188350 + }, + { + "epoch": 0.7609982344647034, + "grad_norm": 525.806640625, + "learning_rate": 6.961207875092682e-06, + "loss": 76.9583, + "step": 188360 + }, + { + "epoch": 0.761038635730071, + "grad_norm": 481.15936279296875, + "learning_rate": 6.95909051040091e-06, + "loss": 46.5968, + "step": 188370 + }, + { + "epoch": 0.7610790369954387, + "grad_norm": 585.7222900390625, + "learning_rate": 6.9569733999483764e-06, + "loss": 90.5514, + "step": 188380 + }, + { + "epoch": 0.7611194382608063, + "grad_norm": 819.1632080078125, + "learning_rate": 6.954856543776363e-06, + "loss": 109.6331, + "step": 188390 + }, + { + "epoch": 0.7611598395261739, + "grad_norm": 1226.65771484375, + "learning_rate": 6.952739941926144e-06, + "loss": 72.1336, + "step": 188400 + }, + { + "epoch": 0.7612002407915416, + "grad_norm": 641.413330078125, + "learning_rate": 6.9506235944389696e-06, + "loss": 59.7747, + "step": 188410 + }, + { + "epoch": 0.7612406420569092, + "grad_norm": 801.3861083984375, + "learning_rate": 6.948507501356106e-06, + "loss": 75.3276, + "step": 188420 + }, + { + "epoch": 0.7612810433222769, + "grad_norm": 585.097412109375, + "learning_rate": 6.946391662718808e-06, + "loss": 48.6197, + "step": 188430 + }, + { + "epoch": 0.7613214445876445, + "grad_norm": 
519.8457641601562, + "learning_rate": 6.9442760785683265e-06, + "loss": 82.9684, + "step": 188440 + }, + { + "epoch": 0.7613618458530121, + "grad_norm": 406.70404052734375, + "learning_rate": 6.942160748945903e-06, + "loss": 95.2191, + "step": 188450 + }, + { + "epoch": 0.7614022471183798, + "grad_norm": 622.1261596679688, + "learning_rate": 6.940045673892786e-06, + "loss": 88.9355, + "step": 188460 + }, + { + "epoch": 0.7614426483837474, + "grad_norm": 440.8005065917969, + "learning_rate": 6.937930853450195e-06, + "loss": 48.4879, + "step": 188470 + }, + { + "epoch": 0.7614830496491151, + "grad_norm": 550.2733764648438, + "learning_rate": 6.935816287659367e-06, + "loss": 72.4389, + "step": 188480 + }, + { + "epoch": 0.7615234509144826, + "grad_norm": 1830.2081298828125, + "learning_rate": 6.933701976561524e-06, + "loss": 62.6602, + "step": 188490 + }, + { + "epoch": 0.7615638521798502, + "grad_norm": 318.3728942871094, + "learning_rate": 6.931587920197891e-06, + "loss": 59.8625, + "step": 188500 + }, + { + "epoch": 0.7616042534452179, + "grad_norm": 414.15582275390625, + "learning_rate": 6.929474118609685e-06, + "loss": 79.778, + "step": 188510 + }, + { + "epoch": 0.7616446547105855, + "grad_norm": 496.21124267578125, + "learning_rate": 6.927360571838098e-06, + "loss": 66.8492, + "step": 188520 + }, + { + "epoch": 0.7616850559759532, + "grad_norm": 1154.761962890625, + "learning_rate": 6.925247279924359e-06, + "loss": 72.1701, + "step": 188530 + }, + { + "epoch": 0.7617254572413208, + "grad_norm": 936.3662719726562, + "learning_rate": 6.92313424290965e-06, + "loss": 129.3364, + "step": 188540 + }, + { + "epoch": 0.7617658585066884, + "grad_norm": 626.2116088867188, + "learning_rate": 6.921021460835171e-06, + "loss": 41.1317, + "step": 188550 + }, + { + "epoch": 0.7618062597720561, + "grad_norm": 213.8795166015625, + "learning_rate": 6.918908933742112e-06, + "loss": 74.0037, + "step": 188560 + }, + { + "epoch": 0.7618466610374237, + "grad_norm": 2638.8154296875, + "learning_rate": 6.916796661671665e-06, + "loss": 78.7061, + "step": 188570 + }, + { + "epoch": 0.7618870623027914, + "grad_norm": 620.4100952148438, + "learning_rate": 6.914684644664995e-06, + "loss": 48.4882, + "step": 188580 + }, + { + "epoch": 0.761927463568159, + "grad_norm": 998.7606811523438, + "learning_rate": 6.9125728827632845e-06, + "loss": 79.5008, + "step": 188590 + }, + { + "epoch": 0.7619678648335266, + "grad_norm": 990.1956176757812, + "learning_rate": 6.910461376007704e-06, + "loss": 76.8525, + "step": 188600 + }, + { + "epoch": 0.7620082660988943, + "grad_norm": 968.5269165039062, + "learning_rate": 6.90835012443942e-06, + "loss": 71.1551, + "step": 188610 + }, + { + "epoch": 0.7620486673642618, + "grad_norm": 657.8392944335938, + "learning_rate": 6.906239128099595e-06, + "loss": 58.0753, + "step": 188620 + }, + { + "epoch": 0.7620890686296294, + "grad_norm": 594.7510986328125, + "learning_rate": 6.904128387029365e-06, + "loss": 69.5069, + "step": 188630 + }, + { + "epoch": 0.7621294698949971, + "grad_norm": 895.2841796875, + "learning_rate": 6.902017901269911e-06, + "loss": 81.9961, + "step": 188640 + }, + { + "epoch": 0.7621698711603647, + "grad_norm": 1072.6514892578125, + "learning_rate": 6.899907670862351e-06, + "loss": 69.5183, + "step": 188650 + }, + { + "epoch": 0.7622102724257324, + "grad_norm": 547.6685791015625, + "learning_rate": 6.897797695847837e-06, + "loss": 68.5294, + "step": 188660 + }, + { + "epoch": 0.7622506736911, + "grad_norm": 918.8134155273438, + "learning_rate": 6.8956879762675086e-06, 
+ "loss": 55.8407, + "step": 188670 + }, + { + "epoch": 0.7622910749564676, + "grad_norm": 748.0855712890625, + "learning_rate": 6.893578512162477e-06, + "loss": 74.4757, + "step": 188680 + }, + { + "epoch": 0.7623314762218353, + "grad_norm": 373.9626159667969, + "learning_rate": 6.891469303573892e-06, + "loss": 89.1337, + "step": 188690 + }, + { + "epoch": 0.7623718774872029, + "grad_norm": 786.5165405273438, + "learning_rate": 6.8893603505428575e-06, + "loss": 65.9773, + "step": 188700 + }, + { + "epoch": 0.7624122787525706, + "grad_norm": 731.5614013671875, + "learning_rate": 6.8872516531104895e-06, + "loss": 74.2133, + "step": 188710 + }, + { + "epoch": 0.7624526800179382, + "grad_norm": 367.6202087402344, + "learning_rate": 6.885143211317904e-06, + "loss": 57.1697, + "step": 188720 + }, + { + "epoch": 0.7624930812833058, + "grad_norm": 2016.620849609375, + "learning_rate": 6.883035025206202e-06, + "loss": 101.0345, + "step": 188730 + }, + { + "epoch": 0.7625334825486734, + "grad_norm": 665.6162109375, + "learning_rate": 6.880927094816494e-06, + "loss": 53.0782, + "step": 188740 + }, + { + "epoch": 0.762573883814041, + "grad_norm": 674.1903076171875, + "learning_rate": 6.878819420189857e-06, + "loss": 70.2041, + "step": 188750 + }, + { + "epoch": 0.7626142850794086, + "grad_norm": 606.2689819335938, + "learning_rate": 6.876712001367394e-06, + "loss": 83.0372, + "step": 188760 + }, + { + "epoch": 0.7626546863447763, + "grad_norm": 920.287841796875, + "learning_rate": 6.874604838390184e-06, + "loss": 61.2145, + "step": 188770 + }, + { + "epoch": 0.7626950876101439, + "grad_norm": 388.3805236816406, + "learning_rate": 6.872497931299318e-06, + "loss": 59.1532, + "step": 188780 + }, + { + "epoch": 0.7627354888755116, + "grad_norm": 232.0879364013672, + "learning_rate": 6.870391280135849e-06, + "loss": 75.4873, + "step": 188790 + }, + { + "epoch": 0.7627758901408792, + "grad_norm": 513.8314819335938, + "learning_rate": 6.868284884940875e-06, + "loss": 52.8679, + "step": 188800 + }, + { + "epoch": 0.7628162914062468, + "grad_norm": 722.6613159179688, + "learning_rate": 6.866178745755443e-06, + "loss": 71.3281, + "step": 188810 + }, + { + "epoch": 0.7628566926716145, + "grad_norm": 892.8251342773438, + "learning_rate": 6.864072862620617e-06, + "loss": 62.9017, + "step": 188820 + }, + { + "epoch": 0.7628970939369821, + "grad_norm": 643.6943359375, + "learning_rate": 6.8619672355774515e-06, + "loss": 67.78, + "step": 188830 + }, + { + "epoch": 0.7629374952023498, + "grad_norm": 483.09857177734375, + "learning_rate": 6.859861864667001e-06, + "loss": 86.7315, + "step": 188840 + }, + { + "epoch": 0.7629778964677174, + "grad_norm": 694.5239868164062, + "learning_rate": 6.8577567499303136e-06, + "loss": 80.9442, + "step": 188850 + }, + { + "epoch": 0.763018297733085, + "grad_norm": 966.896484375, + "learning_rate": 6.855651891408419e-06, + "loss": 86.8298, + "step": 188860 + }, + { + "epoch": 0.7630586989984526, + "grad_norm": 612.2806396484375, + "learning_rate": 6.853547289142357e-06, + "loss": 67.0797, + "step": 188870 + }, + { + "epoch": 0.7630991002638202, + "grad_norm": 832.1422119140625, + "learning_rate": 6.851442943173161e-06, + "loss": 72.1476, + "step": 188880 + }, + { + "epoch": 0.7631395015291879, + "grad_norm": 1116.2589111328125, + "learning_rate": 6.849338853541851e-06, + "loss": 73.4718, + "step": 188890 + }, + { + "epoch": 0.7631799027945555, + "grad_norm": 669.2181396484375, + "learning_rate": 6.847235020289453e-06, + "loss": 61.8296, + "step": 188900 + }, + { + "epoch": 
0.7632203040599231, + "grad_norm": 710.724853515625, + "learning_rate": 6.845131443456985e-06, + "loss": 63.6349, + "step": 188910 + }, + { + "epoch": 0.7632607053252908, + "grad_norm": 937.5764770507812, + "learning_rate": 6.843028123085447e-06, + "loss": 66.8481, + "step": 188920 + }, + { + "epoch": 0.7633011065906584, + "grad_norm": 565.7047729492188, + "learning_rate": 6.840925059215846e-06, + "loss": 59.8162, + "step": 188930 + }, + { + "epoch": 0.7633415078560261, + "grad_norm": 456.9344177246094, + "learning_rate": 6.8388222518891965e-06, + "loss": 55.9066, + "step": 188940 + }, + { + "epoch": 0.7633819091213937, + "grad_norm": 438.3843688964844, + "learning_rate": 6.836719701146468e-06, + "loss": 82.3441, + "step": 188950 + }, + { + "epoch": 0.7634223103867613, + "grad_norm": 949.9837036132812, + "learning_rate": 6.834617407028679e-06, + "loss": 105.9829, + "step": 188960 + }, + { + "epoch": 0.763462711652129, + "grad_norm": 813.074462890625, + "learning_rate": 6.832515369576787e-06, + "loss": 106.4736, + "step": 188970 + }, + { + "epoch": 0.7635031129174966, + "grad_norm": 483.5528259277344, + "learning_rate": 6.830413588831801e-06, + "loss": 52.9557, + "step": 188980 + }, + { + "epoch": 0.7635435141828643, + "grad_norm": 970.957763671875, + "learning_rate": 6.8283120648346765e-06, + "loss": 68.6692, + "step": 188990 + }, + { + "epoch": 0.7635839154482318, + "grad_norm": 555.3351440429688, + "learning_rate": 6.826210797626389e-06, + "loss": 49.3143, + "step": 189000 + }, + { + "epoch": 0.7636243167135994, + "grad_norm": 836.4052734375, + "learning_rate": 6.824109787247913e-06, + "loss": 84.6562, + "step": 189010 + }, + { + "epoch": 0.7636647179789671, + "grad_norm": 594.2843017578125, + "learning_rate": 6.822009033740191e-06, + "loss": 53.856, + "step": 189020 + }, + { + "epoch": 0.7637051192443347, + "grad_norm": 341.9842834472656, + "learning_rate": 6.819908537144188e-06, + "loss": 50.3893, + "step": 189030 + }, + { + "epoch": 0.7637455205097023, + "grad_norm": 858.8280029296875, + "learning_rate": 6.817808297500854e-06, + "loss": 59.8233, + "step": 189040 + }, + { + "epoch": 0.76378592177507, + "grad_norm": 1010.714599609375, + "learning_rate": 6.815708314851133e-06, + "loss": 79.4481, + "step": 189050 + }, + { + "epoch": 0.7638263230404376, + "grad_norm": 727.966064453125, + "learning_rate": 6.813608589235967e-06, + "loss": 63.3032, + "step": 189060 + }, + { + "epoch": 0.7638667243058053, + "grad_norm": 489.0967102050781, + "learning_rate": 6.811509120696296e-06, + "loss": 48.6363, + "step": 189070 + }, + { + "epoch": 0.7639071255711729, + "grad_norm": 721.1561279296875, + "learning_rate": 6.809409909273039e-06, + "loss": 48.1504, + "step": 189080 + }, + { + "epoch": 0.7639475268365405, + "grad_norm": 1203.9013671875, + "learning_rate": 6.807310955007127e-06, + "loss": 60.3027, + "step": 189090 + }, + { + "epoch": 0.7639879281019082, + "grad_norm": 798.1652221679688, + "learning_rate": 6.805212257939479e-06, + "loss": 67.114, + "step": 189100 + }, + { + "epoch": 0.7640283293672758, + "grad_norm": 1156.73046875, + "learning_rate": 6.803113818111011e-06, + "loss": 77.292, + "step": 189110 + }, + { + "epoch": 0.7640687306326435, + "grad_norm": 1025.5556640625, + "learning_rate": 6.801015635562642e-06, + "loss": 76.9283, + "step": 189120 + }, + { + "epoch": 0.764109131898011, + "grad_norm": 1084.420654296875, + "learning_rate": 6.7989177103352534e-06, + "loss": 96.6411, + "step": 189130 + }, + { + "epoch": 0.7641495331633786, + "grad_norm": 744.471435546875, + "learning_rate": 
6.796820042469774e-06, + "loss": 58.3563, + "step": 189140 + }, + { + "epoch": 0.7641899344287463, + "grad_norm": 649.8134765625, + "learning_rate": 6.794722632007078e-06, + "loss": 79.3432, + "step": 189150 + }, + { + "epoch": 0.7642303356941139, + "grad_norm": 561.8554077148438, + "learning_rate": 6.792625478988064e-06, + "loss": 76.2612, + "step": 189160 + }, + { + "epoch": 0.7642707369594816, + "grad_norm": 702.2109985351562, + "learning_rate": 6.790528583453615e-06, + "loss": 57.7148, + "step": 189170 + }, + { + "epoch": 0.7643111382248492, + "grad_norm": 781.2349243164062, + "learning_rate": 6.788431945444614e-06, + "loss": 70.3182, + "step": 189180 + }, + { + "epoch": 0.7643515394902168, + "grad_norm": 1899.4051513671875, + "learning_rate": 6.786335565001938e-06, + "loss": 69.312, + "step": 189190 + }, + { + "epoch": 0.7643919407555845, + "grad_norm": 755.2907104492188, + "learning_rate": 6.784239442166447e-06, + "loss": 61.8662, + "step": 189200 + }, + { + "epoch": 0.7644323420209521, + "grad_norm": 545.4260864257812, + "learning_rate": 6.782143576979012e-06, + "loss": 80.9013, + "step": 189210 + }, + { + "epoch": 0.7644727432863198, + "grad_norm": 673.3159790039062, + "learning_rate": 6.780047969480494e-06, + "loss": 88.814, + "step": 189220 + }, + { + "epoch": 0.7645131445516874, + "grad_norm": 607.2380981445312, + "learning_rate": 6.777952619711754e-06, + "loss": 79.4492, + "step": 189230 + }, + { + "epoch": 0.764553545817055, + "grad_norm": 303.07989501953125, + "learning_rate": 6.775857527713621e-06, + "loss": 69.5376, + "step": 189240 + }, + { + "epoch": 0.7645939470824227, + "grad_norm": 549.315185546875, + "learning_rate": 6.773762693526967e-06, + "loss": 57.0193, + "step": 189250 + }, + { + "epoch": 0.7646343483477902, + "grad_norm": 608.8783569335938, + "learning_rate": 6.771668117192611e-06, + "loss": 75.7486, + "step": 189260 + }, + { + "epoch": 0.7646747496131578, + "grad_norm": 380.8100891113281, + "learning_rate": 6.769573798751394e-06, + "loss": 46.7195, + "step": 189270 + }, + { + "epoch": 0.7647151508785255, + "grad_norm": 1674.7188720703125, + "learning_rate": 6.767479738244158e-06, + "loss": 65.9049, + "step": 189280 + }, + { + "epoch": 0.7647555521438931, + "grad_norm": 1150.0416259765625, + "learning_rate": 6.765385935711702e-06, + "loss": 75.715, + "step": 189290 + }, + { + "epoch": 0.7647959534092608, + "grad_norm": 1280.1080322265625, + "learning_rate": 6.763292391194874e-06, + "loss": 67.8313, + "step": 189300 + }, + { + "epoch": 0.7648363546746284, + "grad_norm": 290.21417236328125, + "learning_rate": 6.761199104734468e-06, + "loss": 61.0676, + "step": 189310 + }, + { + "epoch": 0.764876755939996, + "grad_norm": 597.0133056640625, + "learning_rate": 6.759106076371303e-06, + "loss": 55.0202, + "step": 189320 + }, + { + "epoch": 0.7649171572053637, + "grad_norm": 851.4161987304688, + "learning_rate": 6.757013306146178e-06, + "loss": 59.8131, + "step": 189330 + }, + { + "epoch": 0.7649575584707313, + "grad_norm": 1095.5576171875, + "learning_rate": 6.754920794099902e-06, + "loss": 92.6023, + "step": 189340 + }, + { + "epoch": 0.764997959736099, + "grad_norm": 860.0962524414062, + "learning_rate": 6.752828540273266e-06, + "loss": 69.1045, + "step": 189350 + }, + { + "epoch": 0.7650383610014666, + "grad_norm": 638.0568237304688, + "learning_rate": 6.750736544707055e-06, + "loss": 69.0989, + "step": 189360 + }, + { + "epoch": 0.7650787622668342, + "grad_norm": 681.0977783203125, + "learning_rate": 6.748644807442053e-06, + "loss": 87.2004, + "step": 189370 + }, 
+ { + "epoch": 0.7651191635322018, + "grad_norm": 511.8110656738281, + "learning_rate": 6.746553328519046e-06, + "loss": 66.3888, + "step": 189380 + }, + { + "epoch": 0.7651595647975694, + "grad_norm": 596.735107421875, + "learning_rate": 6.744462107978809e-06, + "loss": 70.6137, + "step": 189390 + }, + { + "epoch": 0.765199966062937, + "grad_norm": 1632.048828125, + "learning_rate": 6.7423711458620964e-06, + "loss": 87.8455, + "step": 189400 + }, + { + "epoch": 0.7652403673283047, + "grad_norm": 1130.7247314453125, + "learning_rate": 6.7402804422096945e-06, + "loss": 79.6602, + "step": 189410 + }, + { + "epoch": 0.7652807685936723, + "grad_norm": 353.8612976074219, + "learning_rate": 6.738189997062348e-06, + "loss": 72.5685, + "step": 189420 + }, + { + "epoch": 0.76532116985904, + "grad_norm": 483.01898193359375, + "learning_rate": 6.7360998104608144e-06, + "loss": 54.4754, + "step": 189430 + }, + { + "epoch": 0.7653615711244076, + "grad_norm": 583.0953369140625, + "learning_rate": 6.734009882445842e-06, + "loss": 84.1692, + "step": 189440 + }, + { + "epoch": 0.7654019723897753, + "grad_norm": 426.40863037109375, + "learning_rate": 6.731920213058179e-06, + "loss": 40.9038, + "step": 189450 + }, + { + "epoch": 0.7654423736551429, + "grad_norm": 1110.40869140625, + "learning_rate": 6.7298308023385684e-06, + "loss": 77.0272, + "step": 189460 + }, + { + "epoch": 0.7654827749205105, + "grad_norm": 1249.0963134765625, + "learning_rate": 6.727741650327731e-06, + "loss": 67.2977, + "step": 189470 + }, + { + "epoch": 0.7655231761858782, + "grad_norm": 377.71136474609375, + "learning_rate": 6.7256527570664034e-06, + "loss": 52.4784, + "step": 189480 + }, + { + "epoch": 0.7655635774512458, + "grad_norm": 678.1589965820312, + "learning_rate": 6.7235641225953095e-06, + "loss": 91.8264, + "step": 189490 + }, + { + "epoch": 0.7656039787166135, + "grad_norm": 1279.087890625, + "learning_rate": 6.721475746955168e-06, + "loss": 59.4189, + "step": 189500 + }, + { + "epoch": 0.765644379981981, + "grad_norm": 266.1073303222656, + "learning_rate": 6.7193876301866935e-06, + "loss": 67.372, + "step": 189510 + }, + { + "epoch": 0.7656847812473486, + "grad_norm": 318.35845947265625, + "learning_rate": 6.717299772330601e-06, + "loss": 104.356, + "step": 189520 + }, + { + "epoch": 0.7657251825127163, + "grad_norm": 580.206787109375, + "learning_rate": 6.715212173427581e-06, + "loss": 47.1984, + "step": 189530 + }, + { + "epoch": 0.7657655837780839, + "grad_norm": 475.68560791015625, + "learning_rate": 6.713124833518338e-06, + "loss": 82.8596, + "step": 189540 + }, + { + "epoch": 0.7658059850434515, + "grad_norm": 486.5301208496094, + "learning_rate": 6.711037752643574e-06, + "loss": 64.5186, + "step": 189550 + }, + { + "epoch": 0.7658463863088192, + "grad_norm": 582.0872192382812, + "learning_rate": 6.708950930843958e-06, + "loss": 73.6277, + "step": 189560 + }, + { + "epoch": 0.7658867875741868, + "grad_norm": 941.3411254882812, + "learning_rate": 6.7068643681602e-06, + "loss": 82.727, + "step": 189570 + }, + { + "epoch": 0.7659271888395545, + "grad_norm": 621.3583984375, + "learning_rate": 6.704778064632953e-06, + "loss": 60.6301, + "step": 189580 + }, + { + "epoch": 0.7659675901049221, + "grad_norm": 554.7158813476562, + "learning_rate": 6.702692020302912e-06, + "loss": 71.7657, + "step": 189590 + }, + { + "epoch": 0.7660079913702897, + "grad_norm": 890.2998657226562, + "learning_rate": 6.700606235210731e-06, + "loss": 88.768, + "step": 189600 + }, + { + "epoch": 0.7660483926356574, + "grad_norm": 
1169.2220458984375, + "learning_rate": 6.6985207093970805e-06, + "loss": 67.6145, + "step": 189610 + }, + { + "epoch": 0.766088793901025, + "grad_norm": 543.5916748046875, + "learning_rate": 6.6964354429026224e-06, + "loss": 68.9486, + "step": 189620 + }, + { + "epoch": 0.7661291951663927, + "grad_norm": 690.3978881835938, + "learning_rate": 6.694350435767989e-06, + "loss": 75.8305, + "step": 189630 + }, + { + "epoch": 0.7661695964317602, + "grad_norm": 343.31427001953125, + "learning_rate": 6.692265688033859e-06, + "loss": 55.134, + "step": 189640 + }, + { + "epoch": 0.7662099976971278, + "grad_norm": 1550.248046875, + "learning_rate": 6.690181199740855e-06, + "loss": 100.306, + "step": 189650 + }, + { + "epoch": 0.7662503989624955, + "grad_norm": 622.28369140625, + "learning_rate": 6.688096970929618e-06, + "loss": 50.3057, + "step": 189660 + }, + { + "epoch": 0.7662908002278631, + "grad_norm": 294.98419189453125, + "learning_rate": 6.686013001640787e-06, + "loss": 60.2076, + "step": 189670 + }, + { + "epoch": 0.7663312014932308, + "grad_norm": 376.04046630859375, + "learning_rate": 6.683929291914992e-06, + "loss": 53.8187, + "step": 189680 + }, + { + "epoch": 0.7663716027585984, + "grad_norm": 542.3377685546875, + "learning_rate": 6.681845841792845e-06, + "loss": 62.1366, + "step": 189690 + }, + { + "epoch": 0.766412004023966, + "grad_norm": 974.8051147460938, + "learning_rate": 6.679762651314969e-06, + "loss": 61.471, + "step": 189700 + }, + { + "epoch": 0.7664524052893337, + "grad_norm": 465.47607421875, + "learning_rate": 6.677679720521979e-06, + "loss": 65.2075, + "step": 189710 + }, + { + "epoch": 0.7664928065547013, + "grad_norm": 757.8418579101562, + "learning_rate": 6.6755970494544805e-06, + "loss": 75.0754, + "step": 189720 + }, + { + "epoch": 0.766533207820069, + "grad_norm": 580.18408203125, + "learning_rate": 6.6735146381530865e-06, + "loss": 70.4301, + "step": 189730 + }, + { + "epoch": 0.7665736090854366, + "grad_norm": 924.4154663085938, + "learning_rate": 6.671432486658371e-06, + "loss": 50.1199, + "step": 189740 + }, + { + "epoch": 0.7666140103508042, + "grad_norm": 522.9467163085938, + "learning_rate": 6.669350595010953e-06, + "loss": 61.8218, + "step": 189750 + }, + { + "epoch": 0.7666544116161719, + "grad_norm": 918.3714599609375, + "learning_rate": 6.6672689632514035e-06, + "loss": 61.632, + "step": 189760 + }, + { + "epoch": 0.7666948128815394, + "grad_norm": 699.2474975585938, + "learning_rate": 6.665187591420308e-06, + "loss": 60.5899, + "step": 189770 + }, + { + "epoch": 0.766735214146907, + "grad_norm": 1241.8499755859375, + "learning_rate": 6.663106479558248e-06, + "loss": 75.2482, + "step": 189780 + }, + { + "epoch": 0.7667756154122747, + "grad_norm": 461.50079345703125, + "learning_rate": 6.66102562770579e-06, + "loss": 50.7787, + "step": 189790 + }, + { + "epoch": 0.7668160166776423, + "grad_norm": 808.5877685546875, + "learning_rate": 6.658945035903515e-06, + "loss": 95.637, + "step": 189800 + }, + { + "epoch": 0.76685641794301, + "grad_norm": 861.9600219726562, + "learning_rate": 6.656864704191965e-06, + "loss": 47.9792, + "step": 189810 + }, + { + "epoch": 0.7668968192083776, + "grad_norm": 384.26605224609375, + "learning_rate": 6.654784632611711e-06, + "loss": 57.8752, + "step": 189820 + }, + { + "epoch": 0.7669372204737452, + "grad_norm": 684.581787109375, + "learning_rate": 6.6527048212032975e-06, + "loss": 51.5964, + "step": 189830 + }, + { + "epoch": 0.7669776217391129, + "grad_norm": 742.0848388671875, + "learning_rate": 6.650625270007283e-06, + 
"loss": 72.8324, + "step": 189840 + }, + { + "epoch": 0.7670180230044805, + "grad_norm": 407.1302490234375, + "learning_rate": 6.648545979064189e-06, + "loss": 67.2055, + "step": 189850 + }, + { + "epoch": 0.7670584242698482, + "grad_norm": 367.6036071777344, + "learning_rate": 6.646466948414578e-06, + "loss": 115.5097, + "step": 189860 + }, + { + "epoch": 0.7670988255352158, + "grad_norm": 545.5615234375, + "learning_rate": 6.644388178098962e-06, + "loss": 76.9488, + "step": 189870 + }, + { + "epoch": 0.7671392268005834, + "grad_norm": 737.835693359375, + "learning_rate": 6.642309668157873e-06, + "loss": 85.902, + "step": 189880 + }, + { + "epoch": 0.7671796280659511, + "grad_norm": 410.6335144042969, + "learning_rate": 6.6402314186318415e-06, + "loss": 79.4609, + "step": 189890 + }, + { + "epoch": 0.7672200293313186, + "grad_norm": 160.69558715820312, + "learning_rate": 6.638153429561365e-06, + "loss": 91.1432, + "step": 189900 + }, + { + "epoch": 0.7672604305966862, + "grad_norm": 674.5972900390625, + "learning_rate": 6.636075700986979e-06, + "loss": 56.1305, + "step": 189910 + }, + { + "epoch": 0.7673008318620539, + "grad_norm": 957.2105712890625, + "learning_rate": 6.633998232949171e-06, + "loss": 71.5634, + "step": 189920 + }, + { + "epoch": 0.7673412331274215, + "grad_norm": 697.7079467773438, + "learning_rate": 6.631921025488446e-06, + "loss": 47.8747, + "step": 189930 + }, + { + "epoch": 0.7673816343927892, + "grad_norm": 344.3099670410156, + "learning_rate": 6.629844078645304e-06, + "loss": 95.0805, + "step": 189940 + }, + { + "epoch": 0.7674220356581568, + "grad_norm": 472.14678955078125, + "learning_rate": 6.6277673924602385e-06, + "loss": 44.9424, + "step": 189950 + }, + { + "epoch": 0.7674624369235244, + "grad_norm": 891.1736450195312, + "learning_rate": 6.625690966973734e-06, + "loss": 83.7155, + "step": 189960 + }, + { + "epoch": 0.7675028381888921, + "grad_norm": 354.3487548828125, + "learning_rate": 6.623614802226266e-06, + "loss": 53.2805, + "step": 189970 + }, + { + "epoch": 0.7675432394542597, + "grad_norm": 872.1060791015625, + "learning_rate": 6.621538898258315e-06, + "loss": 72.8829, + "step": 189980 + }, + { + "epoch": 0.7675836407196274, + "grad_norm": 274.9184875488281, + "learning_rate": 6.619463255110347e-06, + "loss": 72.169, + "step": 189990 + }, + { + "epoch": 0.767624041984995, + "grad_norm": 462.15374755859375, + "learning_rate": 6.617387872822842e-06, + "loss": 60.075, + "step": 190000 + }, + { + "epoch": 0.7676644432503626, + "grad_norm": 615.9740600585938, + "learning_rate": 6.615312751436236e-06, + "loss": 71.0631, + "step": 190010 + }, + { + "epoch": 0.7677048445157302, + "grad_norm": 422.23480224609375, + "learning_rate": 6.613237890991011e-06, + "loss": 82.6971, + "step": 190020 + }, + { + "epoch": 0.7677452457810978, + "grad_norm": 385.18896484375, + "learning_rate": 6.6111632915276e-06, + "loss": 74.8147, + "step": 190030 + }, + { + "epoch": 0.7677856470464655, + "grad_norm": 1042.2801513671875, + "learning_rate": 6.609088953086453e-06, + "loss": 53.0451, + "step": 190040 + }, + { + "epoch": 0.7678260483118331, + "grad_norm": 554.4845581054688, + "learning_rate": 6.607014875708017e-06, + "loss": 69.0554, + "step": 190050 + }, + { + "epoch": 0.7678664495772007, + "grad_norm": 461.3852844238281, + "learning_rate": 6.604941059432708e-06, + "loss": 76.6464, + "step": 190060 + }, + { + "epoch": 0.7679068508425684, + "grad_norm": 717.5750122070312, + "learning_rate": 6.602867504300983e-06, + "loss": 88.9784, + "step": 190070 + }, + { + "epoch": 
0.767947252107936, + "grad_norm": 757.75732421875, + "learning_rate": 6.600794210353248e-06, + "loss": 61.6036, + "step": 190080 + }, + { + "epoch": 0.7679876533733037, + "grad_norm": 746.795166015625, + "learning_rate": 6.598721177629926e-06, + "loss": 52.8891, + "step": 190090 + }, + { + "epoch": 0.7680280546386713, + "grad_norm": 864.9928588867188, + "learning_rate": 6.596648406171435e-06, + "loss": 79.2395, + "step": 190100 + }, + { + "epoch": 0.7680684559040389, + "grad_norm": 714.5169677734375, + "learning_rate": 6.594575896018185e-06, + "loss": 66.4727, + "step": 190110 + }, + { + "epoch": 0.7681088571694066, + "grad_norm": 7920.95068359375, + "learning_rate": 6.592503647210578e-06, + "loss": 75.8851, + "step": 190120 + }, + { + "epoch": 0.7681492584347742, + "grad_norm": 378.0105285644531, + "learning_rate": 6.590431659789021e-06, + "loss": 62.7346, + "step": 190130 + }, + { + "epoch": 0.7681896597001419, + "grad_norm": 638.637451171875, + "learning_rate": 6.5883599337938974e-06, + "loss": 72.6359, + "step": 190140 + }, + { + "epoch": 0.7682300609655094, + "grad_norm": 153.34121704101562, + "learning_rate": 6.5862884692656e-06, + "loss": 42.9558, + "step": 190150 + }, + { + "epoch": 0.768270462230877, + "grad_norm": 258.4450378417969, + "learning_rate": 6.5842172662445165e-06, + "loss": 48.7623, + "step": 190160 + }, + { + "epoch": 0.7683108634962447, + "grad_norm": 733.5274047851562, + "learning_rate": 6.5821463247710236e-06, + "loss": 52.5029, + "step": 190170 + }, + { + "epoch": 0.7683512647616123, + "grad_norm": 500.1925964355469, + "learning_rate": 6.580075644885504e-06, + "loss": 59.0043, + "step": 190180 + }, + { + "epoch": 0.76839166602698, + "grad_norm": 865.663818359375, + "learning_rate": 6.578005226628303e-06, + "loss": 49.6149, + "step": 190190 + }, + { + "epoch": 0.7684320672923476, + "grad_norm": 679.999755859375, + "learning_rate": 6.575935070039816e-06, + "loss": 33.3424, + "step": 190200 + }, + { + "epoch": 0.7684724685577152, + "grad_norm": 649.2726440429688, + "learning_rate": 6.573865175160377e-06, + "loss": 49.1295, + "step": 190210 + }, + { + "epoch": 0.7685128698230829, + "grad_norm": 548.59326171875, + "learning_rate": 6.5717955420303505e-06, + "loss": 75.6645, + "step": 190220 + }, + { + "epoch": 0.7685532710884505, + "grad_norm": 633.4735107421875, + "learning_rate": 6.569726170690089e-06, + "loss": 50.8107, + "step": 190230 + }, + { + "epoch": 0.7685936723538181, + "grad_norm": 1948.6717529296875, + "learning_rate": 6.567657061179917e-06, + "loss": 94.6202, + "step": 190240 + }, + { + "epoch": 0.7686340736191858, + "grad_norm": 461.013427734375, + "learning_rate": 6.565588213540199e-06, + "loss": 53.8193, + "step": 190250 + }, + { + "epoch": 0.7686744748845534, + "grad_norm": 713.9374389648438, + "learning_rate": 6.5635196278112476e-06, + "loss": 50.3379, + "step": 190260 + }, + { + "epoch": 0.7687148761499211, + "grad_norm": 718.1594848632812, + "learning_rate": 6.561451304033401e-06, + "loss": 71.8234, + "step": 190270 + }, + { + "epoch": 0.7687552774152886, + "grad_norm": 1004.2112426757812, + "learning_rate": 6.55938324224698e-06, + "loss": 55.6879, + "step": 190280 + }, + { + "epoch": 0.7687956786806562, + "grad_norm": 985.9564208984375, + "learning_rate": 6.557315442492307e-06, + "loss": 89.0765, + "step": 190290 + }, + { + "epoch": 0.7688360799460239, + "grad_norm": 889.1470336914062, + "learning_rate": 6.555247904809685e-06, + "loss": 92.8266, + "step": 190300 + }, + { + "epoch": 0.7688764812113915, + "grad_norm": 613.9267578125, + 
"learning_rate": 6.553180629239426e-06, + "loss": 73.294, + "step": 190310 + }, + { + "epoch": 0.7689168824767592, + "grad_norm": 517.21240234375, + "learning_rate": 6.5511136158218335e-06, + "loss": 56.3072, + "step": 190320 + }, + { + "epoch": 0.7689572837421268, + "grad_norm": 953.6690063476562, + "learning_rate": 6.549046864597206e-06, + "loss": 71.0792, + "step": 190330 + }, + { + "epoch": 0.7689976850074944, + "grad_norm": 765.76171875, + "learning_rate": 6.54698037560584e-06, + "loss": 98.5149, + "step": 190340 + }, + { + "epoch": 0.7690380862728621, + "grad_norm": 543.9337768554688, + "learning_rate": 6.544914148888006e-06, + "loss": 88.2649, + "step": 190350 + }, + { + "epoch": 0.7690784875382297, + "grad_norm": 623.8895874023438, + "learning_rate": 6.54284818448401e-06, + "loss": 80.4054, + "step": 190360 + }, + { + "epoch": 0.7691188888035974, + "grad_norm": 2251.6103515625, + "learning_rate": 6.540782482434109e-06, + "loss": 62.1923, + "step": 190370 + }, + { + "epoch": 0.769159290068965, + "grad_norm": 1066.6488037109375, + "learning_rate": 6.538717042778586e-06, + "loss": 69.9105, + "step": 190380 + }, + { + "epoch": 0.7691996913343326, + "grad_norm": 573.7947998046875, + "learning_rate": 6.536651865557708e-06, + "loss": 57.1547, + "step": 190390 + }, + { + "epoch": 0.7692400925997003, + "grad_norm": 498.8564758300781, + "learning_rate": 6.534586950811723e-06, + "loss": 63.1078, + "step": 190400 + }, + { + "epoch": 0.7692804938650678, + "grad_norm": 1875.03466796875, + "learning_rate": 6.53252229858091e-06, + "loss": 68.1868, + "step": 190410 + }, + { + "epoch": 0.7693208951304354, + "grad_norm": 927.0859375, + "learning_rate": 6.530457908905501e-06, + "loss": 69.0762, + "step": 190420 + }, + { + "epoch": 0.7693612963958031, + "grad_norm": 954.7362060546875, + "learning_rate": 6.528393781825751e-06, + "loss": 55.4591, + "step": 190430 + }, + { + "epoch": 0.7694016976611707, + "grad_norm": 1053.924072265625, + "learning_rate": 6.526329917381897e-06, + "loss": 100.1899, + "step": 190440 + }, + { + "epoch": 0.7694420989265384, + "grad_norm": 294.7392883300781, + "learning_rate": 6.524266315614187e-06, + "loss": 33.1186, + "step": 190450 + }, + { + "epoch": 0.769482500191906, + "grad_norm": 961.7716674804688, + "learning_rate": 6.52220297656283e-06, + "loss": 110.2929, + "step": 190460 + }, + { + "epoch": 0.7695229014572736, + "grad_norm": 1748.997314453125, + "learning_rate": 6.5201399002680766e-06, + "loss": 57.4576, + "step": 190470 + }, + { + "epoch": 0.7695633027226413, + "grad_norm": 939.7906494140625, + "learning_rate": 6.5180770867701296e-06, + "loss": 74.1702, + "step": 190480 + }, + { + "epoch": 0.7696037039880089, + "grad_norm": 461.5770263671875, + "learning_rate": 6.516014536109212e-06, + "loss": 45.2678, + "step": 190490 + }, + { + "epoch": 0.7696441052533766, + "grad_norm": 377.8082275390625, + "learning_rate": 6.513952248325539e-06, + "loss": 62.2947, + "step": 190500 + }, + { + "epoch": 0.7696845065187442, + "grad_norm": 608.4674072265625, + "learning_rate": 6.511890223459296e-06, + "loss": 58.2895, + "step": 190510 + }, + { + "epoch": 0.7697249077841118, + "grad_norm": 318.7868957519531, + "learning_rate": 6.509828461550711e-06, + "loss": 78.8968, + "step": 190520 + }, + { + "epoch": 0.7697653090494795, + "grad_norm": 1130.9185791015625, + "learning_rate": 6.507766962639959e-06, + "loss": 62.8123, + "step": 190530 + }, + { + "epoch": 0.769805710314847, + "grad_norm": 752.0926513671875, + "learning_rate": 6.505705726767235e-06, + "loss": 62.3554, + "step": 190540 
+ }, + { + "epoch": 0.7698461115802147, + "grad_norm": 271.91693115234375, + "learning_rate": 6.503644753972727e-06, + "loss": 45.4225, + "step": 190550 + }, + { + "epoch": 0.7698865128455823, + "grad_norm": 365.96527099609375, + "learning_rate": 6.501584044296611e-06, + "loss": 70.8228, + "step": 190560 + }, + { + "epoch": 0.7699269141109499, + "grad_norm": 435.7467346191406, + "learning_rate": 6.499523597779071e-06, + "loss": 87.1472, + "step": 190570 + }, + { + "epoch": 0.7699673153763176, + "grad_norm": 440.9698791503906, + "learning_rate": 6.497463414460263e-06, + "loss": 49.8014, + "step": 190580 + }, + { + "epoch": 0.7700077166416852, + "grad_norm": 881.2247924804688, + "learning_rate": 6.495403494380357e-06, + "loss": 77.6349, + "step": 190590 + }, + { + "epoch": 0.7700481179070529, + "grad_norm": 339.3438415527344, + "learning_rate": 6.493343837579511e-06, + "loss": 70.0535, + "step": 190600 + }, + { + "epoch": 0.7700885191724205, + "grad_norm": 747.866943359375, + "learning_rate": 6.49128444409789e-06, + "loss": 46.9609, + "step": 190610 + }, + { + "epoch": 0.7701289204377881, + "grad_norm": 1445.71826171875, + "learning_rate": 6.489225313975618e-06, + "loss": 74.1643, + "step": 190620 + }, + { + "epoch": 0.7701693217031558, + "grad_norm": 1179.379638671875, + "learning_rate": 6.487166447252868e-06, + "loss": 89.6389, + "step": 190630 + }, + { + "epoch": 0.7702097229685234, + "grad_norm": 669.6832885742188, + "learning_rate": 6.48510784396976e-06, + "loss": 111.6766, + "step": 190640 + }, + { + "epoch": 0.770250124233891, + "grad_norm": 1106.0050048828125, + "learning_rate": 6.48304950416643e-06, + "loss": 66.1637, + "step": 190650 + }, + { + "epoch": 0.7702905254992586, + "grad_norm": 621.4625244140625, + "learning_rate": 6.480991427883016e-06, + "loss": 67.7641, + "step": 190660 + }, + { + "epoch": 0.7703309267646262, + "grad_norm": 459.8868103027344, + "learning_rate": 6.478933615159624e-06, + "loss": 33.3347, + "step": 190670 + }, + { + "epoch": 0.7703713280299939, + "grad_norm": 646.8049926757812, + "learning_rate": 6.476876066036393e-06, + "loss": 49.8683, + "step": 190680 + }, + { + "epoch": 0.7704117292953615, + "grad_norm": 699.8062133789062, + "learning_rate": 6.474818780553412e-06, + "loss": 52.9341, + "step": 190690 + }, + { + "epoch": 0.7704521305607291, + "grad_norm": 339.10296630859375, + "learning_rate": 6.4727617587508164e-06, + "loss": 56.8448, + "step": 190700 + }, + { + "epoch": 0.7704925318260968, + "grad_norm": 368.4428405761719, + "learning_rate": 6.470705000668687e-06, + "loss": 82.027, + "step": 190710 + }, + { + "epoch": 0.7705329330914644, + "grad_norm": 434.3442687988281, + "learning_rate": 6.468648506347128e-06, + "loss": 64.9866, + "step": 190720 + }, + { + "epoch": 0.7705733343568321, + "grad_norm": 453.04534912109375, + "learning_rate": 6.46659227582624e-06, + "loss": 46.3064, + "step": 190730 + }, + { + "epoch": 0.7706137356221997, + "grad_norm": 734.4913330078125, + "learning_rate": 6.464536309146099e-06, + "loss": 50.1476, + "step": 190740 + }, + { + "epoch": 0.7706541368875673, + "grad_norm": 509.9897155761719, + "learning_rate": 6.4624806063467885e-06, + "loss": 63.4646, + "step": 190750 + }, + { + "epoch": 0.770694538152935, + "grad_norm": 570.4957885742188, + "learning_rate": 6.460425167468389e-06, + "loss": 84.9752, + "step": 190760 + }, + { + "epoch": 0.7707349394183026, + "grad_norm": 479.7458190917969, + "learning_rate": 6.45836999255097e-06, + "loss": 47.9135, + "step": 190770 + }, + { + "epoch": 0.7707753406836703, + "grad_norm": 
1187.158447265625, + "learning_rate": 6.456315081634601e-06, + "loss": 64.4686, + "step": 190780 + }, + { + "epoch": 0.7708157419490378, + "grad_norm": 1028.2825927734375, + "learning_rate": 6.454260434759349e-06, + "loss": 73.399, + "step": 190790 + }, + { + "epoch": 0.7708561432144054, + "grad_norm": 466.76324462890625, + "learning_rate": 6.4522060519652574e-06, + "loss": 51.7793, + "step": 190800 + }, + { + "epoch": 0.7708965444797731, + "grad_norm": 586.8349609375, + "learning_rate": 6.450151933292384e-06, + "loss": 60.0963, + "step": 190810 + }, + { + "epoch": 0.7709369457451407, + "grad_norm": 573.009521484375, + "learning_rate": 6.448098078780773e-06, + "loss": 55.2995, + "step": 190820 + }, + { + "epoch": 0.7709773470105084, + "grad_norm": 991.2747802734375, + "learning_rate": 6.446044488470469e-06, + "loss": 62.1485, + "step": 190830 + }, + { + "epoch": 0.771017748275876, + "grad_norm": 449.85528564453125, + "learning_rate": 6.443991162401511e-06, + "loss": 118.2428, + "step": 190840 + }, + { + "epoch": 0.7710581495412436, + "grad_norm": 807.4923095703125, + "learning_rate": 6.441938100613909e-06, + "loss": 40.7691, + "step": 190850 + }, + { + "epoch": 0.7710985508066113, + "grad_norm": 304.4437561035156, + "learning_rate": 6.439885303147721e-06, + "loss": 52.984, + "step": 190860 + }, + { + "epoch": 0.7711389520719789, + "grad_norm": 623.5908203125, + "learning_rate": 6.4378327700429425e-06, + "loss": 49.9533, + "step": 190870 + }, + { + "epoch": 0.7711793533373466, + "grad_norm": 538.954833984375, + "learning_rate": 6.435780501339595e-06, + "loss": 80.6219, + "step": 190880 + }, + { + "epoch": 0.7712197546027142, + "grad_norm": 68.42324829101562, + "learning_rate": 6.433728497077692e-06, + "loss": 66.1329, + "step": 190890 + }, + { + "epoch": 0.7712601558680818, + "grad_norm": 586.590576171875, + "learning_rate": 6.431676757297241e-06, + "loss": 54.3464, + "step": 190900 + }, + { + "epoch": 0.7713005571334495, + "grad_norm": 925.2080688476562, + "learning_rate": 6.42962528203823e-06, + "loss": 80.0032, + "step": 190910 + }, + { + "epoch": 0.771340958398817, + "grad_norm": 773.5975341796875, + "learning_rate": 6.427574071340663e-06, + "loss": 73.0037, + "step": 190920 + }, + { + "epoch": 0.7713813596641846, + "grad_norm": 1005.4553833007812, + "learning_rate": 6.425523125244526e-06, + "loss": 100.5488, + "step": 190930 + }, + { + "epoch": 0.7714217609295523, + "grad_norm": 451.8177185058594, + "learning_rate": 6.4234724437898045e-06, + "loss": 73.3186, + "step": 190940 + }, + { + "epoch": 0.7714621621949199, + "grad_norm": 1423.0772705078125, + "learning_rate": 6.421422027016484e-06, + "loss": 74.8619, + "step": 190950 + }, + { + "epoch": 0.7715025634602876, + "grad_norm": 380.0577392578125, + "learning_rate": 6.419371874964517e-06, + "loss": 65.5312, + "step": 190960 + }, + { + "epoch": 0.7715429647256552, + "grad_norm": 875.1213989257812, + "learning_rate": 6.417321987673901e-06, + "loss": 68.5478, + "step": 190970 + }, + { + "epoch": 0.7715833659910228, + "grad_norm": 1299.07275390625, + "learning_rate": 6.415272365184582e-06, + "loss": 125.4197, + "step": 190980 + }, + { + "epoch": 0.7716237672563905, + "grad_norm": 620.374267578125, + "learning_rate": 6.4132230075365174e-06, + "loss": 72.9265, + "step": 190990 + }, + { + "epoch": 0.7716641685217581, + "grad_norm": 758.3084106445312, + "learning_rate": 6.411173914769675e-06, + "loss": 88.5917, + "step": 191000 + }, + { + "epoch": 0.7717045697871258, + "grad_norm": 639.29541015625, + "learning_rate": 6.40912508692398e-06, + 
"loss": 48.4248, + "step": 191010 + }, + { + "epoch": 0.7717449710524934, + "grad_norm": 632.3731079101562, + "learning_rate": 6.4070765240394e-06, + "loss": 61.1927, + "step": 191020 + }, + { + "epoch": 0.771785372317861, + "grad_norm": 714.8125610351562, + "learning_rate": 6.405028226155856e-06, + "loss": 62.1172, + "step": 191030 + }, + { + "epoch": 0.7718257735832287, + "grad_norm": 1295.268798828125, + "learning_rate": 6.402980193313286e-06, + "loss": 86.4561, + "step": 191040 + }, + { + "epoch": 0.7718661748485962, + "grad_norm": 609.6432495117188, + "learning_rate": 6.400932425551616e-06, + "loss": 78.1552, + "step": 191050 + }, + { + "epoch": 0.7719065761139638, + "grad_norm": 503.58160400390625, + "learning_rate": 6.398884922910777e-06, + "loss": 48.6493, + "step": 191060 + }, + { + "epoch": 0.7719469773793315, + "grad_norm": 911.647216796875, + "learning_rate": 6.396837685430673e-06, + "loss": 86.832, + "step": 191070 + }, + { + "epoch": 0.7719873786446991, + "grad_norm": 797.71533203125, + "learning_rate": 6.39479071315122e-06, + "loss": 81.5523, + "step": 191080 + }, + { + "epoch": 0.7720277799100668, + "grad_norm": 556.1776123046875, + "learning_rate": 6.392744006112328e-06, + "loss": 53.8265, + "step": 191090 + }, + { + "epoch": 0.7720681811754344, + "grad_norm": 655.4906616210938, + "learning_rate": 6.390697564353896e-06, + "loss": 47.4413, + "step": 191100 + }, + { + "epoch": 0.772108582440802, + "grad_norm": 539.3427124023438, + "learning_rate": 6.3886513879158276e-06, + "loss": 74.2423, + "step": 191110 + }, + { + "epoch": 0.7721489837061697, + "grad_norm": 1033.9366455078125, + "learning_rate": 6.386605476837997e-06, + "loss": 64.7274, + "step": 191120 + }, + { + "epoch": 0.7721893849715373, + "grad_norm": 2350.822998046875, + "learning_rate": 6.384559831160313e-06, + "loss": 79.3491, + "step": 191130 + }, + { + "epoch": 0.772229786236905, + "grad_norm": 564.466064453125, + "learning_rate": 6.3825144509226366e-06, + "loss": 52.2864, + "step": 191140 + }, + { + "epoch": 0.7722701875022726, + "grad_norm": 449.1656188964844, + "learning_rate": 6.380469336164854e-06, + "loss": 59.6453, + "step": 191150 + }, + { + "epoch": 0.7723105887676402, + "grad_norm": 1133.794921875, + "learning_rate": 6.378424486926833e-06, + "loss": 47.8476, + "step": 191160 + }, + { + "epoch": 0.7723509900330079, + "grad_norm": 472.85736083984375, + "learning_rate": 6.37637990324844e-06, + "loss": 54.5064, + "step": 191170 + }, + { + "epoch": 0.7723913912983754, + "grad_norm": 232.22286987304688, + "learning_rate": 6.3743355851695395e-06, + "loss": 65.9735, + "step": 191180 + }, + { + "epoch": 0.7724317925637431, + "grad_norm": 697.4982299804688, + "learning_rate": 6.372291532729977e-06, + "loss": 56.1236, + "step": 191190 + }, + { + "epoch": 0.7724721938291107, + "grad_norm": 632.9629516601562, + "learning_rate": 6.3702477459696065e-06, + "loss": 66.3784, + "step": 191200 + }, + { + "epoch": 0.7725125950944783, + "grad_norm": 710.5852661132812, + "learning_rate": 6.368204224928274e-06, + "loss": 82.8564, + "step": 191210 + }, + { + "epoch": 0.772552996359846, + "grad_norm": 1384.5115966796875, + "learning_rate": 6.366160969645818e-06, + "loss": 58.8928, + "step": 191220 + }, + { + "epoch": 0.7725933976252136, + "grad_norm": 990.8935546875, + "learning_rate": 6.364117980162072e-06, + "loss": 68.3125, + "step": 191230 + }, + { + "epoch": 0.7726337988905813, + "grad_norm": 1858.6026611328125, + "learning_rate": 6.362075256516873e-06, + "loss": 86.7325, + "step": 191240 + }, + { + "epoch": 
0.7726742001559489, + "grad_norm": 671.0968627929688, + "learning_rate": 6.360032798750033e-06, + "loss": 64.0212, + "step": 191250 + }, + { + "epoch": 0.7727146014213165, + "grad_norm": 832.0399169921875, + "learning_rate": 6.357990606901374e-06, + "loss": 131.2876, + "step": 191260 + }, + { + "epoch": 0.7727550026866842, + "grad_norm": 488.19696044921875, + "learning_rate": 6.3559486810107195e-06, + "loss": 77.4147, + "step": 191270 + }, + { + "epoch": 0.7727954039520518, + "grad_norm": 739.49072265625, + "learning_rate": 6.3539070211178555e-06, + "loss": 64.446, + "step": 191280 + }, + { + "epoch": 0.7728358052174195, + "grad_norm": 495.807861328125, + "learning_rate": 6.351865627262614e-06, + "loss": 53.6028, + "step": 191290 + }, + { + "epoch": 0.772876206482787, + "grad_norm": 478.5067443847656, + "learning_rate": 6.349824499484764e-06, + "loss": 60.1334, + "step": 191300 + }, + { + "epoch": 0.7729166077481546, + "grad_norm": 447.1767883300781, + "learning_rate": 6.347783637824128e-06, + "loss": 39.9242, + "step": 191310 + }, + { + "epoch": 0.7729570090135223, + "grad_norm": 417.71917724609375, + "learning_rate": 6.345743042320472e-06, + "loss": 55.3933, + "step": 191320 + }, + { + "epoch": 0.7729974102788899, + "grad_norm": 642.5586547851562, + "learning_rate": 6.343702713013584e-06, + "loss": 79.8977, + "step": 191330 + }, + { + "epoch": 0.7730378115442575, + "grad_norm": 444.4623718261719, + "learning_rate": 6.3416626499432475e-06, + "loss": 74.616, + "step": 191340 + }, + { + "epoch": 0.7730782128096252, + "grad_norm": 826.3330688476562, + "learning_rate": 6.339622853149225e-06, + "loss": 60.1513, + "step": 191350 + }, + { + "epoch": 0.7731186140749928, + "grad_norm": 932.6246337890625, + "learning_rate": 6.3375833226712875e-06, + "loss": 52.1827, + "step": 191360 + }, + { + "epoch": 0.7731590153403605, + "grad_norm": 341.6733703613281, + "learning_rate": 6.335544058549197e-06, + "loss": 53.8966, + "step": 191370 + }, + { + "epoch": 0.7731994166057281, + "grad_norm": 1657.4544677734375, + "learning_rate": 6.333505060822709e-06, + "loss": 77.4989, + "step": 191380 + }, + { + "epoch": 0.7732398178710957, + "grad_norm": 812.90673828125, + "learning_rate": 6.331466329531577e-06, + "loss": 56.8515, + "step": 191390 + }, + { + "epoch": 0.7732802191364634, + "grad_norm": 1413.89404296875, + "learning_rate": 6.329427864715552e-06, + "loss": 69.7783, + "step": 191400 + }, + { + "epoch": 0.773320620401831, + "grad_norm": 1414.002197265625, + "learning_rate": 6.327389666414363e-06, + "loss": 51.1385, + "step": 191410 + }, + { + "epoch": 0.7733610216671987, + "grad_norm": 414.334716796875, + "learning_rate": 6.325351734667751e-06, + "loss": 49.5178, + "step": 191420 + }, + { + "epoch": 0.7734014229325662, + "grad_norm": 630.6682739257812, + "learning_rate": 6.323314069515447e-06, + "loss": 63.1528, + "step": 191430 + }, + { + "epoch": 0.7734418241979338, + "grad_norm": 814.5623779296875, + "learning_rate": 6.3212766709971765e-06, + "loss": 61.5502, + "step": 191440 + }, + { + "epoch": 0.7734822254633015, + "grad_norm": 450.4351501464844, + "learning_rate": 6.319239539152666e-06, + "loss": 84.7693, + "step": 191450 + }, + { + "epoch": 0.7735226267286691, + "grad_norm": 564.216064453125, + "learning_rate": 6.317202674021612e-06, + "loss": 63.5433, + "step": 191460 + }, + { + "epoch": 0.7735630279940368, + "grad_norm": 234.31298828125, + "learning_rate": 6.315166075643748e-06, + "loss": 62.6835, + "step": 191470 + }, + { + "epoch": 0.7736034292594044, + "grad_norm": 1019.1788330078125, + 
"learning_rate": 6.313129744058759e-06, + "loss": 136.2877, + "step": 191480 + }, + { + "epoch": 0.773643830524772, + "grad_norm": 742.2164306640625, + "learning_rate": 6.311093679306355e-06, + "loss": 74.0237, + "step": 191490 + }, + { + "epoch": 0.7736842317901397, + "grad_norm": 564.5493774414062, + "learning_rate": 6.3090578814262256e-06, + "loss": 50.1222, + "step": 191500 + }, + { + "epoch": 0.7737246330555073, + "grad_norm": 598.9107666015625, + "learning_rate": 6.307022350458068e-06, + "loss": 85.5209, + "step": 191510 + }, + { + "epoch": 0.773765034320875, + "grad_norm": 755.5479125976562, + "learning_rate": 6.304987086441554e-06, + "loss": 52.011, + "step": 191520 + }, + { + "epoch": 0.7738054355862426, + "grad_norm": 1030.6885986328125, + "learning_rate": 6.302952089416366e-06, + "loss": 86.9388, + "step": 191530 + }, + { + "epoch": 0.7738458368516102, + "grad_norm": 283.48211669921875, + "learning_rate": 6.300917359422178e-06, + "loss": 45.5439, + "step": 191540 + }, + { + "epoch": 0.7738862381169779, + "grad_norm": 1014.779541015625, + "learning_rate": 6.298882896498662e-06, + "loss": 61.4784, + "step": 191550 + }, + { + "epoch": 0.7739266393823454, + "grad_norm": 1113.964111328125, + "learning_rate": 6.296848700685483e-06, + "loss": 58.667, + "step": 191560 + }, + { + "epoch": 0.773967040647713, + "grad_norm": 364.2021789550781, + "learning_rate": 6.29481477202228e-06, + "loss": 55.5754, + "step": 191570 + }, + { + "epoch": 0.7740074419130807, + "grad_norm": 935.03271484375, + "learning_rate": 6.292781110548731e-06, + "loss": 47.2811, + "step": 191580 + }, + { + "epoch": 0.7740478431784483, + "grad_norm": 672.1116333007812, + "learning_rate": 6.290747716304464e-06, + "loss": 64.1227, + "step": 191590 + }, + { + "epoch": 0.774088244443816, + "grad_norm": 728.4430541992188, + "learning_rate": 6.28871458932913e-06, + "loss": 52.2764, + "step": 191600 + }, + { + "epoch": 0.7741286457091836, + "grad_norm": 427.00262451171875, + "learning_rate": 6.286681729662372e-06, + "loss": 65.0047, + "step": 191610 + }, + { + "epoch": 0.7741690469745512, + "grad_norm": 1073.3209228515625, + "learning_rate": 6.284649137343797e-06, + "loss": 111.2843, + "step": 191620 + }, + { + "epoch": 0.7742094482399189, + "grad_norm": 386.9932556152344, + "learning_rate": 6.282616812413065e-06, + "loss": 82.5071, + "step": 191630 + }, + { + "epoch": 0.7742498495052865, + "grad_norm": 582.89111328125, + "learning_rate": 6.280584754909771e-06, + "loss": 48.6536, + "step": 191640 + }, + { + "epoch": 0.7742902507706542, + "grad_norm": 683.801513671875, + "learning_rate": 6.278552964873541e-06, + "loss": 65.7422, + "step": 191650 + }, + { + "epoch": 0.7743306520360218, + "grad_norm": 1512.27783203125, + "learning_rate": 6.276521442343986e-06, + "loss": 87.5563, + "step": 191660 + }, + { + "epoch": 0.7743710533013894, + "grad_norm": 322.765869140625, + "learning_rate": 6.274490187360718e-06, + "loss": 91.3536, + "step": 191670 + }, + { + "epoch": 0.7744114545667571, + "grad_norm": 705.3366088867188, + "learning_rate": 6.272459199963323e-06, + "loss": 76.1564, + "step": 191680 + }, + { + "epoch": 0.7744518558321246, + "grad_norm": 1146.198486328125, + "learning_rate": 6.270428480191402e-06, + "loss": 93.8143, + "step": 191690 + }, + { + "epoch": 0.7744922570974923, + "grad_norm": 1843.5528564453125, + "learning_rate": 6.268398028084548e-06, + "loss": 94.4678, + "step": 191700 + }, + { + "epoch": 0.7745326583628599, + "grad_norm": 609.1464233398438, + "learning_rate": 6.266367843682344e-06, + "loss": 73.1073, + 
"step": 191710 + }, + { + "epoch": 0.7745730596282275, + "grad_norm": 802.6950073242188, + "learning_rate": 6.2643379270243754e-06, + "loss": 63.6381, + "step": 191720 + }, + { + "epoch": 0.7746134608935952, + "grad_norm": 656.4966430664062, + "learning_rate": 6.2623082781501975e-06, + "loss": 54.4682, + "step": 191730 + }, + { + "epoch": 0.7746538621589628, + "grad_norm": 1270.56298828125, + "learning_rate": 6.260278897099405e-06, + "loss": 83.2015, + "step": 191740 + }, + { + "epoch": 0.7746942634243305, + "grad_norm": 444.7412109375, + "learning_rate": 6.258249783911543e-06, + "loss": 51.88, + "step": 191750 + }, + { + "epoch": 0.7747346646896981, + "grad_norm": 856.7164916992188, + "learning_rate": 6.256220938626177e-06, + "loss": 106.9003, + "step": 191760 + }, + { + "epoch": 0.7747750659550657, + "grad_norm": 839.1027221679688, + "learning_rate": 6.2541923612828555e-06, + "loss": 66.8934, + "step": 191770 + }, + { + "epoch": 0.7748154672204334, + "grad_norm": 1597.9354248046875, + "learning_rate": 6.252164051921135e-06, + "loss": 104.8467, + "step": 191780 + }, + { + "epoch": 0.774855868485801, + "grad_norm": 443.55047607421875, + "learning_rate": 6.2501360105805586e-06, + "loss": 47.5749, + "step": 191790 + }, + { + "epoch": 0.7748962697511687, + "grad_norm": 646.2819213867188, + "learning_rate": 6.248108237300654e-06, + "loss": 65.3502, + "step": 191800 + }, + { + "epoch": 0.7749366710165362, + "grad_norm": 648.8126831054688, + "learning_rate": 6.246080732120961e-06, + "loss": 52.0919, + "step": 191810 + }, + { + "epoch": 0.7749770722819038, + "grad_norm": 729.031494140625, + "learning_rate": 6.244053495081001e-06, + "loss": 65.6154, + "step": 191820 + }, + { + "epoch": 0.7750174735472715, + "grad_norm": 880.466552734375, + "learning_rate": 6.2420265262203035e-06, + "loss": 52.1908, + "step": 191830 + }, + { + "epoch": 0.7750578748126391, + "grad_norm": 887.5421752929688, + "learning_rate": 6.239999825578382e-06, + "loss": 53.6738, + "step": 191840 + }, + { + "epoch": 0.7750982760780067, + "grad_norm": 988.9507446289062, + "learning_rate": 6.237973393194754e-06, + "loss": 73.0981, + "step": 191850 + }, + { + "epoch": 0.7751386773433744, + "grad_norm": 594.8179321289062, + "learning_rate": 6.2359472291089115e-06, + "loss": 105.0557, + "step": 191860 + }, + { + "epoch": 0.775179078608742, + "grad_norm": 770.0471801757812, + "learning_rate": 6.233921333360367e-06, + "loss": 60.1264, + "step": 191870 + }, + { + "epoch": 0.7752194798741097, + "grad_norm": 1229.397705078125, + "learning_rate": 6.2318957059886175e-06, + "loss": 61.3688, + "step": 191880 + }, + { + "epoch": 0.7752598811394773, + "grad_norm": 616.4024658203125, + "learning_rate": 6.229870347033138e-06, + "loss": 65.3249, + "step": 191890 + }, + { + "epoch": 0.7753002824048449, + "grad_norm": 652.3720703125, + "learning_rate": 6.227845256533436e-06, + "loss": 51.9941, + "step": 191900 + }, + { + "epoch": 0.7753406836702126, + "grad_norm": 709.734375, + "learning_rate": 6.225820434528971e-06, + "loss": 63.7624, + "step": 191910 + }, + { + "epoch": 0.7753810849355802, + "grad_norm": 757.5690307617188, + "learning_rate": 6.223795881059238e-06, + "loss": 38.9841, + "step": 191920 + }, + { + "epoch": 0.7754214862009479, + "grad_norm": 405.1529846191406, + "learning_rate": 6.221771596163693e-06, + "loss": 53.887, + "step": 191930 + }, + { + "epoch": 0.7754618874663154, + "grad_norm": 938.506103515625, + "learning_rate": 6.219747579881803e-06, + "loss": 65.4152, + "step": 191940 + }, + { + "epoch": 0.775502288731683, + "grad_norm": 
892.7459106445312, + "learning_rate": 6.217723832253036e-06, + "loss": 90.5183, + "step": 191950 + }, + { + "epoch": 0.7755426899970507, + "grad_norm": 398.6973571777344, + "learning_rate": 6.215700353316831e-06, + "loss": 57.8186, + "step": 191960 + }, + { + "epoch": 0.7755830912624183, + "grad_norm": 1066.22021484375, + "learning_rate": 6.2136771431126445e-06, + "loss": 68.9586, + "step": 191970 + }, + { + "epoch": 0.775623492527786, + "grad_norm": 199.497314453125, + "learning_rate": 6.21165420167992e-06, + "loss": 53.4938, + "step": 191980 + }, + { + "epoch": 0.7756638937931536, + "grad_norm": 342.9711608886719, + "learning_rate": 6.209631529058096e-06, + "loss": 54.7633, + "step": 191990 + }, + { + "epoch": 0.7757042950585212, + "grad_norm": 780.1665649414062, + "learning_rate": 6.207609125286604e-06, + "loss": 81.845, + "step": 192000 + }, + { + "epoch": 0.7757446963238889, + "grad_norm": 633.3209228515625, + "learning_rate": 6.205586990404879e-06, + "loss": 61.596, + "step": 192010 + }, + { + "epoch": 0.7757850975892565, + "grad_norm": 622.4115600585938, + "learning_rate": 6.203565124452333e-06, + "loss": 57.8331, + "step": 192020 + }, + { + "epoch": 0.7758254988546242, + "grad_norm": 525.5982666015625, + "learning_rate": 6.201543527468385e-06, + "loss": 61.8056, + "step": 192030 + }, + { + "epoch": 0.7758659001199918, + "grad_norm": 791.05810546875, + "learning_rate": 6.1995221994924584e-06, + "loss": 69.4042, + "step": 192040 + }, + { + "epoch": 0.7759063013853594, + "grad_norm": 988.77490234375, + "learning_rate": 6.1975011405639375e-06, + "loss": 85.9597, + "step": 192050 + }, + { + "epoch": 0.7759467026507271, + "grad_norm": 1007.81640625, + "learning_rate": 6.195480350722252e-06, + "loss": 98.9707, + "step": 192060 + }, + { + "epoch": 0.7759871039160946, + "grad_norm": 502.5628662109375, + "learning_rate": 6.1934598300067714e-06, + "loss": 86.4695, + "step": 192070 + }, + { + "epoch": 0.7760275051814622, + "grad_norm": 718.0623168945312, + "learning_rate": 6.191439578456911e-06, + "loss": 70.434, + "step": 192080 + }, + { + "epoch": 0.7760679064468299, + "grad_norm": 1073.2186279296875, + "learning_rate": 6.189419596112039e-06, + "loss": 81.4068, + "step": 192090 + }, + { + "epoch": 0.7761083077121975, + "grad_norm": 671.04736328125, + "learning_rate": 6.1873998830115425e-06, + "loss": 53.035, + "step": 192100 + }, + { + "epoch": 0.7761487089775652, + "grad_norm": 494.220703125, + "learning_rate": 6.185380439194797e-06, + "loss": 74.0048, + "step": 192110 + }, + { + "epoch": 0.7761891102429328, + "grad_norm": 932.9890747070312, + "learning_rate": 6.183361264701177e-06, + "loss": 79.0048, + "step": 192120 + }, + { + "epoch": 0.7762295115083004, + "grad_norm": 812.2348022460938, + "learning_rate": 6.1813423595700375e-06, + "loss": 92.4473, + "step": 192130 + }, + { + "epoch": 0.7762699127736681, + "grad_norm": 1155.57080078125, + "learning_rate": 6.179323723840742e-06, + "loss": 110.232, + "step": 192140 + }, + { + "epoch": 0.7763103140390357, + "grad_norm": 713.0004272460938, + "learning_rate": 6.1773053575526475e-06, + "loss": 53.2221, + "step": 192150 + }, + { + "epoch": 0.7763507153044034, + "grad_norm": 999.5262451171875, + "learning_rate": 6.1752872607451e-06, + "loss": 61.7108, + "step": 192160 + }, + { + "epoch": 0.776391116569771, + "grad_norm": 475.6064758300781, + "learning_rate": 6.173269433457451e-06, + "loss": 62.7497, + "step": 192170 + }, + { + "epoch": 0.7764315178351386, + "grad_norm": 472.2151794433594, + "learning_rate": 6.171251875729021e-06, + "loss": 
67.5533, + "step": 192180 + }, + { + "epoch": 0.7764719191005063, + "grad_norm": 1266.0361328125, + "learning_rate": 6.169234587599167e-06, + "loss": 47.695, + "step": 192190 + }, + { + "epoch": 0.7765123203658738, + "grad_norm": 492.7273864746094, + "learning_rate": 6.1672175691072e-06, + "loss": 49.8885, + "step": 192200 + }, + { + "epoch": 0.7765527216312414, + "grad_norm": 602.6182861328125, + "learning_rate": 6.165200820292447e-06, + "loss": 79.0584, + "step": 192210 + }, + { + "epoch": 0.7765931228966091, + "grad_norm": 834.0003051757812, + "learning_rate": 6.163184341194233e-06, + "loss": 63.228, + "step": 192220 + }, + { + "epoch": 0.7766335241619767, + "grad_norm": 1044.143310546875, + "learning_rate": 6.161168131851851e-06, + "loss": 55.7816, + "step": 192230 + }, + { + "epoch": 0.7766739254273444, + "grad_norm": 656.3073120117188, + "learning_rate": 6.159152192304636e-06, + "loss": 55.918, + "step": 192240 + }, + { + "epoch": 0.776714326692712, + "grad_norm": 549.8525390625, + "learning_rate": 6.157136522591867e-06, + "loss": 97.7119, + "step": 192250 + }, + { + "epoch": 0.7767547279580796, + "grad_norm": 484.082763671875, + "learning_rate": 6.155121122752847e-06, + "loss": 48.4009, + "step": 192260 + }, + { + "epoch": 0.7767951292234473, + "grad_norm": 1287.5958251953125, + "learning_rate": 6.153105992826869e-06, + "loss": 84.8068, + "step": 192270 + }, + { + "epoch": 0.7768355304888149, + "grad_norm": 482.5641174316406, + "learning_rate": 6.1510911328532196e-06, + "loss": 85.6454, + "step": 192280 + }, + { + "epoch": 0.7768759317541826, + "grad_norm": 752.2254028320312, + "learning_rate": 6.149076542871184e-06, + "loss": 66.5494, + "step": 192290 + }, + { + "epoch": 0.7769163330195502, + "grad_norm": 389.2235107421875, + "learning_rate": 6.147062222920026e-06, + "loss": 42.1979, + "step": 192300 + }, + { + "epoch": 0.7769567342849178, + "grad_norm": 1351.02783203125, + "learning_rate": 6.145048173039023e-06, + "loss": 85.1078, + "step": 192310 + }, + { + "epoch": 0.7769971355502855, + "grad_norm": 1861.569091796875, + "learning_rate": 6.143034393267437e-06, + "loss": 101.2367, + "step": 192320 + }, + { + "epoch": 0.777037536815653, + "grad_norm": 608.4346923828125, + "learning_rate": 6.141020883644538e-06, + "loss": 69.0672, + "step": 192330 + }, + { + "epoch": 0.7770779380810207, + "grad_norm": 845.0034790039062, + "learning_rate": 6.139007644209558e-06, + "loss": 71.6527, + "step": 192340 + }, + { + "epoch": 0.7771183393463883, + "grad_norm": 448.2313537597656, + "learning_rate": 6.136994675001773e-06, + "loss": 83.5164, + "step": 192350 + }, + { + "epoch": 0.7771587406117559, + "grad_norm": 561.2680053710938, + "learning_rate": 6.134981976060408e-06, + "loss": 46.6002, + "step": 192360 + }, + { + "epoch": 0.7771991418771236, + "grad_norm": 585.8251953125, + "learning_rate": 6.1329695474247076e-06, + "loss": 102.3212, + "step": 192370 + }, + { + "epoch": 0.7772395431424912, + "grad_norm": 821.9971313476562, + "learning_rate": 6.130957389133909e-06, + "loss": 50.9963, + "step": 192380 + }, + { + "epoch": 0.7772799444078589, + "grad_norm": 1098.508056640625, + "learning_rate": 6.1289455012272256e-06, + "loss": 56.5813, + "step": 192390 + }, + { + "epoch": 0.7773203456732265, + "grad_norm": 566.7648315429688, + "learning_rate": 6.126933883743904e-06, + "loss": 57.7636, + "step": 192400 + }, + { + "epoch": 0.7773607469385941, + "grad_norm": 830.5091552734375, + "learning_rate": 6.124922536723143e-06, + "loss": 72.3474, + "step": 192410 + }, + { + "epoch": 0.7774011482039618, + 
"grad_norm": 565.48095703125, + "learning_rate": 6.1229114602041594e-06, + "loss": 73.4954, + "step": 192420 + }, + { + "epoch": 0.7774415494693294, + "grad_norm": 1588.011474609375, + "learning_rate": 6.1209006542261605e-06, + "loss": 90.5782, + "step": 192430 + }, + { + "epoch": 0.7774819507346971, + "grad_norm": 399.1236572265625, + "learning_rate": 6.1188901188283516e-06, + "loss": 61.7964, + "step": 192440 + }, + { + "epoch": 0.7775223520000646, + "grad_norm": 1073.4281005859375, + "learning_rate": 6.116879854049924e-06, + "loss": 58.0468, + "step": 192450 + }, + { + "epoch": 0.7775627532654322, + "grad_norm": 628.6279907226562, + "learning_rate": 6.1148698599300775e-06, + "loss": 71.3962, + "step": 192460 + }, + { + "epoch": 0.7776031545307999, + "grad_norm": 731.9573974609375, + "learning_rate": 6.112860136507986e-06, + "loss": 47.4258, + "step": 192470 + }, + { + "epoch": 0.7776435557961675, + "grad_norm": 1154.928955078125, + "learning_rate": 6.110850683822835e-06, + "loss": 67.6917, + "step": 192480 + }, + { + "epoch": 0.7776839570615351, + "grad_norm": 1337.6942138671875, + "learning_rate": 6.108841501913807e-06, + "loss": 83.948, + "step": 192490 + }, + { + "epoch": 0.7777243583269028, + "grad_norm": 593.1469116210938, + "learning_rate": 6.106832590820053e-06, + "loss": 61.6718, + "step": 192500 + }, + { + "epoch": 0.7777647595922704, + "grad_norm": 680.2430419921875, + "learning_rate": 6.1048239505807625e-06, + "loss": 79.2565, + "step": 192510 + }, + { + "epoch": 0.7778051608576381, + "grad_norm": 488.7096862792969, + "learning_rate": 6.102815581235071e-06, + "loss": 55.1154, + "step": 192520 + }, + { + "epoch": 0.7778455621230057, + "grad_norm": 546.9906005859375, + "learning_rate": 6.100807482822157e-06, + "loss": 66.31, + "step": 192530 + }, + { + "epoch": 0.7778859633883733, + "grad_norm": 451.1929626464844, + "learning_rate": 6.098799655381151e-06, + "loss": 47.8171, + "step": 192540 + }, + { + "epoch": 0.777926364653741, + "grad_norm": 1005.1991577148438, + "learning_rate": 6.096792098951203e-06, + "loss": 68.0849, + "step": 192550 + }, + { + "epoch": 0.7779667659191086, + "grad_norm": 337.366455078125, + "learning_rate": 6.0947848135714545e-06, + "loss": 51.8558, + "step": 192560 + }, + { + "epoch": 0.7780071671844763, + "grad_norm": 628.5864868164062, + "learning_rate": 6.092777799281031e-06, + "loss": 69.521, + "step": 192570 + }, + { + "epoch": 0.7780475684498438, + "grad_norm": 1133.491455078125, + "learning_rate": 6.090771056119065e-06, + "loss": 93.9408, + "step": 192580 + }, + { + "epoch": 0.7780879697152114, + "grad_norm": 1647.654296875, + "learning_rate": 6.088764584124676e-06, + "loss": 78.6563, + "step": 192590 + }, + { + "epoch": 0.7781283709805791, + "grad_norm": 268.3278503417969, + "learning_rate": 6.086758383336984e-06, + "loss": 91.0652, + "step": 192600 + }, + { + "epoch": 0.7781687722459467, + "grad_norm": 833.716552734375, + "learning_rate": 6.084752453795102e-06, + "loss": 68.2321, + "step": 192610 + }, + { + "epoch": 0.7782091735113144, + "grad_norm": 500.3929138183594, + "learning_rate": 6.082746795538139e-06, + "loss": 92.6453, + "step": 192620 + }, + { + "epoch": 0.778249574776682, + "grad_norm": 1957.5723876953125, + "learning_rate": 6.080741408605187e-06, + "loss": 81.2132, + "step": 192630 + }, + { + "epoch": 0.7782899760420496, + "grad_norm": 665.4374389648438, + "learning_rate": 6.078736293035348e-06, + "loss": 85.2876, + "step": 192640 + }, + { + "epoch": 0.7783303773074173, + "grad_norm": 1295.927978515625, + "learning_rate": 
6.076731448867716e-06, + "loss": 74.1995, + "step": 192650 + }, + { + "epoch": 0.7783707785727849, + "grad_norm": 644.1216430664062, + "learning_rate": 6.0747268761413615e-06, + "loss": 49.6221, + "step": 192660 + }, + { + "epoch": 0.7784111798381526, + "grad_norm": 770.0709228515625, + "learning_rate": 6.072722574895389e-06, + "loss": 60.3407, + "step": 192670 + }, + { + "epoch": 0.7784515811035202, + "grad_norm": 623.1210327148438, + "learning_rate": 6.0707185451688475e-06, + "loss": 64.8495, + "step": 192680 + }, + { + "epoch": 0.7784919823688878, + "grad_norm": 828.9789428710938, + "learning_rate": 6.068714787000831e-06, + "loss": 68.1498, + "step": 192690 + }, + { + "epoch": 0.7785323836342555, + "grad_norm": 768.6949462890625, + "learning_rate": 6.066711300430386e-06, + "loss": 78.675, + "step": 192700 + }, + { + "epoch": 0.778572784899623, + "grad_norm": 381.85284423828125, + "learning_rate": 6.0647080854965774e-06, + "loss": 52.3116, + "step": 192710 + }, + { + "epoch": 0.7786131861649906, + "grad_norm": 556.9998168945312, + "learning_rate": 6.062705142238465e-06, + "loss": 57.2637, + "step": 192720 + }, + { + "epoch": 0.7786535874303583, + "grad_norm": 523.2205810546875, + "learning_rate": 6.060702470695081e-06, + "loss": 62.5484, + "step": 192730 + }, + { + "epoch": 0.7786939886957259, + "grad_norm": 628.1618041992188, + "learning_rate": 6.05870007090549e-06, + "loss": 69.8523, + "step": 192740 + }, + { + "epoch": 0.7787343899610936, + "grad_norm": 862.494873046875, + "learning_rate": 6.056697942908712e-06, + "loss": 79.9763, + "step": 192750 + }, + { + "epoch": 0.7787747912264612, + "grad_norm": 611.233154296875, + "learning_rate": 6.054696086743786e-06, + "loss": 66.8356, + "step": 192760 + }, + { + "epoch": 0.7788151924918288, + "grad_norm": 361.74072265625, + "learning_rate": 6.052694502449738e-06, + "loss": 51.6141, + "step": 192770 + }, + { + "epoch": 0.7788555937571965, + "grad_norm": 361.911376953125, + "learning_rate": 6.0506931900656e-06, + "loss": 49.1206, + "step": 192780 + }, + { + "epoch": 0.7788959950225641, + "grad_norm": 690.0272827148438, + "learning_rate": 6.048692149630367e-06, + "loss": 39.3091, + "step": 192790 + }, + { + "epoch": 0.7789363962879318, + "grad_norm": 596.9361572265625, + "learning_rate": 6.046691381183074e-06, + "loss": 35.0671, + "step": 192800 + }, + { + "epoch": 0.7789767975532994, + "grad_norm": 291.2364807128906, + "learning_rate": 6.044690884762709e-06, + "loss": 59.8056, + "step": 192810 + }, + { + "epoch": 0.779017198818667, + "grad_norm": 564.7855834960938, + "learning_rate": 6.042690660408284e-06, + "loss": 100.4185, + "step": 192820 + }, + { + "epoch": 0.7790576000840347, + "grad_norm": 682.7779541015625, + "learning_rate": 6.040690708158794e-06, + "loss": 63.224, + "step": 192830 + }, + { + "epoch": 0.7790980013494022, + "grad_norm": 892.6104125976562, + "learning_rate": 6.038691028053212e-06, + "loss": 77.1416, + "step": 192840 + }, + { + "epoch": 0.7791384026147699, + "grad_norm": 634.3113403320312, + "learning_rate": 6.0366916201305505e-06, + "loss": 50.299, + "step": 192850 + }, + { + "epoch": 0.7791788038801375, + "grad_norm": 619.4231567382812, + "learning_rate": 6.034692484429767e-06, + "loss": 72.4558, + "step": 192860 + }, + { + "epoch": 0.7792192051455051, + "grad_norm": 971.195556640625, + "learning_rate": 6.032693620989844e-06, + "loss": 88.4117, + "step": 192870 + }, + { + "epoch": 0.7792596064108728, + "grad_norm": 472.37615966796875, + "learning_rate": 6.030695029849749e-06, + "loss": 52.457, + "step": 192880 + }, + 
{ + "epoch": 0.7793000076762404, + "grad_norm": 737.090087890625, + "learning_rate": 6.028696711048448e-06, + "loss": 81.9932, + "step": 192890 + }, + { + "epoch": 0.779340408941608, + "grad_norm": 1274.9072265625, + "learning_rate": 6.026698664624902e-06, + "loss": 73.9556, + "step": 192900 + }, + { + "epoch": 0.7793808102069757, + "grad_norm": 362.95928955078125, + "learning_rate": 6.024700890618053e-06, + "loss": 60.4489, + "step": 192910 + }, + { + "epoch": 0.7794212114723433, + "grad_norm": 757.0093383789062, + "learning_rate": 6.0227033890668574e-06, + "loss": 86.6964, + "step": 192920 + }, + { + "epoch": 0.779461612737711, + "grad_norm": 243.69692993164062, + "learning_rate": 6.020706160010254e-06, + "loss": 56.3482, + "step": 192930 + }, + { + "epoch": 0.7795020140030786, + "grad_norm": 861.7967529296875, + "learning_rate": 6.018709203487187e-06, + "loss": 70.8631, + "step": 192940 + }, + { + "epoch": 0.7795424152684463, + "grad_norm": 1085.746337890625, + "learning_rate": 6.01671251953657e-06, + "loss": 84.0383, + "step": 192950 + }, + { + "epoch": 0.7795828165338139, + "grad_norm": 725.4198608398438, + "learning_rate": 6.014716108197353e-06, + "loss": 84.7657, + "step": 192960 + }, + { + "epoch": 0.7796232177991814, + "grad_norm": 194.5527801513672, + "learning_rate": 6.012719969508441e-06, + "loss": 62.94, + "step": 192970 + }, + { + "epoch": 0.7796636190645491, + "grad_norm": 325.4653625488281, + "learning_rate": 6.010724103508754e-06, + "loss": 69.9466, + "step": 192980 + }, + { + "epoch": 0.7797040203299167, + "grad_norm": 1339.515869140625, + "learning_rate": 6.008728510237207e-06, + "loss": 78.7565, + "step": 192990 + }, + { + "epoch": 0.7797444215952843, + "grad_norm": 644.0164794921875, + "learning_rate": 6.00673318973269e-06, + "loss": 92.5255, + "step": 193000 + }, + { + "epoch": 0.779784822860652, + "grad_norm": 390.31512451171875, + "learning_rate": 6.004738142034128e-06, + "loss": 59.0804, + "step": 193010 + }, + { + "epoch": 0.7798252241260196, + "grad_norm": 845.5083618164062, + "learning_rate": 6.002743367180392e-06, + "loss": 78.4003, + "step": 193020 + }, + { + "epoch": 0.7798656253913873, + "grad_norm": 957.3787231445312, + "learning_rate": 6.0007488652103815e-06, + "loss": 84.7391, + "step": 193030 + }, + { + "epoch": 0.7799060266567549, + "grad_norm": 522.6249389648438, + "learning_rate": 5.998754636162978e-06, + "loss": 67.7759, + "step": 193040 + }, + { + "epoch": 0.7799464279221225, + "grad_norm": 650.2867431640625, + "learning_rate": 5.996760680077063e-06, + "loss": 64.6404, + "step": 193050 + }, + { + "epoch": 0.7799868291874902, + "grad_norm": 306.0176086425781, + "learning_rate": 5.994766996991512e-06, + "loss": 58.879, + "step": 193060 + }, + { + "epoch": 0.7800272304528578, + "grad_norm": 925.4105224609375, + "learning_rate": 5.992773586945184e-06, + "loss": 60.2117, + "step": 193070 + }, + { + "epoch": 0.7800676317182255, + "grad_norm": 531.270263671875, + "learning_rate": 5.9907804499769475e-06, + "loss": 36.7864, + "step": 193080 + }, + { + "epoch": 0.780108032983593, + "grad_norm": 674.1026611328125, + "learning_rate": 5.988787586125657e-06, + "loss": 58.6818, + "step": 193090 + }, + { + "epoch": 0.7801484342489606, + "grad_norm": 461.2280578613281, + "learning_rate": 5.986794995430172e-06, + "loss": 51.9944, + "step": 193100 + }, + { + "epoch": 0.7801888355143283, + "grad_norm": 1053.586669921875, + "learning_rate": 5.984802677929322e-06, + "loss": 69.9722, + "step": 193110 + }, + { + "epoch": 0.7802292367796959, + "grad_norm": 
534.9815063476562, + "learning_rate": 5.98281063366197e-06, + "loss": 55.995, + "step": 193120 + }, + { + "epoch": 0.7802696380450636, + "grad_norm": 824.3235473632812, + "learning_rate": 5.9808188626669305e-06, + "loss": 81.7188, + "step": 193130 + }, + { + "epoch": 0.7803100393104312, + "grad_norm": 794.3721313476562, + "learning_rate": 5.978827364983055e-06, + "loss": 65.4864, + "step": 193140 + }, + { + "epoch": 0.7803504405757988, + "grad_norm": 574.0903930664062, + "learning_rate": 5.976836140649154e-06, + "loss": 67.6464, + "step": 193150 + }, + { + "epoch": 0.7803908418411665, + "grad_norm": 505.6428527832031, + "learning_rate": 5.974845189704052e-06, + "loss": 54.0868, + "step": 193160 + }, + { + "epoch": 0.7804312431065341, + "grad_norm": 565.2737426757812, + "learning_rate": 5.97285451218657e-06, + "loss": 62.2761, + "step": 193170 + }, + { + "epoch": 0.7804716443719018, + "grad_norm": 415.7483215332031, + "learning_rate": 5.970864108135504e-06, + "loss": 56.4868, + "step": 193180 + }, + { + "epoch": 0.7805120456372694, + "grad_norm": 1353.9427490234375, + "learning_rate": 5.968873977589667e-06, + "loss": 64.1011, + "step": 193190 + }, + { + "epoch": 0.780552446902637, + "grad_norm": 1068.3836669921875, + "learning_rate": 5.966884120587852e-06, + "loss": 88.6753, + "step": 193200 + }, + { + "epoch": 0.7805928481680047, + "grad_norm": 424.0440979003906, + "learning_rate": 5.964894537168857e-06, + "loss": 56.8664, + "step": 193210 + }, + { + "epoch": 0.7806332494333722, + "grad_norm": 503.011962890625, + "learning_rate": 5.962905227371469e-06, + "loss": 80.5108, + "step": 193220 + }, + { + "epoch": 0.7806736506987398, + "grad_norm": 774.9210205078125, + "learning_rate": 5.960916191234478e-06, + "loss": 38.201, + "step": 193230 + }, + { + "epoch": 0.7807140519641075, + "grad_norm": 658.8988037109375, + "learning_rate": 5.958927428796646e-06, + "loss": 98.4709, + "step": 193240 + }, + { + "epoch": 0.7807544532294751, + "grad_norm": 824.0209350585938, + "learning_rate": 5.956938940096752e-06, + "loss": 49.58, + "step": 193250 + }, + { + "epoch": 0.7807948544948428, + "grad_norm": 655.883544921875, + "learning_rate": 5.954950725173565e-06, + "loss": 70.7274, + "step": 193260 + }, + { + "epoch": 0.7808352557602104, + "grad_norm": 728.4994506835938, + "learning_rate": 5.952962784065846e-06, + "loss": 74.1455, + "step": 193270 + }, + { + "epoch": 0.780875657025578, + "grad_norm": 558.8375854492188, + "learning_rate": 5.950975116812352e-06, + "loss": 67.8936, + "step": 193280 + }, + { + "epoch": 0.7809160582909457, + "grad_norm": 777.0060424804688, + "learning_rate": 5.94898772345182e-06, + "loss": 55.1411, + "step": 193290 + }, + { + "epoch": 0.7809564595563133, + "grad_norm": 566.2543334960938, + "learning_rate": 5.947000604023019e-06, + "loss": 54.1556, + "step": 193300 + }, + { + "epoch": 0.780996860821681, + "grad_norm": 542.4007568359375, + "learning_rate": 5.9450137585646686e-06, + "loss": 73.9296, + "step": 193310 + }, + { + "epoch": 0.7810372620870486, + "grad_norm": 363.5523376464844, + "learning_rate": 5.943027187115513e-06, + "loss": 69.4616, + "step": 193320 + }, + { + "epoch": 0.7810776633524162, + "grad_norm": 1168.805908203125, + "learning_rate": 5.941040889714284e-06, + "loss": 89.7062, + "step": 193330 + }, + { + "epoch": 0.7811180646177839, + "grad_norm": 893.1906127929688, + "learning_rate": 5.9390548663996894e-06, + "loss": 72.9124, + "step": 193340 + }, + { + "epoch": 0.7811584658831514, + "grad_norm": 1158.220947265625, + "learning_rate": 5.937069117210472e-06, + 
"loss": 66.1332, + "step": 193350 + }, + { + "epoch": 0.781198867148519, + "grad_norm": 883.4813232421875, + "learning_rate": 5.935083642185324e-06, + "loss": 71.9266, + "step": 193360 + }, + { + "epoch": 0.7812392684138867, + "grad_norm": 752.2177734375, + "learning_rate": 5.9330984413629635e-06, + "loss": 53.6306, + "step": 193370 + }, + { + "epoch": 0.7812796696792543, + "grad_norm": 1156.42822265625, + "learning_rate": 5.93111351478209e-06, + "loss": 74.1098, + "step": 193380 + }, + { + "epoch": 0.781320070944622, + "grad_norm": 250.25808715820312, + "learning_rate": 5.929128862481408e-06, + "loss": 60.2861, + "step": 193390 + }, + { + "epoch": 0.7813604722099896, + "grad_norm": 905.2194213867188, + "learning_rate": 5.927144484499598e-06, + "loss": 68.7759, + "step": 193400 + }, + { + "epoch": 0.7814008734753572, + "grad_norm": 716.717529296875, + "learning_rate": 5.9251603808753496e-06, + "loss": 61.6672, + "step": 193410 + }, + { + "epoch": 0.7814412747407249, + "grad_norm": 1032.39990234375, + "learning_rate": 5.923176551647347e-06, + "loss": 79.3003, + "step": 193420 + }, + { + "epoch": 0.7814816760060925, + "grad_norm": 553.2131958007812, + "learning_rate": 5.9211929968542655e-06, + "loss": 55.2663, + "step": 193430 + }, + { + "epoch": 0.7815220772714602, + "grad_norm": 534.0232543945312, + "learning_rate": 5.9192097165347794e-06, + "loss": 60.5868, + "step": 193440 + }, + { + "epoch": 0.7815624785368278, + "grad_norm": 626.5042114257812, + "learning_rate": 5.917226710727537e-06, + "loss": 82.6276, + "step": 193450 + }, + { + "epoch": 0.7816028798021954, + "grad_norm": 494.0221862792969, + "learning_rate": 5.915243979471223e-06, + "loss": 47.01, + "step": 193460 + }, + { + "epoch": 0.7816432810675631, + "grad_norm": 1023.9053955078125, + "learning_rate": 5.913261522804474e-06, + "loss": 118.9052, + "step": 193470 + }, + { + "epoch": 0.7816836823329306, + "grad_norm": 1164.347900390625, + "learning_rate": 5.911279340765941e-06, + "loss": 56.4245, + "step": 193480 + }, + { + "epoch": 0.7817240835982983, + "grad_norm": 670.9444580078125, + "learning_rate": 5.9092974333942725e-06, + "loss": 60.3436, + "step": 193490 + }, + { + "epoch": 0.7817644848636659, + "grad_norm": 850.8844604492188, + "learning_rate": 5.907315800728106e-06, + "loss": 83.3228, + "step": 193500 + }, + { + "epoch": 0.7818048861290335, + "grad_norm": 890.0475463867188, + "learning_rate": 5.905334442806077e-06, + "loss": 60.7459, + "step": 193510 + }, + { + "epoch": 0.7818452873944012, + "grad_norm": 918.8417358398438, + "learning_rate": 5.9033533596668055e-06, + "loss": 58.2635, + "step": 193520 + }, + { + "epoch": 0.7818856886597688, + "grad_norm": 922.885986328125, + "learning_rate": 5.901372551348917e-06, + "loss": 58.6142, + "step": 193530 + }, + { + "epoch": 0.7819260899251365, + "grad_norm": 827.7890625, + "learning_rate": 5.899392017891028e-06, + "loss": 48.0234, + "step": 193540 + }, + { + "epoch": 0.7819664911905041, + "grad_norm": 2104.95556640625, + "learning_rate": 5.897411759331757e-06, + "loss": 82.581, + "step": 193550 + }, + { + "epoch": 0.7820068924558717, + "grad_norm": 756.3392333984375, + "learning_rate": 5.895431775709694e-06, + "loss": 86.6861, + "step": 193560 + }, + { + "epoch": 0.7820472937212394, + "grad_norm": 753.9827880859375, + "learning_rate": 5.893452067063461e-06, + "loss": 75.1246, + "step": 193570 + }, + { + "epoch": 0.782087694986607, + "grad_norm": 1077.4871826171875, + "learning_rate": 5.8914726334316365e-06, + "loss": 61.2877, + "step": 193580 + }, + { + "epoch": 
0.7821280962519747, + "grad_norm": 1005.8966064453125, + "learning_rate": 5.8894934748528164e-06, + "loss": 108.7286, + "step": 193590 + }, + { + "epoch": 0.7821684975173423, + "grad_norm": 871.4405517578125, + "learning_rate": 5.887514591365593e-06, + "loss": 78.4792, + "step": 193600 + }, + { + "epoch": 0.7822088987827098, + "grad_norm": 575.4681396484375, + "learning_rate": 5.885535983008526e-06, + "loss": 59.2771, + "step": 193610 + }, + { + "epoch": 0.7822493000480775, + "grad_norm": 1157.1650390625, + "learning_rate": 5.883557649820211e-06, + "loss": 70.5195, + "step": 193620 + }, + { + "epoch": 0.7822897013134451, + "grad_norm": 931.9554443359375, + "learning_rate": 5.881579591839204e-06, + "loss": 57.2368, + "step": 193630 + }, + { + "epoch": 0.7823301025788127, + "grad_norm": 830.0060424804688, + "learning_rate": 5.8796018091040695e-06, + "loss": 78.9626, + "step": 193640 + }, + { + "epoch": 0.7823705038441804, + "grad_norm": 1186.939453125, + "learning_rate": 5.877624301653368e-06, + "loss": 42.3566, + "step": 193650 + }, + { + "epoch": 0.782410905109548, + "grad_norm": 844.5638427734375, + "learning_rate": 5.875647069525654e-06, + "loss": 81.5557, + "step": 193660 + }, + { + "epoch": 0.7824513063749157, + "grad_norm": 674.2859497070312, + "learning_rate": 5.873670112759475e-06, + "loss": 49.2572, + "step": 193670 + }, + { + "epoch": 0.7824917076402833, + "grad_norm": 397.14794921875, + "learning_rate": 5.871693431393366e-06, + "loss": 61.9543, + "step": 193680 + }, + { + "epoch": 0.782532108905651, + "grad_norm": 1331.496337890625, + "learning_rate": 5.869717025465864e-06, + "loss": 65.875, + "step": 193690 + }, + { + "epoch": 0.7825725101710186, + "grad_norm": 767.1388549804688, + "learning_rate": 5.867740895015505e-06, + "loss": 77.0436, + "step": 193700 + }, + { + "epoch": 0.7826129114363862, + "grad_norm": 570.083740234375, + "learning_rate": 5.865765040080822e-06, + "loss": 55.1907, + "step": 193710 + }, + { + "epoch": 0.7826533127017539, + "grad_norm": 224.1779327392578, + "learning_rate": 5.863789460700311e-06, + "loss": 48.4249, + "step": 193720 + }, + { + "epoch": 0.7826937139671214, + "grad_norm": 372.2606506347656, + "learning_rate": 5.861814156912515e-06, + "loss": 63.447, + "step": 193730 + }, + { + "epoch": 0.782734115232489, + "grad_norm": 612.2825317382812, + "learning_rate": 5.859839128755927e-06, + "loss": 68.3997, + "step": 193740 + }, + { + "epoch": 0.7827745164978567, + "grad_norm": 1099.052490234375, + "learning_rate": 5.857864376269051e-06, + "loss": 89.6385, + "step": 193750 + }, + { + "epoch": 0.7828149177632243, + "grad_norm": 409.23809814453125, + "learning_rate": 5.855889899490392e-06, + "loss": 39.1053, + "step": 193760 + }, + { + "epoch": 0.782855319028592, + "grad_norm": 922.84814453125, + "learning_rate": 5.853915698458441e-06, + "loss": 97.2234, + "step": 193770 + }, + { + "epoch": 0.7828957202939596, + "grad_norm": 931.1028442382812, + "learning_rate": 5.8519417732116935e-06, + "loss": 76.6767, + "step": 193780 + }, + { + "epoch": 0.7829361215593272, + "grad_norm": 745.1144409179688, + "learning_rate": 5.849968123788612e-06, + "loss": 49.0621, + "step": 193790 + }, + { + "epoch": 0.7829765228246949, + "grad_norm": 492.2763977050781, + "learning_rate": 5.847994750227699e-06, + "loss": 56.7337, + "step": 193800 + }, + { + "epoch": 0.7830169240900625, + "grad_norm": 634.2130737304688, + "learning_rate": 5.8460216525674085e-06, + "loss": 37.551, + "step": 193810 + }, + { + "epoch": 0.7830573253554302, + "grad_norm": 737.839599609375, + 
"learning_rate": 5.844048830846212e-06, + "loss": 86.1922, + "step": 193820 + }, + { + "epoch": 0.7830977266207978, + "grad_norm": 205.2071533203125, + "learning_rate": 5.842076285102571e-06, + "loss": 65.8965, + "step": 193830 + }, + { + "epoch": 0.7831381278861654, + "grad_norm": 591.0181274414062, + "learning_rate": 5.84010401537495e-06, + "loss": 78.5163, + "step": 193840 + }, + { + "epoch": 0.7831785291515331, + "grad_norm": 538.7565307617188, + "learning_rate": 5.838132021701784e-06, + "loss": 49.3365, + "step": 193850 + }, + { + "epoch": 0.7832189304169006, + "grad_norm": 954.2677612304688, + "learning_rate": 5.836160304121526e-06, + "loss": 53.7843, + "step": 193860 + }, + { + "epoch": 0.7832593316822682, + "grad_norm": 373.96942138671875, + "learning_rate": 5.834188862672614e-06, + "loss": 76.2244, + "step": 193870 + }, + { + "epoch": 0.7832997329476359, + "grad_norm": 565.0542602539062, + "learning_rate": 5.832217697393483e-06, + "loss": 72.352, + "step": 193880 + }, + { + "epoch": 0.7833401342130035, + "grad_norm": 714.7208862304688, + "learning_rate": 5.830246808322568e-06, + "loss": 59.5585, + "step": 193890 + }, + { + "epoch": 0.7833805354783712, + "grad_norm": 785.1121215820312, + "learning_rate": 5.828276195498275e-06, + "loss": 71.0484, + "step": 193900 + }, + { + "epoch": 0.7834209367437388, + "grad_norm": 774.5046997070312, + "learning_rate": 5.826305858959045e-06, + "loss": 38.9397, + "step": 193910 + }, + { + "epoch": 0.7834613380091064, + "grad_norm": 530.16943359375, + "learning_rate": 5.824335798743275e-06, + "loss": 76.7205, + "step": 193920 + }, + { + "epoch": 0.7835017392744741, + "grad_norm": 282.87017822265625, + "learning_rate": 5.822366014889375e-06, + "loss": 58.1202, + "step": 193930 + }, + { + "epoch": 0.7835421405398417, + "grad_norm": 395.8590087890625, + "learning_rate": 5.820396507435757e-06, + "loss": 78.7767, + "step": 193940 + }, + { + "epoch": 0.7835825418052094, + "grad_norm": 706.666259765625, + "learning_rate": 5.818427276420795e-06, + "loss": 51.23, + "step": 193950 + }, + { + "epoch": 0.783622943070577, + "grad_norm": 859.98974609375, + "learning_rate": 5.816458321882907e-06, + "loss": 78.4799, + "step": 193960 + }, + { + "epoch": 0.7836633443359446, + "grad_norm": 336.512939453125, + "learning_rate": 5.814489643860459e-06, + "loss": 55.7872, + "step": 193970 + }, + { + "epoch": 0.7837037456013123, + "grad_norm": 324.3739013671875, + "learning_rate": 5.812521242391842e-06, + "loss": 41.8546, + "step": 193980 + }, + { + "epoch": 0.7837441468666798, + "grad_norm": 731.3775634765625, + "learning_rate": 5.8105531175154255e-06, + "loss": 56.4714, + "step": 193990 + }, + { + "epoch": 0.7837845481320475, + "grad_norm": 436.9169006347656, + "learning_rate": 5.8085852692695864e-06, + "loss": 51.4766, + "step": 194000 + }, + { + "epoch": 0.7838249493974151, + "grad_norm": 724.273193359375, + "learning_rate": 5.806617697692678e-06, + "loss": 57.1268, + "step": 194010 + }, + { + "epoch": 0.7838653506627827, + "grad_norm": 977.8497924804688, + "learning_rate": 5.804650402823067e-06, + "loss": 31.876, + "step": 194020 + }, + { + "epoch": 0.7839057519281504, + "grad_norm": 650.0667114257812, + "learning_rate": 5.802683384699101e-06, + "loss": 77.6723, + "step": 194030 + }, + { + "epoch": 0.783946153193518, + "grad_norm": 945.32373046875, + "learning_rate": 5.800716643359134e-06, + "loss": 92.5749, + "step": 194040 + }, + { + "epoch": 0.7839865544588857, + "grad_norm": 985.8706665039062, + "learning_rate": 5.798750178841513e-06, + "loss": 72.319, + "step": 
194050 + }, + { + "epoch": 0.7840269557242533, + "grad_norm": 612.8291625976562, + "learning_rate": 5.796783991184556e-06, + "loss": 61.9756, + "step": 194060 + }, + { + "epoch": 0.7840673569896209, + "grad_norm": 644.8745727539062, + "learning_rate": 5.794818080426619e-06, + "loss": 71.3221, + "step": 194070 + }, + { + "epoch": 0.7841077582549886, + "grad_norm": 297.77545166015625, + "learning_rate": 5.79285244660601e-06, + "loss": 59.605, + "step": 194080 + }, + { + "epoch": 0.7841481595203562, + "grad_norm": 881.4013671875, + "learning_rate": 5.790887089761057e-06, + "loss": 50.8065, + "step": 194090 + }, + { + "epoch": 0.7841885607857239, + "grad_norm": 1952.3199462890625, + "learning_rate": 5.7889220099300755e-06, + "loss": 72.8767, + "step": 194100 + }, + { + "epoch": 0.7842289620510915, + "grad_norm": 446.1302185058594, + "learning_rate": 5.786957207151378e-06, + "loss": 52.6341, + "step": 194110 + }, + { + "epoch": 0.784269363316459, + "grad_norm": 722.6705932617188, + "learning_rate": 5.784992681463271e-06, + "loss": 68.1103, + "step": 194120 + }, + { + "epoch": 0.7843097645818267, + "grad_norm": 841.8829956054688, + "learning_rate": 5.783028432904045e-06, + "loss": 64.3401, + "step": 194130 + }, + { + "epoch": 0.7843501658471943, + "grad_norm": 699.1734619140625, + "learning_rate": 5.7810644615119985e-06, + "loss": 59.8687, + "step": 194140 + }, + { + "epoch": 0.7843905671125619, + "grad_norm": 694.9148559570312, + "learning_rate": 5.779100767325421e-06, + "loss": 56.3596, + "step": 194150 + }, + { + "epoch": 0.7844309683779296, + "grad_norm": 1404.4365234375, + "learning_rate": 5.7771373503826025e-06, + "loss": 107.1334, + "step": 194160 + }, + { + "epoch": 0.7844713696432972, + "grad_norm": 836.841552734375, + "learning_rate": 5.775174210721799e-06, + "loss": 54.6834, + "step": 194170 + }, + { + "epoch": 0.7845117709086649, + "grad_norm": 687.8237915039062, + "learning_rate": 5.7732113483813115e-06, + "loss": 91.0202, + "step": 194180 + }, + { + "epoch": 0.7845521721740325, + "grad_norm": 237.37388610839844, + "learning_rate": 5.771248763399389e-06, + "loss": 74.4981, + "step": 194190 + }, + { + "epoch": 0.7845925734394001, + "grad_norm": 953.9370727539062, + "learning_rate": 5.769286455814294e-06, + "loss": 61.6179, + "step": 194200 + }, + { + "epoch": 0.7846329747047678, + "grad_norm": 380.17303466796875, + "learning_rate": 5.767324425664294e-06, + "loss": 60.7395, + "step": 194210 + }, + { + "epoch": 0.7846733759701354, + "grad_norm": 816.6992797851562, + "learning_rate": 5.7653626729876176e-06, + "loss": 71.828, + "step": 194220 + }, + { + "epoch": 0.7847137772355031, + "grad_norm": 943.2098388671875, + "learning_rate": 5.76340119782254e-06, + "loss": 93.5925, + "step": 194230 + }, + { + "epoch": 0.7847541785008707, + "grad_norm": 903.6183471679688, + "learning_rate": 5.7614400002072764e-06, + "loss": 92.5654, + "step": 194240 + }, + { + "epoch": 0.7847945797662382, + "grad_norm": 1295.470703125, + "learning_rate": 5.7594790801800724e-06, + "loss": 60.8984, + "step": 194250 + }, + { + "epoch": 0.7848349810316059, + "grad_norm": 774.7119750976562, + "learning_rate": 5.7575184377791545e-06, + "loss": 64.1137, + "step": 194260 + }, + { + "epoch": 0.7848753822969735, + "grad_norm": 515.7931518554688, + "learning_rate": 5.755558073042747e-06, + "loss": 60.6487, + "step": 194270 + }, + { + "epoch": 0.7849157835623412, + "grad_norm": 735.0186767578125, + "learning_rate": 5.753597986009074e-06, + "loss": 65.2736, + "step": 194280 + }, + { + "epoch": 0.7849561848277088, + 
"grad_norm": 780.30029296875, + "learning_rate": 5.751638176716339e-06, + "loss": 83.0162, + "step": 194290 + }, + { + "epoch": 0.7849965860930764, + "grad_norm": 1067.095703125, + "learning_rate": 5.74967864520275e-06, + "loss": 87.9258, + "step": 194300 + }, + { + "epoch": 0.7850369873584441, + "grad_norm": 372.51519775390625, + "learning_rate": 5.7477193915065145e-06, + "loss": 79.6895, + "step": 194310 + }, + { + "epoch": 0.7850773886238117, + "grad_norm": 248.12559509277344, + "learning_rate": 5.7457604156658265e-06, + "loss": 73.2516, + "step": 194320 + }, + { + "epoch": 0.7851177898891794, + "grad_norm": 560.6986083984375, + "learning_rate": 5.743801717718878e-06, + "loss": 67.6356, + "step": 194330 + }, + { + "epoch": 0.785158191154547, + "grad_norm": 705.287109375, + "learning_rate": 5.7418432977038595e-06, + "loss": 93.3734, + "step": 194340 + }, + { + "epoch": 0.7851985924199146, + "grad_norm": 517.9177856445312, + "learning_rate": 5.739885155658942e-06, + "loss": 42.6828, + "step": 194350 + }, + { + "epoch": 0.7852389936852823, + "grad_norm": 369.4615783691406, + "learning_rate": 5.737927291622305e-06, + "loss": 59.1598, + "step": 194360 + }, + { + "epoch": 0.7852793949506498, + "grad_norm": 577.49658203125, + "learning_rate": 5.735969705632123e-06, + "loss": 66.9638, + "step": 194370 + }, + { + "epoch": 0.7853197962160174, + "grad_norm": 684.221923828125, + "learning_rate": 5.734012397726543e-06, + "loss": 70.4882, + "step": 194380 + }, + { + "epoch": 0.7853601974813851, + "grad_norm": 859.9932861328125, + "learning_rate": 5.73205536794375e-06, + "loss": 81.7615, + "step": 194390 + }, + { + "epoch": 0.7854005987467527, + "grad_norm": 786.934814453125, + "learning_rate": 5.730098616321869e-06, + "loss": 64.6849, + "step": 194400 + }, + { + "epoch": 0.7854410000121204, + "grad_norm": 743.8314819335938, + "learning_rate": 5.7281421428990755e-06, + "loss": 48.2847, + "step": 194410 + }, + { + "epoch": 0.785481401277488, + "grad_norm": 2181.358642578125, + "learning_rate": 5.726185947713492e-06, + "loss": 90.7121, + "step": 194420 + }, + { + "epoch": 0.7855218025428556, + "grad_norm": 710.2190551757812, + "learning_rate": 5.724230030803262e-06, + "loss": 59.4964, + "step": 194430 + }, + { + "epoch": 0.7855622038082233, + "grad_norm": 715.4453125, + "learning_rate": 5.722274392206517e-06, + "loss": 57.6218, + "step": 194440 + }, + { + "epoch": 0.7856026050735909, + "grad_norm": 1040.243408203125, + "learning_rate": 5.72031903196139e-06, + "loss": 70.4223, + "step": 194450 + }, + { + "epoch": 0.7856430063389586, + "grad_norm": 863.6126708984375, + "learning_rate": 5.718363950105989e-06, + "loss": 63.8904, + "step": 194460 + }, + { + "epoch": 0.7856834076043262, + "grad_norm": 547.2091064453125, + "learning_rate": 5.716409146678434e-06, + "loss": 68.873, + "step": 194470 + }, + { + "epoch": 0.7857238088696938, + "grad_norm": 1000.9700317382812, + "learning_rate": 5.714454621716839e-06, + "loss": 82.3458, + "step": 194480 + }, + { + "epoch": 0.7857642101350615, + "grad_norm": 458.6191711425781, + "learning_rate": 5.712500375259305e-06, + "loss": 59.3849, + "step": 194490 + }, + { + "epoch": 0.785804611400429, + "grad_norm": 807.8645629882812, + "learning_rate": 5.710546407343938e-06, + "loss": 63.842, + "step": 194500 + }, + { + "epoch": 0.7858450126657966, + "grad_norm": 747.1063232421875, + "learning_rate": 5.708592718008812e-06, + "loss": 90.9579, + "step": 194510 + }, + { + "epoch": 0.7858854139311643, + "grad_norm": 655.1370239257812, + "learning_rate": 5.706639307292041e-06, + 
"loss": 77.7008, + "step": 194520 + }, + { + "epoch": 0.7859258151965319, + "grad_norm": 776.3811645507812, + "learning_rate": 5.704686175231691e-06, + "loss": 105.1823, + "step": 194530 + }, + { + "epoch": 0.7859662164618996, + "grad_norm": 697.1920166015625, + "learning_rate": 5.7027333218658435e-06, + "loss": 67.8905, + "step": 194540 + }, + { + "epoch": 0.7860066177272672, + "grad_norm": 961.0586547851562, + "learning_rate": 5.700780747232575e-06, + "loss": 73.4675, + "step": 194550 + }, + { + "epoch": 0.7860470189926348, + "grad_norm": 661.1362915039062, + "learning_rate": 5.698828451369935e-06, + "loss": 65.6391, + "step": 194560 + }, + { + "epoch": 0.7860874202580025, + "grad_norm": 444.586669921875, + "learning_rate": 5.696876434316012e-06, + "loss": 56.899, + "step": 194570 + }, + { + "epoch": 0.7861278215233701, + "grad_norm": 892.2806396484375, + "learning_rate": 5.694924696108839e-06, + "loss": 62.3284, + "step": 194580 + }, + { + "epoch": 0.7861682227887378, + "grad_norm": 919.8406372070312, + "learning_rate": 5.692973236786475e-06, + "loss": 82.6779, + "step": 194590 + }, + { + "epoch": 0.7862086240541054, + "grad_norm": 1008.5107421875, + "learning_rate": 5.691022056386961e-06, + "loss": 96.9291, + "step": 194600 + }, + { + "epoch": 0.786249025319473, + "grad_norm": 798.1177978515625, + "learning_rate": 5.689071154948347e-06, + "loss": 56.6285, + "step": 194610 + }, + { + "epoch": 0.7862894265848407, + "grad_norm": 458.1597900390625, + "learning_rate": 5.687120532508652e-06, + "loss": 55.5412, + "step": 194620 + }, + { + "epoch": 0.7863298278502082, + "grad_norm": 667.795166015625, + "learning_rate": 5.685170189105911e-06, + "loss": 75.7771, + "step": 194630 + }, + { + "epoch": 0.7863702291155759, + "grad_norm": 649.5155639648438, + "learning_rate": 5.683220124778146e-06, + "loss": 69.7698, + "step": 194640 + }, + { + "epoch": 0.7864106303809435, + "grad_norm": 482.86065673828125, + "learning_rate": 5.681270339563376e-06, + "loss": 74.022, + "step": 194650 + }, + { + "epoch": 0.7864510316463111, + "grad_norm": 833.0869140625, + "learning_rate": 5.6793208334996195e-06, + "loss": 56.7291, + "step": 194660 + }, + { + "epoch": 0.7864914329116788, + "grad_norm": 1213.965576171875, + "learning_rate": 5.6773716066248644e-06, + "loss": 53.7623, + "step": 194670 + }, + { + "epoch": 0.7865318341770464, + "grad_norm": 600.1317138671875, + "learning_rate": 5.675422658977134e-06, + "loss": 82.3885, + "step": 194680 + }, + { + "epoch": 0.7865722354424141, + "grad_norm": 819.19482421875, + "learning_rate": 5.6734739905944095e-06, + "loss": 49.9245, + "step": 194690 + }, + { + "epoch": 0.7866126367077817, + "grad_norm": 570.42236328125, + "learning_rate": 5.6715256015146825e-06, + "loss": 72.6325, + "step": 194700 + }, + { + "epoch": 0.7866530379731493, + "grad_norm": 363.2998352050781, + "learning_rate": 5.669577491775948e-06, + "loss": 66.0587, + "step": 194710 + }, + { + "epoch": 0.786693439238517, + "grad_norm": 343.2928466796875, + "learning_rate": 5.667629661416167e-06, + "loss": 49.1049, + "step": 194720 + }, + { + "epoch": 0.7867338405038846, + "grad_norm": 851.8533325195312, + "learning_rate": 5.665682110473336e-06, + "loss": 53.5016, + "step": 194730 + }, + { + "epoch": 0.7867742417692523, + "grad_norm": 616.7781372070312, + "learning_rate": 5.663734838985406e-06, + "loss": 48.8312, + "step": 194740 + }, + { + "epoch": 0.7868146430346199, + "grad_norm": 788.5413818359375, + "learning_rate": 5.661787846990346e-06, + "loss": 64.3973, + "step": 194750 + }, + { + "epoch": 
0.7868550442999874, + "grad_norm": 215.1351318359375, + "learning_rate": 5.659841134526114e-06, + "loss": 53.7937, + "step": 194760 + }, + { + "epoch": 0.7868954455653551, + "grad_norm": 389.25927734375, + "learning_rate": 5.657894701630668e-06, + "loss": 58.7175, + "step": 194770 + }, + { + "epoch": 0.7869358468307227, + "grad_norm": 476.2295227050781, + "learning_rate": 5.655948548341936e-06, + "loss": 62.1644, + "step": 194780 + }, + { + "epoch": 0.7869762480960903, + "grad_norm": 1021.6456298828125, + "learning_rate": 5.654002674697885e-06, + "loss": 73.3357, + "step": 194790 + }, + { + "epoch": 0.787016649361458, + "grad_norm": 844.8771362304688, + "learning_rate": 5.6520570807364306e-06, + "loss": 58.209, + "step": 194800 + }, + { + "epoch": 0.7870570506268256, + "grad_norm": 968.1404418945312, + "learning_rate": 5.650111766495509e-06, + "loss": 82.8765, + "step": 194810 + }, + { + "epoch": 0.7870974518921933, + "grad_norm": 736.2339477539062, + "learning_rate": 5.648166732013057e-06, + "loss": 72.6991, + "step": 194820 + }, + { + "epoch": 0.7871378531575609, + "grad_norm": 416.2422790527344, + "learning_rate": 5.646221977326966e-06, + "loss": 72.0906, + "step": 194830 + }, + { + "epoch": 0.7871782544229285, + "grad_norm": 763.305908203125, + "learning_rate": 5.6442775024751815e-06, + "loss": 87.6719, + "step": 194840 + }, + { + "epoch": 0.7872186556882962, + "grad_norm": 475.3885803222656, + "learning_rate": 5.642333307495586e-06, + "loss": 61.3862, + "step": 194850 + }, + { + "epoch": 0.7872590569536638, + "grad_norm": 326.9962158203125, + "learning_rate": 5.640389392426107e-06, + "loss": 80.0421, + "step": 194860 + }, + { + "epoch": 0.7872994582190315, + "grad_norm": 720.7672119140625, + "learning_rate": 5.638445757304623e-06, + "loss": 66.7757, + "step": 194870 + }, + { + "epoch": 0.787339859484399, + "grad_norm": 407.8500061035156, + "learning_rate": 5.636502402169033e-06, + "loss": 59.3142, + "step": 194880 + }, + { + "epoch": 0.7873802607497666, + "grad_norm": 880.2608032226562, + "learning_rate": 5.6345593270572295e-06, + "loss": 98.3491, + "step": 194890 + }, + { + "epoch": 0.7874206620151343, + "grad_norm": 288.4773254394531, + "learning_rate": 5.632616532007082e-06, + "loss": 72.9185, + "step": 194900 + }, + { + "epoch": 0.7874610632805019, + "grad_norm": 573.1043090820312, + "learning_rate": 5.630674017056472e-06, + "loss": 94.7093, + "step": 194910 + }, + { + "epoch": 0.7875014645458696, + "grad_norm": 457.56683349609375, + "learning_rate": 5.628731782243268e-06, + "loss": 69.4214, + "step": 194920 + }, + { + "epoch": 0.7875418658112372, + "grad_norm": 753.3097534179688, + "learning_rate": 5.6267898276053386e-06, + "loss": 38.2414, + "step": 194930 + }, + { + "epoch": 0.7875822670766048, + "grad_norm": 1078.5167236328125, + "learning_rate": 5.624848153180542e-06, + "loss": 76.571, + "step": 194940 + }, + { + "epoch": 0.7876226683419725, + "grad_norm": 1090.159423828125, + "learning_rate": 5.622906759006737e-06, + "loss": 55.4463, + "step": 194950 + }, + { + "epoch": 0.7876630696073401, + "grad_norm": 360.18017578125, + "learning_rate": 5.620965645121758e-06, + "loss": 32.4792, + "step": 194960 + }, + { + "epoch": 0.7877034708727078, + "grad_norm": 463.46624755859375, + "learning_rate": 5.61902481156346e-06, + "loss": 54.2287, + "step": 194970 + }, + { + "epoch": 0.7877438721380754, + "grad_norm": 754.9583129882812, + "learning_rate": 5.617084258369683e-06, + "loss": 46.0771, + "step": 194980 + }, + { + "epoch": 0.787784273403443, + "grad_norm": 625.06396484375, + 
"learning_rate": 5.615143985578239e-06, + "loss": 58.6966, + "step": 194990 + }, + { + "epoch": 0.7878246746688107, + "grad_norm": 334.9090270996094, + "learning_rate": 5.613203993226981e-06, + "loss": 67.8821, + "step": 195000 + }, + { + "epoch": 0.7878650759341782, + "grad_norm": 552.6209716796875, + "learning_rate": 5.611264281353708e-06, + "loss": 65.5341, + "step": 195010 + }, + { + "epoch": 0.7879054771995458, + "grad_norm": 662.0979614257812, + "learning_rate": 5.609324849996256e-06, + "loss": 54.1424, + "step": 195020 + }, + { + "epoch": 0.7879458784649135, + "grad_norm": 792.7586059570312, + "learning_rate": 5.60738569919242e-06, + "loss": 71.4793, + "step": 195030 + }, + { + "epoch": 0.7879862797302811, + "grad_norm": 1134.9969482421875, + "learning_rate": 5.6054468289800076e-06, + "loss": 85.0288, + "step": 195040 + }, + { + "epoch": 0.7880266809956488, + "grad_norm": 721.7567749023438, + "learning_rate": 5.603508239396829e-06, + "loss": 111.1969, + "step": 195050 + }, + { + "epoch": 0.7880670822610164, + "grad_norm": 431.5768737792969, + "learning_rate": 5.601569930480661e-06, + "loss": 72.1602, + "step": 195060 + }, + { + "epoch": 0.788107483526384, + "grad_norm": 452.9695129394531, + "learning_rate": 5.5996319022693e-06, + "loss": 71.976, + "step": 195070 + }, + { + "epoch": 0.7881478847917517, + "grad_norm": 531.107421875, + "learning_rate": 5.59769415480053e-06, + "loss": 45.7043, + "step": 195080 + }, + { + "epoch": 0.7881882860571193, + "grad_norm": 577.4664916992188, + "learning_rate": 5.595756688112126e-06, + "loss": 63.4685, + "step": 195090 + }, + { + "epoch": 0.788228687322487, + "grad_norm": 565.5504760742188, + "learning_rate": 5.593819502241862e-06, + "loss": 72.5533, + "step": 195100 + }, + { + "epoch": 0.7882690885878546, + "grad_norm": 281.1690673828125, + "learning_rate": 5.591882597227509e-06, + "loss": 54.5582, + "step": 195110 + }, + { + "epoch": 0.7883094898532222, + "grad_norm": 1111.865234375, + "learning_rate": 5.589945973106812e-06, + "loss": 106.0911, + "step": 195120 + }, + { + "epoch": 0.7883498911185899, + "grad_norm": 695.4479370117188, + "learning_rate": 5.588009629917548e-06, + "loss": 66.8044, + "step": 195130 + }, + { + "epoch": 0.7883902923839574, + "grad_norm": 604.8488159179688, + "learning_rate": 5.58607356769745e-06, + "loss": 69.4199, + "step": 195140 + }, + { + "epoch": 0.788430693649325, + "grad_norm": 765.1904296875, + "learning_rate": 5.5841377864842675e-06, + "loss": 91.0633, + "step": 195150 + }, + { + "epoch": 0.7884710949146927, + "grad_norm": 311.0297546386719, + "learning_rate": 5.582202286315747e-06, + "loss": 78.7439, + "step": 195160 + }, + { + "epoch": 0.7885114961800603, + "grad_norm": 612.4523315429688, + "learning_rate": 5.580267067229606e-06, + "loss": 108.3362, + "step": 195170 + }, + { + "epoch": 0.788551897445428, + "grad_norm": 1559.901123046875, + "learning_rate": 5.578332129263593e-06, + "loss": 89.5055, + "step": 195180 + }, + { + "epoch": 0.7885922987107956, + "grad_norm": 445.1541748046875, + "learning_rate": 5.576397472455413e-06, + "loss": 64.7981, + "step": 195190 + }, + { + "epoch": 0.7886326999761633, + "grad_norm": 690.4293212890625, + "learning_rate": 5.574463096842791e-06, + "loss": 69.845, + "step": 195200 + }, + { + "epoch": 0.7886731012415309, + "grad_norm": 744.034912109375, + "learning_rate": 5.572529002463438e-06, + "loss": 65.1012, + "step": 195210 + }, + { + "epoch": 0.7887135025068985, + "grad_norm": 303.5430908203125, + "learning_rate": 5.570595189355066e-06, + "loss": 44.4234, + "step": 
195220 + }, + { + "epoch": 0.7887539037722662, + "grad_norm": 836.3827514648438, + "learning_rate": 5.568661657555361e-06, + "loss": 50.2298, + "step": 195230 + }, + { + "epoch": 0.7887943050376338, + "grad_norm": 733.35009765625, + "learning_rate": 5.566728407102029e-06, + "loss": 67.2798, + "step": 195240 + }, + { + "epoch": 0.7888347063030015, + "grad_norm": 773.5714111328125, + "learning_rate": 5.5647954380327576e-06, + "loss": 71.181, + "step": 195250 + }, + { + "epoch": 0.7888751075683691, + "grad_norm": 512.0818481445312, + "learning_rate": 5.562862750385229e-06, + "loss": 71.0401, + "step": 195260 + }, + { + "epoch": 0.7889155088337366, + "grad_norm": 952.4354858398438, + "learning_rate": 5.560930344197133e-06, + "loss": 72.4534, + "step": 195270 + }, + { + "epoch": 0.7889559100991043, + "grad_norm": 1181.0841064453125, + "learning_rate": 5.55899821950612e-06, + "loss": 96.8383, + "step": 195280 + }, + { + "epoch": 0.7889963113644719, + "grad_norm": 937.505859375, + "learning_rate": 5.557066376349882e-06, + "loss": 72.1268, + "step": 195290 + }, + { + "epoch": 0.7890367126298395, + "grad_norm": 262.8683166503906, + "learning_rate": 5.5551348147660675e-06, + "loss": 82.6183, + "step": 195300 + }, + { + "epoch": 0.7890771138952072, + "grad_norm": 715.1094970703125, + "learning_rate": 5.553203534792335e-06, + "loss": 52.7838, + "step": 195310 + }, + { + "epoch": 0.7891175151605748, + "grad_norm": 547.092529296875, + "learning_rate": 5.551272536466343e-06, + "loss": 54.2569, + "step": 195320 + }, + { + "epoch": 0.7891579164259425, + "grad_norm": 955.89697265625, + "learning_rate": 5.54934181982572e-06, + "loss": 86.4374, + "step": 195330 + }, + { + "epoch": 0.7891983176913101, + "grad_norm": 386.8194885253906, + "learning_rate": 5.5474113849081305e-06, + "loss": 62.761, + "step": 195340 + }, + { + "epoch": 0.7892387189566777, + "grad_norm": 422.8041687011719, + "learning_rate": 5.5454812317511885e-06, + "loss": 71.7943, + "step": 195350 + }, + { + "epoch": 0.7892791202220454, + "grad_norm": 789.3390502929688, + "learning_rate": 5.543551360392534e-06, + "loss": 58.6011, + "step": 195360 + }, + { + "epoch": 0.789319521487413, + "grad_norm": 465.9772033691406, + "learning_rate": 5.541621770869787e-06, + "loss": 73.316, + "step": 195370 + }, + { + "epoch": 0.7893599227527807, + "grad_norm": 676.7305297851562, + "learning_rate": 5.539692463220567e-06, + "loss": 73.2003, + "step": 195380 + }, + { + "epoch": 0.7894003240181483, + "grad_norm": 817.5529174804688, + "learning_rate": 5.537763437482495e-06, + "loss": 51.8518, + "step": 195390 + }, + { + "epoch": 0.7894407252835158, + "grad_norm": 567.8902587890625, + "learning_rate": 5.535834693693163e-06, + "loss": 46.1014, + "step": 195400 + }, + { + "epoch": 0.7894811265488835, + "grad_norm": 875.5498046875, + "learning_rate": 5.533906231890181e-06, + "loss": 33.825, + "step": 195410 + }, + { + "epoch": 0.7895215278142511, + "grad_norm": 717.0527954101562, + "learning_rate": 5.5319780521111444e-06, + "loss": 62.5788, + "step": 195420 + }, + { + "epoch": 0.7895619290796188, + "grad_norm": 1102.947021484375, + "learning_rate": 5.530050154393649e-06, + "loss": 83.2981, + "step": 195430 + }, + { + "epoch": 0.7896023303449864, + "grad_norm": 270.22222900390625, + "learning_rate": 5.5281225387752646e-06, + "loss": 67.2055, + "step": 195440 + }, + { + "epoch": 0.789642731610354, + "grad_norm": 1395.4488525390625, + "learning_rate": 5.526195205293594e-06, + "loss": 76.5142, + "step": 195450 + }, + { + "epoch": 0.7896831328757217, + "grad_norm": 
577.6397094726562, + "learning_rate": 5.5242681539861875e-06, + "loss": 84.9792, + "step": 195460 + }, + { + "epoch": 0.7897235341410893, + "grad_norm": 1774.780029296875, + "learning_rate": 5.52234138489064e-06, + "loss": 105.5032, + "step": 195470 + }, + { + "epoch": 0.789763935406457, + "grad_norm": 613.2858276367188, + "learning_rate": 5.520414898044493e-06, + "loss": 69.2838, + "step": 195480 + }, + { + "epoch": 0.7898043366718246, + "grad_norm": 646.5277099609375, + "learning_rate": 5.518488693485311e-06, + "loss": 84.1104, + "step": 195490 + }, + { + "epoch": 0.7898447379371922, + "grad_norm": 378.3345947265625, + "learning_rate": 5.516562771250655e-06, + "loss": 52.2742, + "step": 195500 + }, + { + "epoch": 0.7898851392025599, + "grad_norm": 808.4698486328125, + "learning_rate": 5.514637131378058e-06, + "loss": 51.9317, + "step": 195510 + }, + { + "epoch": 0.7899255404679274, + "grad_norm": 1005.7137451171875, + "learning_rate": 5.5127117739050666e-06, + "loss": 71.1159, + "step": 195520 + }, + { + "epoch": 0.789965941733295, + "grad_norm": 963.409423828125, + "learning_rate": 5.51078669886922e-06, + "loss": 72.5832, + "step": 195530 + }, + { + "epoch": 0.7900063429986627, + "grad_norm": 658.0546875, + "learning_rate": 5.5088619063080426e-06, + "loss": 70.2935, + "step": 195540 + }, + { + "epoch": 0.7900467442640303, + "grad_norm": 1335.7430419921875, + "learning_rate": 5.506937396259067e-06, + "loss": 85.2417, + "step": 195550 + }, + { + "epoch": 0.790087145529398, + "grad_norm": 958.788330078125, + "learning_rate": 5.5050131687598116e-06, + "loss": 81.5646, + "step": 195560 + }, + { + "epoch": 0.7901275467947656, + "grad_norm": 1104.074951171875, + "learning_rate": 5.503089223847782e-06, + "loss": 90.1085, + "step": 195570 + }, + { + "epoch": 0.7901679480601332, + "grad_norm": 3557.441162109375, + "learning_rate": 5.501165561560491e-06, + "loss": 108.7318, + "step": 195580 + }, + { + "epoch": 0.7902083493255009, + "grad_norm": 909.9269409179688, + "learning_rate": 5.499242181935449e-06, + "loss": 72.7428, + "step": 195590 + }, + { + "epoch": 0.7902487505908685, + "grad_norm": 870.8326416015625, + "learning_rate": 5.4973190850101334e-06, + "loss": 48.1317, + "step": 195600 + }, + { + "epoch": 0.7902891518562362, + "grad_norm": 312.474609375, + "learning_rate": 5.49539627082206e-06, + "loss": 46.6865, + "step": 195610 + }, + { + "epoch": 0.7903295531216038, + "grad_norm": 541.4234619140625, + "learning_rate": 5.493473739408692e-06, + "loss": 54.0819, + "step": 195620 + }, + { + "epoch": 0.7903699543869714, + "grad_norm": 634.0132446289062, + "learning_rate": 5.491551490807536e-06, + "loss": 45.8342, + "step": 195630 + }, + { + "epoch": 0.7904103556523391, + "grad_norm": 1292.65869140625, + "learning_rate": 5.489629525056044e-06, + "loss": 101.3481, + "step": 195640 + }, + { + "epoch": 0.7904507569177066, + "grad_norm": 827.0450439453125, + "learning_rate": 5.4877078421916965e-06, + "loss": 65.7223, + "step": 195650 + }, + { + "epoch": 0.7904911581830742, + "grad_norm": 327.7843322753906, + "learning_rate": 5.485786442251963e-06, + "loss": 65.4434, + "step": 195660 + }, + { + "epoch": 0.7905315594484419, + "grad_norm": 1221.4622802734375, + "learning_rate": 5.483865325274289e-06, + "loss": 73.2456, + "step": 195670 + }, + { + "epoch": 0.7905719607138095, + "grad_norm": 829.7383422851562, + "learning_rate": 5.481944491296132e-06, + "loss": 64.3684, + "step": 195680 + }, + { + "epoch": 0.7906123619791772, + "grad_norm": 339.604248046875, + "learning_rate": 5.480023940354944e-06, + 
"loss": 71.1187, + "step": 195690 + }, + { + "epoch": 0.7906527632445448, + "grad_norm": 788.55419921875, + "learning_rate": 5.478103672488162e-06, + "loss": 92.1096, + "step": 195700 + }, + { + "epoch": 0.7906931645099124, + "grad_norm": 922.0347900390625, + "learning_rate": 5.476183687733227e-06, + "loss": 65.9476, + "step": 195710 + }, + { + "epoch": 0.7907335657752801, + "grad_norm": 1067.5355224609375, + "learning_rate": 5.474263986127575e-06, + "loss": 50.0337, + "step": 195720 + }, + { + "epoch": 0.7907739670406477, + "grad_norm": 436.9764404296875, + "learning_rate": 5.472344567708616e-06, + "loss": 34.4781, + "step": 195730 + }, + { + "epoch": 0.7908143683060154, + "grad_norm": 322.6572265625, + "learning_rate": 5.4704254325137815e-06, + "loss": 68.9489, + "step": 195740 + }, + { + "epoch": 0.790854769571383, + "grad_norm": 876.14208984375, + "learning_rate": 5.4685065805804835e-06, + "loss": 73.4928, + "step": 195750 + }, + { + "epoch": 0.7908951708367506, + "grad_norm": 1138.8128662109375, + "learning_rate": 5.46658801194613e-06, + "loss": 65.6377, + "step": 195760 + }, + { + "epoch": 0.7909355721021183, + "grad_norm": 2609.007080078125, + "learning_rate": 5.464669726648133e-06, + "loss": 54.5025, + "step": 195770 + }, + { + "epoch": 0.7909759733674858, + "grad_norm": 983.99462890625, + "learning_rate": 5.4627517247238695e-06, + "loss": 64.5997, + "step": 195780 + }, + { + "epoch": 0.7910163746328535, + "grad_norm": 652.0485229492188, + "learning_rate": 5.460834006210758e-06, + "loss": 73.4926, + "step": 195790 + }, + { + "epoch": 0.7910567758982211, + "grad_norm": 696.356201171875, + "learning_rate": 5.458916571146167e-06, + "loss": 67.8603, + "step": 195800 + }, + { + "epoch": 0.7910971771635887, + "grad_norm": 1168.8751220703125, + "learning_rate": 5.456999419567482e-06, + "loss": 87.0765, + "step": 195810 + }, + { + "epoch": 0.7911375784289564, + "grad_norm": 539.7451171875, + "learning_rate": 5.4550825515120835e-06, + "loss": 68.5614, + "step": 195820 + }, + { + "epoch": 0.791177979694324, + "grad_norm": 423.9830627441406, + "learning_rate": 5.453165967017335e-06, + "loss": 70.4154, + "step": 195830 + }, + { + "epoch": 0.7912183809596917, + "grad_norm": 618.4097290039062, + "learning_rate": 5.451249666120615e-06, + "loss": 58.1139, + "step": 195840 + }, + { + "epoch": 0.7912587822250593, + "grad_norm": 736.8831176757812, + "learning_rate": 5.4493336488592655e-06, + "loss": 68.1197, + "step": 195850 + }, + { + "epoch": 0.7912991834904269, + "grad_norm": 480.24163818359375, + "learning_rate": 5.447417915270648e-06, + "loss": 52.9726, + "step": 195860 + }, + { + "epoch": 0.7913395847557946, + "grad_norm": 864.5050048828125, + "learning_rate": 5.44550246539211e-06, + "loss": 92.4271, + "step": 195870 + }, + { + "epoch": 0.7913799860211622, + "grad_norm": 503.1786193847656, + "learning_rate": 5.443587299260999e-06, + "loss": 64.757, + "step": 195880 + }, + { + "epoch": 0.7914203872865299, + "grad_norm": 470.47216796875, + "learning_rate": 5.441672416914639e-06, + "loss": 58.4325, + "step": 195890 + }, + { + "epoch": 0.7914607885518975, + "grad_norm": 386.7495422363281, + "learning_rate": 5.439757818390381e-06, + "loss": 51.1474, + "step": 195900 + }, + { + "epoch": 0.791501189817265, + "grad_norm": 942.6947631835938, + "learning_rate": 5.437843503725533e-06, + "loss": 57.2752, + "step": 195910 + }, + { + "epoch": 0.7915415910826327, + "grad_norm": 1066.5789794921875, + "learning_rate": 5.435929472957424e-06, + "loss": 58.0471, + "step": 195920 + }, + { + "epoch": 
0.7915819923480003, + "grad_norm": 657.4696044921875, + "learning_rate": 5.434015726123374e-06, + "loss": 58.0279, + "step": 195930 + }, + { + "epoch": 0.791622393613368, + "grad_norm": 999.9287719726562, + "learning_rate": 5.432102263260675e-06, + "loss": 98.456, + "step": 195940 + }, + { + "epoch": 0.7916627948787356, + "grad_norm": 1053.19580078125, + "learning_rate": 5.430189084406654e-06, + "loss": 55.6314, + "step": 195950 + }, + { + "epoch": 0.7917031961441032, + "grad_norm": 480.54266357421875, + "learning_rate": 5.428276189598593e-06, + "loss": 62.6137, + "step": 195960 + }, + { + "epoch": 0.7917435974094709, + "grad_norm": 680.0366821289062, + "learning_rate": 5.426363578873788e-06, + "loss": 71.7237, + "step": 195970 + }, + { + "epoch": 0.7917839986748385, + "grad_norm": 315.5231018066406, + "learning_rate": 5.424451252269529e-06, + "loss": 58.6494, + "step": 195980 + }, + { + "epoch": 0.7918243999402061, + "grad_norm": 548.6585693359375, + "learning_rate": 5.422539209823097e-06, + "loss": 56.2233, + "step": 195990 + }, + { + "epoch": 0.7918648012055738, + "grad_norm": 1000.8866577148438, + "learning_rate": 5.4206274515717735e-06, + "loss": 102.5495, + "step": 196000 + }, + { + "epoch": 0.7919052024709414, + "grad_norm": 1363.3798828125, + "learning_rate": 5.418715977552818e-06, + "loss": 59.2995, + "step": 196010 + }, + { + "epoch": 0.7919456037363091, + "grad_norm": 591.3885498046875, + "learning_rate": 5.416804787803502e-06, + "loss": 64.6508, + "step": 196020 + }, + { + "epoch": 0.7919860050016767, + "grad_norm": 1110.771240234375, + "learning_rate": 5.4148938823610834e-06, + "loss": 44.1233, + "step": 196030 + }, + { + "epoch": 0.7920264062670442, + "grad_norm": 384.07965087890625, + "learning_rate": 5.412983261262825e-06, + "loss": 32.9271, + "step": 196040 + }, + { + "epoch": 0.7920668075324119, + "grad_norm": 1105.742919921875, + "learning_rate": 5.4110729245459544e-06, + "loss": 57.4839, + "step": 196050 + }, + { + "epoch": 0.7921072087977795, + "grad_norm": 919.4006958007812, + "learning_rate": 5.40916287224774e-06, + "loss": 50.5678, + "step": 196060 + }, + { + "epoch": 0.7921476100631472, + "grad_norm": 746.574951171875, + "learning_rate": 5.4072531044054035e-06, + "loss": 78.1685, + "step": 196070 + }, + { + "epoch": 0.7921880113285148, + "grad_norm": 781.1284790039062, + "learning_rate": 5.405343621056178e-06, + "loss": 70.9818, + "step": 196080 + }, + { + "epoch": 0.7922284125938824, + "grad_norm": 1024.7418212890625, + "learning_rate": 5.403434422237292e-06, + "loss": 105.6784, + "step": 196090 + }, + { + "epoch": 0.7922688138592501, + "grad_norm": 532.6754150390625, + "learning_rate": 5.401525507985969e-06, + "loss": 74.8222, + "step": 196100 + }, + { + "epoch": 0.7923092151246177, + "grad_norm": 265.1700134277344, + "learning_rate": 5.3996168783394285e-06, + "loss": 65.7714, + "step": 196110 + }, + { + "epoch": 0.7923496163899854, + "grad_norm": 569.279052734375, + "learning_rate": 5.397708533334865e-06, + "loss": 99.9478, + "step": 196120 + }, + { + "epoch": 0.792390017655353, + "grad_norm": 981.3369750976562, + "learning_rate": 5.395800473009494e-06, + "loss": 103.103, + "step": 196130 + }, + { + "epoch": 0.7924304189207206, + "grad_norm": 789.4158935546875, + "learning_rate": 5.393892697400509e-06, + "loss": 74.3333, + "step": 196140 + }, + { + "epoch": 0.7924708201860883, + "grad_norm": 752.9960327148438, + "learning_rate": 5.391985206545107e-06, + "loss": 77.9106, + "step": 196150 + }, + { + "epoch": 0.7925112214514558, + "grad_norm": 1053.7432861328125, + 
"learning_rate": 5.3900780004804745e-06, + "loss": 89.4792, + "step": 196160 + }, + { + "epoch": 0.7925516227168234, + "grad_norm": 873.7487182617188, + "learning_rate": 5.388171079243798e-06, + "loss": 65.0702, + "step": 196170 + }, + { + "epoch": 0.7925920239821911, + "grad_norm": 785.3327026367188, + "learning_rate": 5.386264442872244e-06, + "loss": 82.0844, + "step": 196180 + }, + { + "epoch": 0.7926324252475587, + "grad_norm": 726.8406982421875, + "learning_rate": 5.384358091402988e-06, + "loss": 81.258, + "step": 196190 + }, + { + "epoch": 0.7926728265129264, + "grad_norm": 372.0096740722656, + "learning_rate": 5.382452024873201e-06, + "loss": 62.4877, + "step": 196200 + }, + { + "epoch": 0.792713227778294, + "grad_norm": 554.6402587890625, + "learning_rate": 5.3805462433200265e-06, + "loss": 70.7371, + "step": 196210 + }, + { + "epoch": 0.7927536290436616, + "grad_norm": 820.2308959960938, + "learning_rate": 5.3786407467806415e-06, + "loss": 68.4055, + "step": 196220 + }, + { + "epoch": 0.7927940303090293, + "grad_norm": 638.0755615234375, + "learning_rate": 5.37673553529217e-06, + "loss": 62.7139, + "step": 196230 + }, + { + "epoch": 0.7928344315743969, + "grad_norm": 724.6431884765625, + "learning_rate": 5.3748306088917815e-06, + "loss": 35.1821, + "step": 196240 + }, + { + "epoch": 0.7928748328397646, + "grad_norm": 1183.3958740234375, + "learning_rate": 5.372925967616591e-06, + "loss": 77.0474, + "step": 196250 + }, + { + "epoch": 0.7929152341051322, + "grad_norm": 416.2100524902344, + "learning_rate": 5.371021611503742e-06, + "loss": 63.1517, + "step": 196260 + }, + { + "epoch": 0.7929556353704998, + "grad_norm": 702.9608154296875, + "learning_rate": 5.369117540590363e-06, + "loss": 56.2893, + "step": 196270 + }, + { + "epoch": 0.7929960366358675, + "grad_norm": 810.3818359375, + "learning_rate": 5.367213754913563e-06, + "loss": 54.7855, + "step": 196280 + }, + { + "epoch": 0.793036437901235, + "grad_norm": 948.7098999023438, + "learning_rate": 5.3653102545104655e-06, + "loss": 70.317, + "step": 196290 + }, + { + "epoch": 0.7930768391666027, + "grad_norm": 679.0529174804688, + "learning_rate": 5.3634070394181785e-06, + "loss": 82.8233, + "step": 196300 + }, + { + "epoch": 0.7931172404319703, + "grad_norm": 804.6410522460938, + "learning_rate": 5.361504109673808e-06, + "loss": 81.3745, + "step": 196310 + }, + { + "epoch": 0.7931576416973379, + "grad_norm": 785.44140625, + "learning_rate": 5.35960146531445e-06, + "loss": 96.5606, + "step": 196320 + }, + { + "epoch": 0.7931980429627056, + "grad_norm": 1010.595703125, + "learning_rate": 5.357699106377205e-06, + "loss": 74.4034, + "step": 196330 + }, + { + "epoch": 0.7932384442280732, + "grad_norm": 393.8507385253906, + "learning_rate": 5.355797032899148e-06, + "loss": 68.0817, + "step": 196340 + }, + { + "epoch": 0.7932788454934409, + "grad_norm": 719.6611328125, + "learning_rate": 5.353895244917366e-06, + "loss": 58.9353, + "step": 196350 + }, + { + "epoch": 0.7933192467588085, + "grad_norm": 568.1353149414062, + "learning_rate": 5.35199374246894e-06, + "loss": 64.9624, + "step": 196360 + }, + { + "epoch": 0.7933596480241761, + "grad_norm": 618.6283569335938, + "learning_rate": 5.350092525590936e-06, + "loss": 126.3685, + "step": 196370 + }, + { + "epoch": 0.7934000492895438, + "grad_norm": 863.833251953125, + "learning_rate": 5.348191594320425e-06, + "loss": 67.7844, + "step": 196380 + }, + { + "epoch": 0.7934404505549114, + "grad_norm": 488.93756103515625, + "learning_rate": 5.346290948694451e-06, + "loss": 73.6491, + "step": 
196390 + }, + { + "epoch": 0.793480851820279, + "grad_norm": 911.6883544921875, + "learning_rate": 5.34439058875009e-06, + "loss": 50.9137, + "step": 196400 + }, + { + "epoch": 0.7935212530856467, + "grad_norm": 213.65045166015625, + "learning_rate": 5.342490514524376e-06, + "loss": 44.6143, + "step": 196410 + }, + { + "epoch": 0.7935616543510142, + "grad_norm": 671.77001953125, + "learning_rate": 5.340590726054353e-06, + "loss": 70.0193, + "step": 196420 + }, + { + "epoch": 0.7936020556163819, + "grad_norm": 301.10552978515625, + "learning_rate": 5.338691223377064e-06, + "loss": 40.0894, + "step": 196430 + }, + { + "epoch": 0.7936424568817495, + "grad_norm": 617.1746826171875, + "learning_rate": 5.3367920065295365e-06, + "loss": 67.9977, + "step": 196440 + }, + { + "epoch": 0.7936828581471171, + "grad_norm": 837.4561157226562, + "learning_rate": 5.334893075548804e-06, + "loss": 107.5108, + "step": 196450 + }, + { + "epoch": 0.7937232594124848, + "grad_norm": 768.6305541992188, + "learning_rate": 5.3329944304718735e-06, + "loss": 69.2948, + "step": 196460 + }, + { + "epoch": 0.7937636606778524, + "grad_norm": 676.1723022460938, + "learning_rate": 5.3310960713357684e-06, + "loss": 40.1371, + "step": 196470 + }, + { + "epoch": 0.7938040619432201, + "grad_norm": 467.5173034667969, + "learning_rate": 5.329197998177498e-06, + "loss": 71.6882, + "step": 196480 + }, + { + "epoch": 0.7938444632085877, + "grad_norm": 404.9355163574219, + "learning_rate": 5.327300211034072e-06, + "loss": 48.1484, + "step": 196490 + }, + { + "epoch": 0.7938848644739553, + "grad_norm": 677.6632080078125, + "learning_rate": 5.325402709942471e-06, + "loss": 57.1935, + "step": 196500 + }, + { + "epoch": 0.793925265739323, + "grad_norm": 791.7833251953125, + "learning_rate": 5.323505494939709e-06, + "loss": 59.4461, + "step": 196510 + }, + { + "epoch": 0.7939656670046906, + "grad_norm": 368.07110595703125, + "learning_rate": 5.321608566062759e-06, + "loss": 67.4957, + "step": 196520 + }, + { + "epoch": 0.7940060682700583, + "grad_norm": 419.4300537109375, + "learning_rate": 5.319711923348607e-06, + "loss": 60.1967, + "step": 196530 + }, + { + "epoch": 0.7940464695354259, + "grad_norm": 862.3035888671875, + "learning_rate": 5.317815566834234e-06, + "loss": 79.8188, + "step": 196540 + }, + { + "epoch": 0.7940868708007934, + "grad_norm": 840.2998046875, + "learning_rate": 5.3159194965565945e-06, + "loss": 60.4577, + "step": 196550 + }, + { + "epoch": 0.7941272720661611, + "grad_norm": 495.64105224609375, + "learning_rate": 5.314023712552676e-06, + "loss": 91.2407, + "step": 196560 + }, + { + "epoch": 0.7941676733315287, + "grad_norm": 594.6430053710938, + "learning_rate": 5.31212821485942e-06, + "loss": 45.3386, + "step": 196570 + }, + { + "epoch": 0.7942080745968964, + "grad_norm": 584.9028930664062, + "learning_rate": 5.310233003513785e-06, + "loss": 49.6531, + "step": 196580 + }, + { + "epoch": 0.794248475862264, + "grad_norm": 1396.327392578125, + "learning_rate": 5.308338078552722e-06, + "loss": 83.4711, + "step": 196590 + }, + { + "epoch": 0.7942888771276316, + "grad_norm": 758.5535888671875, + "learning_rate": 5.306443440013171e-06, + "loss": 69.8248, + "step": 196600 + }, + { + "epoch": 0.7943292783929993, + "grad_norm": 970.44775390625, + "learning_rate": 5.304549087932076e-06, + "loss": 54.2326, + "step": 196610 + }, + { + "epoch": 0.7943696796583669, + "grad_norm": 392.916015625, + "learning_rate": 5.302655022346357e-06, + "loss": 56.2589, + "step": 196620 + }, + { + "epoch": 0.7944100809237346, + "grad_norm": 
1200.5804443359375, + "learning_rate": 5.300761243292945e-06, + "loss": 63.1016, + "step": 196630 + }, + { + "epoch": 0.7944504821891022, + "grad_norm": 595.2494506835938, + "learning_rate": 5.298867750808758e-06, + "loss": 68.7543, + "step": 196640 + }, + { + "epoch": 0.7944908834544698, + "grad_norm": 215.77952575683594, + "learning_rate": 5.296974544930722e-06, + "loss": 71.6075, + "step": 196650 + }, + { + "epoch": 0.7945312847198375, + "grad_norm": 1086.1199951171875, + "learning_rate": 5.295081625695724e-06, + "loss": 75.1514, + "step": 196660 + }, + { + "epoch": 0.7945716859852051, + "grad_norm": 847.3512573242188, + "learning_rate": 5.293188993140692e-06, + "loss": 102.1609, + "step": 196670 + }, + { + "epoch": 0.7946120872505726, + "grad_norm": 417.1792907714844, + "learning_rate": 5.291296647302504e-06, + "loss": 40.2056, + "step": 196680 + }, + { + "epoch": 0.7946524885159403, + "grad_norm": 496.6617126464844, + "learning_rate": 5.289404588218063e-06, + "loss": 68.574, + "step": 196690 + }, + { + "epoch": 0.7946928897813079, + "grad_norm": 621.0296020507812, + "learning_rate": 5.287512815924254e-06, + "loss": 70.8554, + "step": 196700 + }, + { + "epoch": 0.7947332910466756, + "grad_norm": 615.9517822265625, + "learning_rate": 5.285621330457955e-06, + "loss": 59.1375, + "step": 196710 + }, + { + "epoch": 0.7947736923120432, + "grad_norm": 534.3119506835938, + "learning_rate": 5.2837301318560505e-06, + "loss": 50.4958, + "step": 196720 + }, + { + "epoch": 0.7948140935774108, + "grad_norm": 617.869140625, + "learning_rate": 5.281839220155396e-06, + "loss": 94.4089, + "step": 196730 + }, + { + "epoch": 0.7948544948427785, + "grad_norm": 794.46923828125, + "learning_rate": 5.279948595392865e-06, + "loss": 53.0094, + "step": 196740 + }, + { + "epoch": 0.7948948961081461, + "grad_norm": 562.4744873046875, + "learning_rate": 5.278058257605314e-06, + "loss": 50.6858, + "step": 196750 + }, + { + "epoch": 0.7949352973735138, + "grad_norm": 1524.5927734375, + "learning_rate": 5.276168206829597e-06, + "loss": 78.4505, + "step": 196760 + }, + { + "epoch": 0.7949756986388814, + "grad_norm": 574.8984985351562, + "learning_rate": 5.274278443102561e-06, + "loss": 58.209, + "step": 196770 + }, + { + "epoch": 0.795016099904249, + "grad_norm": 607.8275756835938, + "learning_rate": 5.272388966461055e-06, + "loss": 63.8901, + "step": 196780 + }, + { + "epoch": 0.7950565011696167, + "grad_norm": 742.5903930664062, + "learning_rate": 5.2704997769419e-06, + "loss": 39.5754, + "step": 196790 + }, + { + "epoch": 0.7950969024349842, + "grad_norm": 667.3244018554688, + "learning_rate": 5.268610874581936e-06, + "loss": 64.0746, + "step": 196800 + }, + { + "epoch": 0.7951373037003518, + "grad_norm": 390.8659362792969, + "learning_rate": 5.266722259417996e-06, + "loss": 91.536, + "step": 196810 + }, + { + "epoch": 0.7951777049657195, + "grad_norm": 290.2196960449219, + "learning_rate": 5.264833931486875e-06, + "loss": 66.6126, + "step": 196820 + }, + { + "epoch": 0.7952181062310871, + "grad_norm": 470.4638977050781, + "learning_rate": 5.262945890825417e-06, + "loss": 74.1133, + "step": 196830 + }, + { + "epoch": 0.7952585074964548, + "grad_norm": 433.9496765136719, + "learning_rate": 5.261058137470403e-06, + "loss": 61.4028, + "step": 196840 + }, + { + "epoch": 0.7952989087618224, + "grad_norm": 1175.022705078125, + "learning_rate": 5.25917067145866e-06, + "loss": 64.7309, + "step": 196850 + }, + { + "epoch": 0.79533931002719, + "grad_norm": 458.92626953125, + "learning_rate": 5.2572834928269704e-06, + "loss": 
66.7948, + "step": 196860 + }, + { + "epoch": 0.7953797112925577, + "grad_norm": 617.4252319335938, + "learning_rate": 5.255396601612126e-06, + "loss": 43.4264, + "step": 196870 + }, + { + "epoch": 0.7954201125579253, + "grad_norm": 1106.809326171875, + "learning_rate": 5.253509997850923e-06, + "loss": 102.1933, + "step": 196880 + }, + { + "epoch": 0.795460513823293, + "grad_norm": 513.2438354492188, + "learning_rate": 5.251623681580122e-06, + "loss": 64.7417, + "step": 196890 + }, + { + "epoch": 0.7955009150886606, + "grad_norm": 764.281494140625, + "learning_rate": 5.249737652836524e-06, + "loss": 41.3968, + "step": 196900 + }, + { + "epoch": 0.7955413163540282, + "grad_norm": 823.2719116210938, + "learning_rate": 5.247851911656878e-06, + "loss": 61.3331, + "step": 196910 + }, + { + "epoch": 0.7955817176193959, + "grad_norm": 647.1095581054688, + "learning_rate": 5.245966458077954e-06, + "loss": 80.3887, + "step": 196920 + }, + { + "epoch": 0.7956221188847634, + "grad_norm": 761.2973022460938, + "learning_rate": 5.244081292136509e-06, + "loss": 74.4949, + "step": 196930 + }, + { + "epoch": 0.7956625201501311, + "grad_norm": 485.6175231933594, + "learning_rate": 5.242196413869302e-06, + "loss": 61.0986, + "step": 196940 + }, + { + "epoch": 0.7957029214154987, + "grad_norm": 539.3735961914062, + "learning_rate": 5.240311823313069e-06, + "loss": 52.5716, + "step": 196950 + }, + { + "epoch": 0.7957433226808663, + "grad_norm": 739.6612548828125, + "learning_rate": 5.238427520504554e-06, + "loss": 83.5769, + "step": 196960 + }, + { + "epoch": 0.795783723946234, + "grad_norm": 1255.1484375, + "learning_rate": 5.236543505480496e-06, + "loss": 60.5744, + "step": 196970 + }, + { + "epoch": 0.7958241252116016, + "grad_norm": 646.8488159179688, + "learning_rate": 5.234659778277622e-06, + "loss": 55.4763, + "step": 196980 + }, + { + "epoch": 0.7958645264769693, + "grad_norm": 1616.5155029296875, + "learning_rate": 5.232776338932663e-06, + "loss": 66.9615, + "step": 196990 + }, + { + "epoch": 0.7959049277423369, + "grad_norm": 254.66421508789062, + "learning_rate": 5.230893187482322e-06, + "loss": 66.3956, + "step": 197000 + }, + { + "epoch": 0.7959453290077045, + "grad_norm": 941.5853881835938, + "learning_rate": 5.229010323963333e-06, + "loss": 47.9289, + "step": 197010 + }, + { + "epoch": 0.7959857302730722, + "grad_norm": 724.4763793945312, + "learning_rate": 5.227127748412386e-06, + "loss": 66.1019, + "step": 197020 + }, + { + "epoch": 0.7960261315384398, + "grad_norm": 728.1976318359375, + "learning_rate": 5.225245460866188e-06, + "loss": 76.2252, + "step": 197030 + }, + { + "epoch": 0.7960665328038075, + "grad_norm": 582.67236328125, + "learning_rate": 5.2233634613614435e-06, + "loss": 88.8144, + "step": 197040 + }, + { + "epoch": 0.7961069340691751, + "grad_norm": 478.2121887207031, + "learning_rate": 5.221481749934825e-06, + "loss": 70.5305, + "step": 197050 + }, + { + "epoch": 0.7961473353345426, + "grad_norm": 1434.853271484375, + "learning_rate": 5.219600326623038e-06, + "loss": 77.5897, + "step": 197060 + }, + { + "epoch": 0.7961877365999103, + "grad_norm": 461.4397277832031, + "learning_rate": 5.217719191462747e-06, + "loss": 50.6175, + "step": 197070 + }, + { + "epoch": 0.7962281378652779, + "grad_norm": 770.552001953125, + "learning_rate": 5.215838344490631e-06, + "loss": 81.5829, + "step": 197080 + }, + { + "epoch": 0.7962685391306455, + "grad_norm": 722.8850708007812, + "learning_rate": 5.213957785743358e-06, + "loss": 66.1978, + "step": 197090 + }, + { + "epoch": 0.7963089403960132, 
+ "grad_norm": 1009.544189453125, + "learning_rate": 5.212077515257597e-06, + "loss": 47.6926, + "step": 197100 + }, + { + "epoch": 0.7963493416613808, + "grad_norm": 2472.3173828125, + "learning_rate": 5.210197533069985e-06, + "loss": 107.9225, + "step": 197110 + }, + { + "epoch": 0.7963897429267485, + "grad_norm": 828.9481811523438, + "learning_rate": 5.2083178392171985e-06, + "loss": 87.4263, + "step": 197120 + }, + { + "epoch": 0.7964301441921161, + "grad_norm": 928.3444213867188, + "learning_rate": 5.206438433735863e-06, + "loss": 60.6803, + "step": 197130 + }, + { + "epoch": 0.7964705454574837, + "grad_norm": 454.3027038574219, + "learning_rate": 5.20455931666263e-06, + "loss": 47.3043, + "step": 197140 + }, + { + "epoch": 0.7965109467228514, + "grad_norm": 292.7356872558594, + "learning_rate": 5.202680488034135e-06, + "loss": 50.137, + "step": 197150 + }, + { + "epoch": 0.796551347988219, + "grad_norm": 1382.556396484375, + "learning_rate": 5.200801947886989e-06, + "loss": 66.6105, + "step": 197160 + }, + { + "epoch": 0.7965917492535867, + "grad_norm": 709.8524780273438, + "learning_rate": 5.198923696257843e-06, + "loss": 45.6601, + "step": 197170 + }, + { + "epoch": 0.7966321505189543, + "grad_norm": 1106.4617919921875, + "learning_rate": 5.1970457331832905e-06, + "loss": 53.6495, + "step": 197180 + }, + { + "epoch": 0.7966725517843218, + "grad_norm": 1079.3809814453125, + "learning_rate": 5.195168058699953e-06, + "loss": 61.3003, + "step": 197190 + }, + { + "epoch": 0.7967129530496895, + "grad_norm": 839.0560302734375, + "learning_rate": 5.193290672844438e-06, + "loss": 86.0989, + "step": 197200 + }, + { + "epoch": 0.7967533543150571, + "grad_norm": 475.5909423828125, + "learning_rate": 5.191413575653343e-06, + "loss": 67.3112, + "step": 197210 + }, + { + "epoch": 0.7967937555804248, + "grad_norm": 625.6830444335938, + "learning_rate": 5.18953676716327e-06, + "loss": 67.4591, + "step": 197220 + }, + { + "epoch": 0.7968341568457924, + "grad_norm": 636.3223266601562, + "learning_rate": 5.1876602474108e-06, + "loss": 69.3125, + "step": 197230 + }, + { + "epoch": 0.79687455811116, + "grad_norm": 192.79608154296875, + "learning_rate": 5.185784016432515e-06, + "loss": 74.0214, + "step": 197240 + }, + { + "epoch": 0.7969149593765277, + "grad_norm": 836.4407348632812, + "learning_rate": 5.183908074265001e-06, + "loss": 76.8528, + "step": 197250 + }, + { + "epoch": 0.7969553606418953, + "grad_norm": 1117.2548828125, + "learning_rate": 5.182032420944829e-06, + "loss": 85.9573, + "step": 197260 + }, + { + "epoch": 0.796995761907263, + "grad_norm": 705.3391723632812, + "learning_rate": 5.180157056508554e-06, + "loss": 50.0575, + "step": 197270 + }, + { + "epoch": 0.7970361631726306, + "grad_norm": 978.028076171875, + "learning_rate": 5.1782819809927585e-06, + "loss": 68.736, + "step": 197280 + }, + { + "epoch": 0.7970765644379982, + "grad_norm": 512.1487426757812, + "learning_rate": 5.176407194433981e-06, + "loss": 48.8587, + "step": 197290 + }, + { + "epoch": 0.7971169657033659, + "grad_norm": 783.3118286132812, + "learning_rate": 5.174532696868777e-06, + "loss": 72.3017, + "step": 197300 + }, + { + "epoch": 0.7971573669687335, + "grad_norm": 383.3263854980469, + "learning_rate": 5.172658488333697e-06, + "loss": 62.131, + "step": 197310 + }, + { + "epoch": 0.797197768234101, + "grad_norm": 751.6918334960938, + "learning_rate": 5.1707845688652616e-06, + "loss": 46.5068, + "step": 197320 + }, + { + "epoch": 0.7972381694994687, + "grad_norm": 573.1828002929688, + "learning_rate": 
5.168910938500027e-06, + "loss": 47.9096, + "step": 197330 + }, + { + "epoch": 0.7972785707648363, + "grad_norm": 277.38641357421875, + "learning_rate": 5.167037597274503e-06, + "loss": 53.7811, + "step": 197340 + }, + { + "epoch": 0.797318972030204, + "grad_norm": 788.4767456054688, + "learning_rate": 5.165164545225219e-06, + "loss": 57.703, + "step": 197350 + }, + { + "epoch": 0.7973593732955716, + "grad_norm": 649.050048828125, + "learning_rate": 5.163291782388687e-06, + "loss": 111.4628, + "step": 197360 + }, + { + "epoch": 0.7973997745609392, + "grad_norm": 514.818115234375, + "learning_rate": 5.161419308801421e-06, + "loss": 58.7037, + "step": 197370 + }, + { + "epoch": 0.7974401758263069, + "grad_norm": 1656.3890380859375, + "learning_rate": 5.159547124499933e-06, + "loss": 63.1091, + "step": 197380 + }, + { + "epoch": 0.7974805770916745, + "grad_norm": 906.7816162109375, + "learning_rate": 5.157675229520706e-06, + "loss": 70.001, + "step": 197390 + }, + { + "epoch": 0.7975209783570422, + "grad_norm": 323.2098388671875, + "learning_rate": 5.155803623900242e-06, + "loss": 41.4598, + "step": 197400 + }, + { + "epoch": 0.7975613796224098, + "grad_norm": 1298.9307861328125, + "learning_rate": 5.153932307675029e-06, + "loss": 55.083, + "step": 197410 + }, + { + "epoch": 0.7976017808877774, + "grad_norm": 360.3175048828125, + "learning_rate": 5.152061280881551e-06, + "loss": 39.3468, + "step": 197420 + }, + { + "epoch": 0.7976421821531451, + "grad_norm": 480.0019836425781, + "learning_rate": 5.150190543556279e-06, + "loss": 77.6835, + "step": 197430 + }, + { + "epoch": 0.7976825834185126, + "grad_norm": 245.16041564941406, + "learning_rate": 5.148320095735695e-06, + "loss": 44.2249, + "step": 197440 + }, + { + "epoch": 0.7977229846838803, + "grad_norm": 624.9221801757812, + "learning_rate": 5.1464499374562456e-06, + "loss": 43.3324, + "step": 197450 + }, + { + "epoch": 0.7977633859492479, + "grad_norm": 524.8291015625, + "learning_rate": 5.144580068754415e-06, + "loss": 65.3381, + "step": 197460 + }, + { + "epoch": 0.7978037872146155, + "grad_norm": 434.7145690917969, + "learning_rate": 5.142710489666635e-06, + "loss": 57.3129, + "step": 197470 + }, + { + "epoch": 0.7978441884799832, + "grad_norm": 721.4072265625, + "learning_rate": 5.140841200229365e-06, + "loss": 66.1117, + "step": 197480 + }, + { + "epoch": 0.7978845897453508, + "grad_norm": 334.7727355957031, + "learning_rate": 5.138972200479051e-06, + "loss": 48.5401, + "step": 197490 + }, + { + "epoch": 0.7979249910107185, + "grad_norm": 373.9875793457031, + "learning_rate": 5.137103490452113e-06, + "loss": 49.7185, + "step": 197500 + }, + { + "epoch": 0.7979653922760861, + "grad_norm": 331.3859558105469, + "learning_rate": 5.135235070185007e-06, + "loss": 71.5396, + "step": 197510 + }, + { + "epoch": 0.7980057935414537, + "grad_norm": 590.7794189453125, + "learning_rate": 5.13336693971414e-06, + "loss": 96.1508, + "step": 197520 + }, + { + "epoch": 0.7980461948068214, + "grad_norm": 381.1514892578125, + "learning_rate": 5.131499099075938e-06, + "loss": 80.2253, + "step": 197530 + }, + { + "epoch": 0.798086596072189, + "grad_norm": 1206.818603515625, + "learning_rate": 5.1296315483068145e-06, + "loss": 61.0389, + "step": 197540 + }, + { + "epoch": 0.7981269973375567, + "grad_norm": 1136.2713623046875, + "learning_rate": 5.127764287443189e-06, + "loss": 121.3971, + "step": 197550 + }, + { + "epoch": 0.7981673986029243, + "grad_norm": 333.56439208984375, + "learning_rate": 5.1258973165214465e-06, + "loss": 142.6217, + "step": 197560 + 
}, + { + "epoch": 0.7982077998682918, + "grad_norm": 774.0609130859375, + "learning_rate": 5.124030635577995e-06, + "loss": 56.8396, + "step": 197570 + }, + { + "epoch": 0.7982482011336595, + "grad_norm": 535.3653564453125, + "learning_rate": 5.122164244649224e-06, + "loss": 53.4369, + "step": 197580 + }, + { + "epoch": 0.7982886023990271, + "grad_norm": 406.2797546386719, + "learning_rate": 5.120298143771523e-06, + "loss": 56.4607, + "step": 197590 + }, + { + "epoch": 0.7983290036643947, + "grad_norm": 1183.49560546875, + "learning_rate": 5.118432332981273e-06, + "loss": 52.4784, + "step": 197600 + }, + { + "epoch": 0.7983694049297624, + "grad_norm": 784.4857177734375, + "learning_rate": 5.116566812314836e-06, + "loss": 94.9911, + "step": 197610 + }, + { + "epoch": 0.79840980619513, + "grad_norm": 673.7864990234375, + "learning_rate": 5.114701581808603e-06, + "loss": 100.9854, + "step": 197620 + }, + { + "epoch": 0.7984502074604977, + "grad_norm": 3634.361083984375, + "learning_rate": 5.112836641498922e-06, + "loss": 88.4108, + "step": 197630 + }, + { + "epoch": 0.7984906087258653, + "grad_norm": 900.5426025390625, + "learning_rate": 5.110971991422153e-06, + "loss": 72.1671, + "step": 197640 + }, + { + "epoch": 0.7985310099912329, + "grad_norm": 987.8720703125, + "learning_rate": 5.1091076316146556e-06, + "loss": 65.5826, + "step": 197650 + }, + { + "epoch": 0.7985714112566006, + "grad_norm": 1204.1033935546875, + "learning_rate": 5.107243562112762e-06, + "loss": 45.1077, + "step": 197660 + }, + { + "epoch": 0.7986118125219682, + "grad_norm": 1040.8824462890625, + "learning_rate": 5.105379782952833e-06, + "loss": 78.4434, + "step": 197670 + }, + { + "epoch": 0.7986522137873359, + "grad_norm": 660.4754638671875, + "learning_rate": 5.103516294171187e-06, + "loss": 72.5076, + "step": 197680 + }, + { + "epoch": 0.7986926150527035, + "grad_norm": 478.88336181640625, + "learning_rate": 5.10165309580416e-06, + "loss": 75.761, + "step": 197690 + }, + { + "epoch": 0.798733016318071, + "grad_norm": 782.6514282226562, + "learning_rate": 5.099790187888078e-06, + "loss": 73.7269, + "step": 197700 + }, + { + "epoch": 0.7987734175834387, + "grad_norm": 807.1671752929688, + "learning_rate": 5.097927570459263e-06, + "loss": 55.6674, + "step": 197710 + }, + { + "epoch": 0.7988138188488063, + "grad_norm": 745.4215087890625, + "learning_rate": 5.096065243554013e-06, + "loss": 66.9129, + "step": 197720 + }, + { + "epoch": 0.798854220114174, + "grad_norm": 472.8764953613281, + "learning_rate": 5.094203207208648e-06, + "loss": 88.9958, + "step": 197730 + }, + { + "epoch": 0.7988946213795416, + "grad_norm": 665.2481079101562, + "learning_rate": 5.092341461459463e-06, + "loss": 75.8107, + "step": 197740 + }, + { + "epoch": 0.7989350226449092, + "grad_norm": 801.7098388671875, + "learning_rate": 5.090480006342757e-06, + "loss": 56.2637, + "step": 197750 + }, + { + "epoch": 0.7989754239102769, + "grad_norm": 716.2535400390625, + "learning_rate": 5.088618841894826e-06, + "loss": 68.6319, + "step": 197760 + }, + { + "epoch": 0.7990158251756445, + "grad_norm": 1033.476806640625, + "learning_rate": 5.086757968151934e-06, + "loss": 67.6387, + "step": 197770 + }, + { + "epoch": 0.7990562264410122, + "grad_norm": 477.35919189453125, + "learning_rate": 5.084897385150387e-06, + "loss": 54.9502, + "step": 197780 + }, + { + "epoch": 0.7990966277063798, + "grad_norm": 1210.760498046875, + "learning_rate": 5.083037092926437e-06, + "loss": 116.0934, + "step": 197790 + }, + { + "epoch": 0.7991370289717474, + "grad_norm": 
157.07872009277344, + "learning_rate": 5.081177091516359e-06, + "loss": 52.2958, + "step": 197800 + }, + { + "epoch": 0.7991774302371151, + "grad_norm": 805.3746948242188, + "learning_rate": 5.079317380956414e-06, + "loss": 80.4175, + "step": 197810 + }, + { + "epoch": 0.7992178315024827, + "grad_norm": 572.009521484375, + "learning_rate": 5.0774579612828586e-06, + "loss": 67.097, + "step": 197820 + }, + { + "epoch": 0.7992582327678502, + "grad_norm": 567.9599609375, + "learning_rate": 5.0755988325319496e-06, + "loss": 67.0281, + "step": 197830 + }, + { + "epoch": 0.7992986340332179, + "grad_norm": 574.2120971679688, + "learning_rate": 5.0737399947399166e-06, + "loss": 56.035, + "step": 197840 + }, + { + "epoch": 0.7993390352985855, + "grad_norm": 823.6156616210938, + "learning_rate": 5.07188144794301e-06, + "loss": 113.8094, + "step": 197850 + }, + { + "epoch": 0.7993794365639532, + "grad_norm": 443.3690490722656, + "learning_rate": 5.070023192177458e-06, + "loss": 41.0242, + "step": 197860 + }, + { + "epoch": 0.7994198378293208, + "grad_norm": 684.1746826171875, + "learning_rate": 5.068165227479498e-06, + "loss": 80.4849, + "step": 197870 + }, + { + "epoch": 0.7994602390946884, + "grad_norm": 847.6041870117188, + "learning_rate": 5.0663075538853306e-06, + "loss": 56.526, + "step": 197880 + }, + { + "epoch": 0.7995006403600561, + "grad_norm": 621.6456298828125, + "learning_rate": 5.064450171431199e-06, + "loss": 65.3973, + "step": 197890 + }, + { + "epoch": 0.7995410416254237, + "grad_norm": 814.2513427734375, + "learning_rate": 5.062593080153295e-06, + "loss": 81.6166, + "step": 197900 + }, + { + "epoch": 0.7995814428907914, + "grad_norm": 491.21282958984375, + "learning_rate": 5.060736280087828e-06, + "loss": 78.1966, + "step": 197910 + }, + { + "epoch": 0.799621844156159, + "grad_norm": 1217.310546875, + "learning_rate": 5.0588797712710035e-06, + "loss": 110.1732, + "step": 197920 + }, + { + "epoch": 0.7996622454215266, + "grad_norm": 418.4263916015625, + "learning_rate": 5.057023553739e-06, + "loss": 79.1593, + "step": 197930 + }, + { + "epoch": 0.7997026466868943, + "grad_norm": 531.6622314453125, + "learning_rate": 5.055167627528028e-06, + "loss": 57.5455, + "step": 197940 + }, + { + "epoch": 0.7997430479522618, + "grad_norm": 1104.2218017578125, + "learning_rate": 5.053311992674243e-06, + "loss": 69.4859, + "step": 197950 + }, + { + "epoch": 0.7997834492176294, + "grad_norm": 845.2813720703125, + "learning_rate": 5.051456649213851e-06, + "loss": 75.0023, + "step": 197960 + }, + { + "epoch": 0.7998238504829971, + "grad_norm": 6369.02685546875, + "learning_rate": 5.049601597182998e-06, + "loss": 114.2462, + "step": 197970 + }, + { + "epoch": 0.7998642517483647, + "grad_norm": 1262.4097900390625, + "learning_rate": 5.0477468366178614e-06, + "loss": 61.4881, + "step": 197980 + }, + { + "epoch": 0.7999046530137324, + "grad_norm": 927.794677734375, + "learning_rate": 5.0458923675546035e-06, + "loss": 83.2494, + "step": 197990 + }, + { + "epoch": 0.7999450542791, + "grad_norm": 721.2593994140625, + "learning_rate": 5.044038190029366e-06, + "loss": 71.7542, + "step": 198000 + }, + { + "epoch": 0.7999854555444676, + "grad_norm": 781.5968627929688, + "learning_rate": 5.0421843040783035e-06, + "loss": 64.6263, + "step": 198010 + }, + { + "epoch": 0.8000258568098353, + "grad_norm": 517.8507080078125, + "learning_rate": 5.04033070973756e-06, + "loss": 51.012, + "step": 198020 + }, + { + "epoch": 0.8000662580752029, + "grad_norm": 403.0516357421875, + "learning_rate": 5.0384774070432715e-06, + 
"loss": 83.5187, + "step": 198030 + }, + { + "epoch": 0.8001066593405706, + "grad_norm": 1217.47705078125, + "learning_rate": 5.036624396031568e-06, + "loss": 93.277, + "step": 198040 + }, + { + "epoch": 0.8001470606059382, + "grad_norm": 455.79632568359375, + "learning_rate": 5.034771676738581e-06, + "loss": 48.6318, + "step": 198050 + }, + { + "epoch": 0.8001874618713058, + "grad_norm": 714.5032958984375, + "learning_rate": 5.0329192492004186e-06, + "loss": 73.9955, + "step": 198060 + }, + { + "epoch": 0.8002278631366735, + "grad_norm": 1200.610595703125, + "learning_rate": 5.031067113453203e-06, + "loss": 42.2606, + "step": 198070 + }, + { + "epoch": 0.800268264402041, + "grad_norm": 1191.0537109375, + "learning_rate": 5.029215269533039e-06, + "loss": 85.481, + "step": 198080 + }, + { + "epoch": 0.8003086656674087, + "grad_norm": 547.728759765625, + "learning_rate": 5.027363717476031e-06, + "loss": 56.0167, + "step": 198090 + }, + { + "epoch": 0.8003490669327763, + "grad_norm": 808.0905151367188, + "learning_rate": 5.025512457318282e-06, + "loss": 107.0646, + "step": 198100 + }, + { + "epoch": 0.8003894681981439, + "grad_norm": 624.0579223632812, + "learning_rate": 5.023661489095863e-06, + "loss": 69.5478, + "step": 198110 + }, + { + "epoch": 0.8004298694635116, + "grad_norm": 1607.9798583984375, + "learning_rate": 5.02181081284489e-06, + "loss": 71.6181, + "step": 198120 + }, + { + "epoch": 0.8004702707288792, + "grad_norm": 469.4092102050781, + "learning_rate": 5.019960428601416e-06, + "loss": 43.9724, + "step": 198130 + }, + { + "epoch": 0.8005106719942469, + "grad_norm": 994.5287475585938, + "learning_rate": 5.018110336401527e-06, + "loss": 64.9244, + "step": 198140 + }, + { + "epoch": 0.8005510732596145, + "grad_norm": 599.136474609375, + "learning_rate": 5.016260536281293e-06, + "loss": 69.0754, + "step": 198150 + }, + { + "epoch": 0.8005914745249821, + "grad_norm": 400.5040588378906, + "learning_rate": 5.014411028276775e-06, + "loss": 65.8642, + "step": 198160 + }, + { + "epoch": 0.8006318757903498, + "grad_norm": 691.6903076171875, + "learning_rate": 5.012561812424028e-06, + "loss": 86.122, + "step": 198170 + }, + { + "epoch": 0.8006722770557174, + "grad_norm": 649.5882568359375, + "learning_rate": 5.010712888759102e-06, + "loss": 63.3135, + "step": 198180 + }, + { + "epoch": 0.8007126783210851, + "grad_norm": 447.17840576171875, + "learning_rate": 5.008864257318045e-06, + "loss": 61.4576, + "step": 198190 + }, + { + "epoch": 0.8007530795864527, + "grad_norm": 812.7483520507812, + "learning_rate": 5.007015918136899e-06, + "loss": 61.0129, + "step": 198200 + }, + { + "epoch": 0.8007934808518202, + "grad_norm": 429.31634521484375, + "learning_rate": 5.005167871251702e-06, + "loss": 80.7649, + "step": 198210 + }, + { + "epoch": 0.8008338821171879, + "grad_norm": 812.1944580078125, + "learning_rate": 5.003320116698465e-06, + "loss": 102.7798, + "step": 198220 + }, + { + "epoch": 0.8008742833825555, + "grad_norm": 693.7962646484375, + "learning_rate": 5.001472654513233e-06, + "loss": 69.0636, + "step": 198230 + }, + { + "epoch": 0.8009146846479231, + "grad_norm": 632.196533203125, + "learning_rate": 4.999625484732009e-06, + "loss": 74.9457, + "step": 198240 + }, + { + "epoch": 0.8009550859132908, + "grad_norm": 501.4670715332031, + "learning_rate": 4.997778607390809e-06, + "loss": 55.8872, + "step": 198250 + }, + { + "epoch": 0.8009954871786584, + "grad_norm": 536.1237182617188, + "learning_rate": 4.995932022525644e-06, + "loss": 53.6052, + "step": 198260 + }, + { + "epoch": 
0.8010358884440261, + "grad_norm": 623.710205078125, + "learning_rate": 4.9940857301725e-06, + "loss": 109.7616, + "step": 198270 + }, + { + "epoch": 0.8010762897093937, + "grad_norm": 415.929931640625, + "learning_rate": 4.99223973036739e-06, + "loss": 40.8621, + "step": 198280 + }, + { + "epoch": 0.8011166909747613, + "grad_norm": 1154.6942138671875, + "learning_rate": 4.990394023146286e-06, + "loss": 78.5072, + "step": 198290 + }, + { + "epoch": 0.801157092240129, + "grad_norm": 500.2198181152344, + "learning_rate": 4.988548608545178e-06, + "loss": 66.3165, + "step": 198300 + }, + { + "epoch": 0.8011974935054966, + "grad_norm": 1043.51806640625, + "learning_rate": 4.986703486600044e-06, + "loss": 75.0396, + "step": 198310 + }, + { + "epoch": 0.8012378947708643, + "grad_norm": 919.5612182617188, + "learning_rate": 4.984858657346862e-06, + "loss": 47.7736, + "step": 198320 + }, + { + "epoch": 0.8012782960362319, + "grad_norm": 894.4104614257812, + "learning_rate": 4.983014120821583e-06, + "loss": 63.6933, + "step": 198330 + }, + { + "epoch": 0.8013186973015994, + "grad_norm": 768.0238647460938, + "learning_rate": 4.981169877060177e-06, + "loss": 77.159, + "step": 198340 + }, + { + "epoch": 0.8013590985669671, + "grad_norm": 640.6677856445312, + "learning_rate": 4.979325926098595e-06, + "loss": 51.0113, + "step": 198350 + }, + { + "epoch": 0.8013994998323347, + "grad_norm": 629.4816284179688, + "learning_rate": 4.977482267972786e-06, + "loss": 55.4775, + "step": 198360 + }, + { + "epoch": 0.8014399010977024, + "grad_norm": 726.5436401367188, + "learning_rate": 4.9756389027187025e-06, + "loss": 114.2215, + "step": 198370 + }, + { + "epoch": 0.80148030236307, + "grad_norm": 556.4415893554688, + "learning_rate": 4.973795830372261e-06, + "loss": 67.8119, + "step": 198380 + }, + { + "epoch": 0.8015207036284376, + "grad_norm": 559.693603515625, + "learning_rate": 4.971953050969418e-06, + "loss": 82.6068, + "step": 198390 + }, + { + "epoch": 0.8015611048938053, + "grad_norm": 856.8923950195312, + "learning_rate": 4.97011056454608e-06, + "loss": 65.8439, + "step": 198400 + }, + { + "epoch": 0.8016015061591729, + "grad_norm": 516.6926879882812, + "learning_rate": 4.9682683711381766e-06, + "loss": 75.1911, + "step": 198410 + }, + { + "epoch": 0.8016419074245406, + "grad_norm": 511.05682373046875, + "learning_rate": 4.9664264707816175e-06, + "loss": 55.4673, + "step": 198420 + }, + { + "epoch": 0.8016823086899082, + "grad_norm": 623.7053833007812, + "learning_rate": 4.9645848635123166e-06, + "loss": 86.0544, + "step": 198430 + }, + { + "epoch": 0.8017227099552758, + "grad_norm": 823.7802734375, + "learning_rate": 4.96274354936618e-06, + "loss": 58.3917, + "step": 198440 + }, + { + "epoch": 0.8017631112206435, + "grad_norm": 987.7944946289062, + "learning_rate": 4.960902528379092e-06, + "loss": 69.7545, + "step": 198450 + }, + { + "epoch": 0.8018035124860111, + "grad_norm": 626.90673828125, + "learning_rate": 4.959061800586955e-06, + "loss": 97.0137, + "step": 198460 + }, + { + "epoch": 0.8018439137513786, + "grad_norm": 684.9356079101562, + "learning_rate": 4.9572213660256485e-06, + "loss": 46.8627, + "step": 198470 + }, + { + "epoch": 0.8018843150167463, + "grad_norm": 488.16900634765625, + "learning_rate": 4.955381224731058e-06, + "loss": 60.0827, + "step": 198480 + }, + { + "epoch": 0.8019247162821139, + "grad_norm": 912.5643310546875, + "learning_rate": 4.953541376739055e-06, + "loss": 89.2192, + "step": 198490 + }, + { + "epoch": 0.8019651175474816, + "grad_norm": 886.3773803710938, + 
"learning_rate": 4.951701822085515e-06, + "loss": 79.4323, + "step": 198500 + }, + { + "epoch": 0.8020055188128492, + "grad_norm": 551.8983154296875, + "learning_rate": 4.949862560806291e-06, + "loss": 64.9316, + "step": 198510 + }, + { + "epoch": 0.8020459200782168, + "grad_norm": 884.139892578125, + "learning_rate": 4.9480235929372436e-06, + "loss": 76.7135, + "step": 198520 + }, + { + "epoch": 0.8020863213435845, + "grad_norm": 450.048828125, + "learning_rate": 4.946184918514232e-06, + "loss": 58.3559, + "step": 198530 + }, + { + "epoch": 0.8021267226089521, + "grad_norm": 559.5551147460938, + "learning_rate": 4.944346537573084e-06, + "loss": 51.0474, + "step": 198540 + }, + { + "epoch": 0.8021671238743198, + "grad_norm": 573.5088500976562, + "learning_rate": 4.942508450149661e-06, + "loss": 65.7362, + "step": 198550 + }, + { + "epoch": 0.8022075251396874, + "grad_norm": 706.69677734375, + "learning_rate": 4.940670656279778e-06, + "loss": 79.0134, + "step": 198560 + }, + { + "epoch": 0.802247926405055, + "grad_norm": 757.0601806640625, + "learning_rate": 4.9388331559992856e-06, + "loss": 42.2272, + "step": 198570 + }, + { + "epoch": 0.8022883276704227, + "grad_norm": 582.6026611328125, + "learning_rate": 4.936995949343988e-06, + "loss": 56.0491, + "step": 198580 + }, + { + "epoch": 0.8023287289357902, + "grad_norm": 492.28240966796875, + "learning_rate": 4.93515903634971e-06, + "loss": 75.634, + "step": 198590 + }, + { + "epoch": 0.8023691302011579, + "grad_norm": 1495.7139892578125, + "learning_rate": 4.933322417052269e-06, + "loss": 60.3048, + "step": 198600 + }, + { + "epoch": 0.8024095314665255, + "grad_norm": 491.3946838378906, + "learning_rate": 4.931486091487456e-06, + "loss": 96.8619, + "step": 198610 + }, + { + "epoch": 0.8024499327318931, + "grad_norm": 679.8978881835938, + "learning_rate": 4.929650059691082e-06, + "loss": 74.043, + "step": 198620 + }, + { + "epoch": 0.8024903339972608, + "grad_norm": 383.28924560546875, + "learning_rate": 4.927814321698938e-06, + "loss": 47.0701, + "step": 198630 + }, + { + "epoch": 0.8025307352626284, + "grad_norm": 907.4324340820312, + "learning_rate": 4.925978877546813e-06, + "loss": 63.936, + "step": 198640 + }, + { + "epoch": 0.802571136527996, + "grad_norm": 656.7352294921875, + "learning_rate": 4.924143727270491e-06, + "loss": 55.9313, + "step": 198650 + }, + { + "epoch": 0.8026115377933637, + "grad_norm": 542.0066528320312, + "learning_rate": 4.922308870905754e-06, + "loss": 53.6263, + "step": 198660 + }, + { + "epoch": 0.8026519390587313, + "grad_norm": 772.85498046875, + "learning_rate": 4.920474308488363e-06, + "loss": 102.6879, + "step": 198670 + }, + { + "epoch": 0.802692340324099, + "grad_norm": 807.3645629882812, + "learning_rate": 4.9186400400540875e-06, + "loss": 82.5206, + "step": 198680 + }, + { + "epoch": 0.8027327415894666, + "grad_norm": 717.2640991210938, + "learning_rate": 4.91680606563869e-06, + "loss": 56.9824, + "step": 198690 + }, + { + "epoch": 0.8027731428548343, + "grad_norm": 472.66595458984375, + "learning_rate": 4.914972385277923e-06, + "loss": 71.5883, + "step": 198700 + }, + { + "epoch": 0.8028135441202019, + "grad_norm": 526.8843383789062, + "learning_rate": 4.913138999007543e-06, + "loss": 65.1613, + "step": 198710 + }, + { + "epoch": 0.8028539453855694, + "grad_norm": 1121.8946533203125, + "learning_rate": 4.9113059068632705e-06, + "loss": 107.9162, + "step": 198720 + }, + { + "epoch": 0.8028943466509371, + "grad_norm": 738.1530151367188, + "learning_rate": 4.909473108880873e-06, + "loss": 54.4362, + 
"step": 198730 + }, + { + "epoch": 0.8029347479163047, + "grad_norm": 744.5302124023438, + "learning_rate": 4.907640605096058e-06, + "loss": 95.2522, + "step": 198740 + }, + { + "epoch": 0.8029751491816723, + "grad_norm": 687.0172119140625, + "learning_rate": 4.90580839554456e-06, + "loss": 60.8925, + "step": 198750 + }, + { + "epoch": 0.80301555044704, + "grad_norm": 755.894775390625, + "learning_rate": 4.9039764802620985e-06, + "loss": 56.8497, + "step": 198760 + }, + { + "epoch": 0.8030559517124076, + "grad_norm": 574.1384887695312, + "learning_rate": 4.902144859284395e-06, + "loss": 85.1289, + "step": 198770 + }, + { + "epoch": 0.8030963529777753, + "grad_norm": 600.3209228515625, + "learning_rate": 4.900313532647141e-06, + "loss": 42.8637, + "step": 198780 + }, + { + "epoch": 0.8031367542431429, + "grad_norm": 549.0084228515625, + "learning_rate": 4.898482500386051e-06, + "loss": 93.1559, + "step": 198790 + }, + { + "epoch": 0.8031771555085105, + "grad_norm": 780.4871826171875, + "learning_rate": 4.896651762536819e-06, + "loss": 75.2438, + "step": 198800 + }, + { + "epoch": 0.8032175567738782, + "grad_norm": 564.8466796875, + "learning_rate": 4.8948213191351365e-06, + "loss": 77.246, + "step": 198810 + }, + { + "epoch": 0.8032579580392458, + "grad_norm": 808.0231323242188, + "learning_rate": 4.892991170216694e-06, + "loss": 65.5534, + "step": 198820 + }, + { + "epoch": 0.8032983593046135, + "grad_norm": 881.0784301757812, + "learning_rate": 4.891161315817157e-06, + "loss": 64.691, + "step": 198830 + }, + { + "epoch": 0.8033387605699811, + "grad_norm": 336.58306884765625, + "learning_rate": 4.889331755972218e-06, + "loss": 101.5202, + "step": 198840 + }, + { + "epoch": 0.8033791618353486, + "grad_norm": 897.4869995117188, + "learning_rate": 4.887502490717531e-06, + "loss": 48.1098, + "step": 198850 + }, + { + "epoch": 0.8034195631007163, + "grad_norm": 1498.43212890625, + "learning_rate": 4.8856735200887624e-06, + "loss": 85.3192, + "step": 198860 + }, + { + "epoch": 0.8034599643660839, + "grad_norm": 951.5518798828125, + "learning_rate": 4.8838448441215765e-06, + "loss": 84.2649, + "step": 198870 + }, + { + "epoch": 0.8035003656314516, + "grad_norm": 1067.383544921875, + "learning_rate": 4.882016462851607e-06, + "loss": 83.2608, + "step": 198880 + }, + { + "epoch": 0.8035407668968192, + "grad_norm": 764.8590698242188, + "learning_rate": 4.880188376314521e-06, + "loss": 53.9923, + "step": 198890 + }, + { + "epoch": 0.8035811681621868, + "grad_norm": 933.7940063476562, + "learning_rate": 4.878360584545941e-06, + "loss": 60.8101, + "step": 198900 + }, + { + "epoch": 0.8036215694275545, + "grad_norm": 474.60223388671875, + "learning_rate": 4.876533087581505e-06, + "loss": 58.3732, + "step": 198910 + }, + { + "epoch": 0.8036619706929221, + "grad_norm": 1086.766845703125, + "learning_rate": 4.874705885456843e-06, + "loss": 62.7145, + "step": 198920 + }, + { + "epoch": 0.8037023719582898, + "grad_norm": 2695.24658203125, + "learning_rate": 4.872878978207578e-06, + "loss": 85.4635, + "step": 198930 + }, + { + "epoch": 0.8037427732236574, + "grad_norm": 401.3848571777344, + "learning_rate": 4.871052365869331e-06, + "loss": 65.9007, + "step": 198940 + }, + { + "epoch": 0.803783174489025, + "grad_norm": 1086.5887451171875, + "learning_rate": 4.8692260484777e-06, + "loss": 119.9782, + "step": 198950 + }, + { + "epoch": 0.8038235757543927, + "grad_norm": 520.950927734375, + "learning_rate": 4.867400026068299e-06, + "loss": 66.33, + "step": 198960 + }, + { + "epoch": 0.8038639770197603, + 
"grad_norm": 1122.876708984375, + "learning_rate": 4.865574298676723e-06, + "loss": 78.789, + "step": 198970 + }, + { + "epoch": 0.8039043782851278, + "grad_norm": 449.2276611328125, + "learning_rate": 4.863748866338576e-06, + "loss": 63.4738, + "step": 198980 + }, + { + "epoch": 0.8039447795504955, + "grad_norm": 525.1778564453125, + "learning_rate": 4.861923729089424e-06, + "loss": 59.7925, + "step": 198990 + }, + { + "epoch": 0.8039851808158631, + "grad_norm": 815.0696411132812, + "learning_rate": 4.8600988869648745e-06, + "loss": 54.1217, + "step": 199000 + }, + { + "epoch": 0.8040255820812308, + "grad_norm": 692.9642333984375, + "learning_rate": 4.8582743400004864e-06, + "loss": 57.0401, + "step": 199010 + }, + { + "epoch": 0.8040659833465984, + "grad_norm": 621.1055908203125, + "learning_rate": 4.856450088231834e-06, + "loss": 46.7863, + "step": 199020 + }, + { + "epoch": 0.804106384611966, + "grad_norm": 471.2278137207031, + "learning_rate": 4.854626131694487e-06, + "loss": 59.9537, + "step": 199030 + }, + { + "epoch": 0.8041467858773337, + "grad_norm": 561.0694580078125, + "learning_rate": 4.852802470423999e-06, + "loss": 75.143, + "step": 199040 + }, + { + "epoch": 0.8041871871427013, + "grad_norm": 752.849365234375, + "learning_rate": 4.8509791044559284e-06, + "loss": 75.829, + "step": 199050 + }, + { + "epoch": 0.804227588408069, + "grad_norm": 378.3296813964844, + "learning_rate": 4.849156033825817e-06, + "loss": 71.9758, + "step": 199060 + }, + { + "epoch": 0.8042679896734366, + "grad_norm": 629.9951782226562, + "learning_rate": 4.8473332585692065e-06, + "loss": 59.7217, + "step": 199070 + }, + { + "epoch": 0.8043083909388042, + "grad_norm": 725.7848510742188, + "learning_rate": 4.8455107787216386e-06, + "loss": 121.5695, + "step": 199080 + }, + { + "epoch": 0.8043487922041719, + "grad_norm": 504.1531982421875, + "learning_rate": 4.843688594318637e-06, + "loss": 71.9199, + "step": 199090 + }, + { + "epoch": 0.8043891934695395, + "grad_norm": 419.4039001464844, + "learning_rate": 4.84186670539573e-06, + "loss": 52.9678, + "step": 199100 + }, + { + "epoch": 0.804429594734907, + "grad_norm": 462.0122985839844, + "learning_rate": 4.840045111988443e-06, + "loss": 57.683, + "step": 199110 + }, + { + "epoch": 0.8044699960002747, + "grad_norm": 418.5985412597656, + "learning_rate": 4.8382238141322726e-06, + "loss": 39.3575, + "step": 199120 + }, + { + "epoch": 0.8045103972656423, + "grad_norm": 464.0343322753906, + "learning_rate": 4.836402811862737e-06, + "loss": 40.7829, + "step": 199130 + }, + { + "epoch": 0.80455079853101, + "grad_norm": 531.5284423828125, + "learning_rate": 4.8345821052153395e-06, + "loss": 50.046, + "step": 199140 + }, + { + "epoch": 0.8045911997963776, + "grad_norm": 1320.6920166015625, + "learning_rate": 4.832761694225563e-06, + "loss": 83.9558, + "step": 199150 + }, + { + "epoch": 0.8046316010617452, + "grad_norm": 1132.6134033203125, + "learning_rate": 4.8309415789289135e-06, + "loss": 100.5547, + "step": 199160 + }, + { + "epoch": 0.8046720023271129, + "grad_norm": 396.85675048828125, + "learning_rate": 4.82912175936086e-06, + "loss": 56.1378, + "step": 199170 + }, + { + "epoch": 0.8047124035924805, + "grad_norm": 781.77490234375, + "learning_rate": 4.827302235556896e-06, + "loss": 89.0556, + "step": 199180 + }, + { + "epoch": 0.8047528048578482, + "grad_norm": 1244.638671875, + "learning_rate": 4.825483007552483e-06, + "loss": 63.5662, + "step": 199190 + }, + { + "epoch": 0.8047932061232158, + "grad_norm": 1030.5054931640625, + "learning_rate": 
4.82366407538309e-06, + "loss": 64.5347, + "step": 199200 + }, + { + "epoch": 0.8048336073885834, + "grad_norm": 846.8048095703125, + "learning_rate": 4.821845439084185e-06, + "loss": 78.0727, + "step": 199210 + }, + { + "epoch": 0.8048740086539511, + "grad_norm": 733.8461303710938, + "learning_rate": 4.820027098691213e-06, + "loss": 48.8141, + "step": 199220 + }, + { + "epoch": 0.8049144099193186, + "grad_norm": 861.7489624023438, + "learning_rate": 4.818209054239626e-06, + "loss": 70.7907, + "step": 199230 + }, + { + "epoch": 0.8049548111846863, + "grad_norm": 658.82080078125, + "learning_rate": 4.816391305764872e-06, + "loss": 57.1509, + "step": 199240 + }, + { + "epoch": 0.8049952124500539, + "grad_norm": 552.770751953125, + "learning_rate": 4.814573853302384e-06, + "loss": 59.226, + "step": 199250 + }, + { + "epoch": 0.8050356137154215, + "grad_norm": 570.5429077148438, + "learning_rate": 4.812756696887597e-06, + "loss": 70.5084, + "step": 199260 + }, + { + "epoch": 0.8050760149807892, + "grad_norm": 338.0552978515625, + "learning_rate": 4.810939836555943e-06, + "loss": 54.5322, + "step": 199270 + }, + { + "epoch": 0.8051164162461568, + "grad_norm": 663.9932861328125, + "learning_rate": 4.809123272342832e-06, + "loss": 57.2844, + "step": 199280 + }, + { + "epoch": 0.8051568175115245, + "grad_norm": 429.7908020019531, + "learning_rate": 4.807307004283683e-06, + "loss": 80.2578, + "step": 199290 + }, + { + "epoch": 0.8051972187768921, + "grad_norm": 373.9275207519531, + "learning_rate": 4.805491032413913e-06, + "loss": 52.7411, + "step": 199300 + }, + { + "epoch": 0.8052376200422597, + "grad_norm": 828.2244262695312, + "learning_rate": 4.8036753567689046e-06, + "loss": 55.6494, + "step": 199310 + }, + { + "epoch": 0.8052780213076274, + "grad_norm": 572.9473876953125, + "learning_rate": 4.80185997738408e-06, + "loss": 83.1284, + "step": 199320 + }, + { + "epoch": 0.805318422572995, + "grad_norm": 1036.3521728515625, + "learning_rate": 4.800044894294808e-06, + "loss": 73.9801, + "step": 199330 + }, + { + "epoch": 0.8053588238383627, + "grad_norm": 768.1046142578125, + "learning_rate": 4.7982301075365015e-06, + "loss": 77.4632, + "step": 199340 + }, + { + "epoch": 0.8053992251037303, + "grad_norm": 1277.91552734375, + "learning_rate": 4.796415617144516e-06, + "loss": 76.9297, + "step": 199350 + }, + { + "epoch": 0.8054396263690978, + "grad_norm": 1029.5404052734375, + "learning_rate": 4.794601423154235e-06, + "loss": 88.4859, + "step": 199360 + }, + { + "epoch": 0.8054800276344655, + "grad_norm": 822.1131591796875, + "learning_rate": 4.7927875256010305e-06, + "loss": 67.4397, + "step": 199370 + }, + { + "epoch": 0.8055204288998331, + "grad_norm": 743.3072509765625, + "learning_rate": 4.790973924520266e-06, + "loss": 70.5505, + "step": 199380 + }, + { + "epoch": 0.8055608301652007, + "grad_norm": 511.23504638671875, + "learning_rate": 4.789160619947289e-06, + "loss": 45.9361, + "step": 199390 + }, + { + "epoch": 0.8056012314305684, + "grad_norm": 309.4663391113281, + "learning_rate": 4.787347611917457e-06, + "loss": 79.9471, + "step": 199400 + }, + { + "epoch": 0.805641632695936, + "grad_norm": 790.1514892578125, + "learning_rate": 4.785534900466116e-06, + "loss": 85.688, + "step": 199410 + }, + { + "epoch": 0.8056820339613037, + "grad_norm": 998.5556640625, + "learning_rate": 4.783722485628603e-06, + "loss": 112.5549, + "step": 199420 + }, + { + "epoch": 0.8057224352266713, + "grad_norm": 1271.2747802734375, + "learning_rate": 4.781910367440263e-06, + "loss": 77.943, + "step": 199430 + }, 
+ { + "epoch": 0.805762836492039, + "grad_norm": 921.263916015625, + "learning_rate": 4.780098545936402e-06, + "loss": 87.2626, + "step": 199440 + }, + { + "epoch": 0.8058032377574066, + "grad_norm": 729.3549194335938, + "learning_rate": 4.7782870211523676e-06, + "loss": 38.3069, + "step": 199450 + }, + { + "epoch": 0.8058436390227742, + "grad_norm": 656.723388671875, + "learning_rate": 4.776475793123458e-06, + "loss": 71.5001, + "step": 199460 + }, + { + "epoch": 0.8058840402881419, + "grad_norm": 467.0643310546875, + "learning_rate": 4.77466486188499e-06, + "loss": 50.8688, + "step": 199470 + }, + { + "epoch": 0.8059244415535095, + "grad_norm": 830.067138671875, + "learning_rate": 4.7728542274722765e-06, + "loss": 88.713, + "step": 199480 + }, + { + "epoch": 0.805964842818877, + "grad_norm": 502.71209716796875, + "learning_rate": 4.771043889920597e-06, + "loss": 79.624, + "step": 199490 + }, + { + "epoch": 0.8060052440842447, + "grad_norm": 755.7929077148438, + "learning_rate": 4.769233849265269e-06, + "loss": 62.8511, + "step": 199500 + }, + { + "epoch": 0.8060456453496123, + "grad_norm": 1023.5194702148438, + "learning_rate": 4.767424105541561e-06, + "loss": 66.5314, + "step": 199510 + }, + { + "epoch": 0.80608604661498, + "grad_norm": 999.2639770507812, + "learning_rate": 4.765614658784763e-06, + "loss": 59.7458, + "step": 199520 + }, + { + "epoch": 0.8061264478803476, + "grad_norm": 733.2042236328125, + "learning_rate": 4.763805509030153e-06, + "loss": 43.8388, + "step": 199530 + }, + { + "epoch": 0.8061668491457152, + "grad_norm": 346.2459716796875, + "learning_rate": 4.761996656312995e-06, + "loss": 51.6527, + "step": 199540 + }, + { + "epoch": 0.8062072504110829, + "grad_norm": 1068.8480224609375, + "learning_rate": 4.760188100668566e-06, + "loss": 70.0606, + "step": 199550 + }, + { + "epoch": 0.8062476516764505, + "grad_norm": 673.0106811523438, + "learning_rate": 4.758379842132108e-06, + "loss": 50.4069, + "step": 199560 + }, + { + "epoch": 0.8062880529418182, + "grad_norm": 382.36798095703125, + "learning_rate": 4.756571880738883e-06, + "loss": 47.0824, + "step": 199570 + }, + { + "epoch": 0.8063284542071858, + "grad_norm": 917.4437866210938, + "learning_rate": 4.754764216524137e-06, + "loss": 71.4182, + "step": 199580 + }, + { + "epoch": 0.8063688554725534, + "grad_norm": 2003.8033447265625, + "learning_rate": 4.752956849523116e-06, + "loss": 100.5967, + "step": 199590 + }, + { + "epoch": 0.8064092567379211, + "grad_norm": 550.6415405273438, + "learning_rate": 4.75114977977104e-06, + "loss": 57.5534, + "step": 199600 + }, + { + "epoch": 0.8064496580032887, + "grad_norm": 334.86798095703125, + "learning_rate": 4.749343007303162e-06, + "loss": 36.4116, + "step": 199610 + }, + { + "epoch": 0.8064900592686562, + "grad_norm": 404.8976745605469, + "learning_rate": 4.747536532154688e-06, + "loss": 39.6613, + "step": 199620 + }, + { + "epoch": 0.8065304605340239, + "grad_norm": 664.2545776367188, + "learning_rate": 4.74573035436084e-06, + "loss": 58.4795, + "step": 199630 + }, + { + "epoch": 0.8065708617993915, + "grad_norm": 516.3895263671875, + "learning_rate": 4.74392447395684e-06, + "loss": 37.5789, + "step": 199640 + }, + { + "epoch": 0.8066112630647592, + "grad_norm": 825.1966552734375, + "learning_rate": 4.742118890977876e-06, + "loss": 74.329, + "step": 199650 + }, + { + "epoch": 0.8066516643301268, + "grad_norm": 167.0436248779297, + "learning_rate": 4.740313605459168e-06, + "loss": 52.8836, + "step": 199660 + }, + { + "epoch": 0.8066920655954944, + "grad_norm": 
562.9675903320312, + "learning_rate": 4.738508617435897e-06, + "loss": 56.8856, + "step": 199670 + }, + { + "epoch": 0.8067324668608621, + "grad_norm": 1003.69677734375, + "learning_rate": 4.73670392694326e-06, + "loss": 72.9248, + "step": 199680 + }, + { + "epoch": 0.8067728681262297, + "grad_norm": 575.2083129882812, + "learning_rate": 4.734899534016437e-06, + "loss": 77.377, + "step": 199690 + }, + { + "epoch": 0.8068132693915974, + "grad_norm": 523.8365478515625, + "learning_rate": 4.733095438690607e-06, + "loss": 68.7379, + "step": 199700 + }, + { + "epoch": 0.806853670656965, + "grad_norm": 490.980224609375, + "learning_rate": 4.731291641000946e-06, + "loss": 93.7263, + "step": 199710 + }, + { + "epoch": 0.8068940719223326, + "grad_norm": 1277.6531982421875, + "learning_rate": 4.72948814098261e-06, + "loss": 63.651, + "step": 199720 + }, + { + "epoch": 0.8069344731877003, + "grad_norm": 786.5264282226562, + "learning_rate": 4.727684938670764e-06, + "loss": 38.3091, + "step": 199730 + }, + { + "epoch": 0.8069748744530679, + "grad_norm": 701.703369140625, + "learning_rate": 4.725882034100564e-06, + "loss": 73.2781, + "step": 199740 + }, + { + "epoch": 0.8070152757184355, + "grad_norm": 486.3127136230469, + "learning_rate": 4.724079427307162e-06, + "loss": 70.2856, + "step": 199750 + }, + { + "epoch": 0.8070556769838031, + "grad_norm": 2219.1591796875, + "learning_rate": 4.722277118325684e-06, + "loss": 79.3558, + "step": 199760 + }, + { + "epoch": 0.8070960782491707, + "grad_norm": 801.4882202148438, + "learning_rate": 4.7204751071912935e-06, + "loss": 83.7438, + "step": 199770 + }, + { + "epoch": 0.8071364795145384, + "grad_norm": 1393.8243408203125, + "learning_rate": 4.7186733939390925e-06, + "loss": 59.3065, + "step": 199780 + }, + { + "epoch": 0.807176880779906, + "grad_norm": 579.2114868164062, + "learning_rate": 4.716871978604236e-06, + "loss": 57.2042, + "step": 199790 + }, + { + "epoch": 0.8072172820452737, + "grad_norm": 906.3067626953125, + "learning_rate": 4.71507086122182e-06, + "loss": 72.8185, + "step": 199800 + }, + { + "epoch": 0.8072576833106413, + "grad_norm": 909.1152954101562, + "learning_rate": 4.713270041826967e-06, + "loss": 92.3732, + "step": 199810 + }, + { + "epoch": 0.8072980845760089, + "grad_norm": 665.7765502929688, + "learning_rate": 4.711469520454792e-06, + "loss": 60.077, + "step": 199820 + }, + { + "epoch": 0.8073384858413766, + "grad_norm": 867.6395874023438, + "learning_rate": 4.709669297140382e-06, + "loss": 71.299, + "step": 199830 + }, + { + "epoch": 0.8073788871067442, + "grad_norm": 524.0, + "learning_rate": 4.707869371918843e-06, + "loss": 95.5578, + "step": 199840 + }, + { + "epoch": 0.8074192883721119, + "grad_norm": 533.2967529296875, + "learning_rate": 4.706069744825261e-06, + "loss": 41.0587, + "step": 199850 + }, + { + "epoch": 0.8074596896374795, + "grad_norm": 599.3479614257812, + "learning_rate": 4.704270415894725e-06, + "loss": 70.4663, + "step": 199860 + }, + { + "epoch": 0.807500090902847, + "grad_norm": 854.325927734375, + "learning_rate": 4.702471385162313e-06, + "loss": 83.3096, + "step": 199870 + }, + { + "epoch": 0.8075404921682147, + "grad_norm": 567.803955078125, + "learning_rate": 4.700672652663101e-06, + "loss": 67.2941, + "step": 199880 + }, + { + "epoch": 0.8075808934335823, + "grad_norm": 769.7466430664062, + "learning_rate": 4.698874218432148e-06, + "loss": 70.4691, + "step": 199890 + }, + { + "epoch": 0.8076212946989499, + "grad_norm": 483.1881408691406, + "learning_rate": 4.697076082504517e-06, + "loss": 55.329, + 
"step": 199900 + }, + { + "epoch": 0.8076616959643176, + "grad_norm": 573.3446044921875, + "learning_rate": 4.695278244915276e-06, + "loss": 67.3878, + "step": 199910 + }, + { + "epoch": 0.8077020972296852, + "grad_norm": 799.0599365234375, + "learning_rate": 4.693480705699449e-06, + "loss": 74.2926, + "step": 199920 + }, + { + "epoch": 0.8077424984950529, + "grad_norm": 578.8666381835938, + "learning_rate": 4.691683464892109e-06, + "loss": 52.3457, + "step": 199930 + }, + { + "epoch": 0.8077828997604205, + "grad_norm": 584.9608764648438, + "learning_rate": 4.6898865225282705e-06, + "loss": 68.2441, + "step": 199940 + }, + { + "epoch": 0.8078233010257881, + "grad_norm": 408.0672607421875, + "learning_rate": 4.6880898786429895e-06, + "loss": 56.7599, + "step": 199950 + }, + { + "epoch": 0.8078637022911558, + "grad_norm": 1044.000732421875, + "learning_rate": 4.686293533271271e-06, + "loss": 75.0377, + "step": 199960 + }, + { + "epoch": 0.8079041035565234, + "grad_norm": 3629.451904296875, + "learning_rate": 4.684497486448145e-06, + "loss": 111.0915, + "step": 199970 + }, + { + "epoch": 0.8079445048218911, + "grad_norm": 647.61572265625, + "learning_rate": 4.682701738208633e-06, + "loss": 67.076, + "step": 199980 + }, + { + "epoch": 0.8079849060872587, + "grad_norm": 688.9310913085938, + "learning_rate": 4.680906288587721e-06, + "loss": 50.9049, + "step": 199990 + }, + { + "epoch": 0.8080253073526262, + "grad_norm": 529.5390625, + "learning_rate": 4.679111137620442e-06, + "loss": 94.4693, + "step": 200000 + }, + { + "epoch": 0.8080657086179939, + "grad_norm": 429.6932373046875, + "learning_rate": 4.677316285341773e-06, + "loss": 72.5782, + "step": 200010 + }, + { + "epoch": 0.8081061098833615, + "grad_norm": 611.6761474609375, + "learning_rate": 4.675521731786712e-06, + "loss": 36.2639, + "step": 200020 + }, + { + "epoch": 0.8081465111487292, + "grad_norm": 414.89666748046875, + "learning_rate": 4.673727476990244e-06, + "loss": 72.0258, + "step": 200030 + }, + { + "epoch": 0.8081869124140968, + "grad_norm": 595.6712036132812, + "learning_rate": 4.671933520987355e-06, + "loss": 80.6571, + "step": 200040 + }, + { + "epoch": 0.8082273136794644, + "grad_norm": 147.6326141357422, + "learning_rate": 4.670139863813006e-06, + "loss": 78.3501, + "step": 200050 + }, + { + "epoch": 0.8082677149448321, + "grad_norm": 824.07421875, + "learning_rate": 4.668346505502175e-06, + "loss": 94.6885, + "step": 200060 + }, + { + "epoch": 0.8083081162101997, + "grad_norm": 649.5587768554688, + "learning_rate": 4.666553446089821e-06, + "loss": 77.2411, + "step": 200070 + }, + { + "epoch": 0.8083485174755674, + "grad_norm": 498.7693176269531, + "learning_rate": 4.664760685610903e-06, + "loss": 42.757, + "step": 200080 + }, + { + "epoch": 0.808388918740935, + "grad_norm": 325.1972961425781, + "learning_rate": 4.662968224100375e-06, + "loss": 81.4069, + "step": 200090 + }, + { + "epoch": 0.8084293200063026, + "grad_norm": 589.6842041015625, + "learning_rate": 4.661176061593167e-06, + "loss": 48.1832, + "step": 200100 + }, + { + "epoch": 0.8084697212716703, + "grad_norm": 567.0579223632812, + "learning_rate": 4.65938419812424e-06, + "loss": 72.5399, + "step": 200110 + }, + { + "epoch": 0.8085101225370379, + "grad_norm": 564.2578125, + "learning_rate": 4.657592633728509e-06, + "loss": 68.3455, + "step": 200120 + }, + { + "epoch": 0.8085505238024054, + "grad_norm": 750.1382446289062, + "learning_rate": 4.655801368440909e-06, + "loss": 76.5158, + "step": 200130 + }, + { + "epoch": 0.8085909250677731, + "grad_norm": 
616.2826538085938, + "learning_rate": 4.654010402296361e-06, + "loss": 47.7551, + "step": 200140 + }, + { + "epoch": 0.8086313263331407, + "grad_norm": 1087.7950439453125, + "learning_rate": 4.6522197353297815e-06, + "loss": 62.0734, + "step": 200150 + }, + { + "epoch": 0.8086717275985084, + "grad_norm": 1153.560546875, + "learning_rate": 4.650429367576086e-06, + "loss": 72.5238, + "step": 200160 + }, + { + "epoch": 0.808712128863876, + "grad_norm": 716.3167724609375, + "learning_rate": 4.648639299070166e-06, + "loss": 70.1364, + "step": 200170 + }, + { + "epoch": 0.8087525301292436, + "grad_norm": 785.3956909179688, + "learning_rate": 4.646849529846926e-06, + "loss": 84.1483, + "step": 200180 + }, + { + "epoch": 0.8087929313946113, + "grad_norm": 543.43701171875, + "learning_rate": 4.645060059941258e-06, + "loss": 85.4511, + "step": 200190 + }, + { + "epoch": 0.8088333326599789, + "grad_norm": 544.4234619140625, + "learning_rate": 4.643270889388056e-06, + "loss": 48.1448, + "step": 200200 + }, + { + "epoch": 0.8088737339253466, + "grad_norm": 663.5798950195312, + "learning_rate": 4.641482018222183e-06, + "loss": 57.9586, + "step": 200210 + }, + { + "epoch": 0.8089141351907142, + "grad_norm": 978.5228271484375, + "learning_rate": 4.639693446478537e-06, + "loss": 56.9114, + "step": 200220 + }, + { + "epoch": 0.8089545364560818, + "grad_norm": 774.2407836914062, + "learning_rate": 4.637905174191968e-06, + "loss": 54.8549, + "step": 200230 + }, + { + "epoch": 0.8089949377214495, + "grad_norm": 714.7657470703125, + "learning_rate": 4.636117201397348e-06, + "loss": 72.9835, + "step": 200240 + }, + { + "epoch": 0.8090353389868171, + "grad_norm": 511.72918701171875, + "learning_rate": 4.634329528129538e-06, + "loss": 40.1841, + "step": 200250 + }, + { + "epoch": 0.8090757402521846, + "grad_norm": 391.35113525390625, + "learning_rate": 4.632542154423374e-06, + "loss": 45.6502, + "step": 200260 + }, + { + "epoch": 0.8091161415175523, + "grad_norm": 519.5234985351562, + "learning_rate": 4.630755080313723e-06, + "loss": 89.957, + "step": 200270 + }, + { + "epoch": 0.8091565427829199, + "grad_norm": 413.6555480957031, + "learning_rate": 4.628968305835409e-06, + "loss": 77.4968, + "step": 200280 + }, + { + "epoch": 0.8091969440482876, + "grad_norm": 536.6680908203125, + "learning_rate": 4.627181831023271e-06, + "loss": 38.7027, + "step": 200290 + }, + { + "epoch": 0.8092373453136552, + "grad_norm": 794.5275268554688, + "learning_rate": 4.625395655912137e-06, + "loss": 74.656, + "step": 200300 + }, + { + "epoch": 0.8092777465790228, + "grad_norm": 1503.9759521484375, + "learning_rate": 4.623609780536833e-06, + "loss": 102.2086, + "step": 200310 + }, + { + "epoch": 0.8093181478443905, + "grad_norm": 487.6082763671875, + "learning_rate": 4.621824204932175e-06, + "loss": 65.9617, + "step": 200320 + }, + { + "epoch": 0.8093585491097581, + "grad_norm": 590.9786376953125, + "learning_rate": 4.620038929132968e-06, + "loss": 65.509, + "step": 200330 + }, + { + "epoch": 0.8093989503751258, + "grad_norm": 594.8910522460938, + "learning_rate": 4.618253953174019e-06, + "loss": 94.0405, + "step": 200340 + }, + { + "epoch": 0.8094393516404934, + "grad_norm": 639.8983764648438, + "learning_rate": 4.616469277090128e-06, + "loss": 51.9678, + "step": 200350 + }, + { + "epoch": 0.809479752905861, + "grad_norm": 662.3313598632812, + "learning_rate": 4.614684900916096e-06, + "loss": 60.7116, + "step": 200360 + }, + { + "epoch": 0.8095201541712287, + "grad_norm": 1355.767822265625, + "learning_rate": 4.612900824686688e-06, + 
"loss": 83.1092, + "step": 200370 + }, + { + "epoch": 0.8095605554365963, + "grad_norm": 567.7518920898438, + "learning_rate": 4.611117048436715e-06, + "loss": 58.2193, + "step": 200380 + }, + { + "epoch": 0.8096009567019639, + "grad_norm": 784.1131591796875, + "learning_rate": 4.609333572200929e-06, + "loss": 87.4168, + "step": 200390 + }, + { + "epoch": 0.8096413579673315, + "grad_norm": 631.615966796875, + "learning_rate": 4.607550396014111e-06, + "loss": 62.9475, + "step": 200400 + }, + { + "epoch": 0.8096817592326991, + "grad_norm": 1186.810791015625, + "learning_rate": 4.605767519911022e-06, + "loss": 84.4435, + "step": 200410 + }, + { + "epoch": 0.8097221604980668, + "grad_norm": 238.95089721679688, + "learning_rate": 4.603984943926421e-06, + "loss": 72.6532, + "step": 200420 + }, + { + "epoch": 0.8097625617634344, + "grad_norm": 735.4130859375, + "learning_rate": 4.602202668095066e-06, + "loss": 60.8148, + "step": 200430 + }, + { + "epoch": 0.8098029630288021, + "grad_norm": 494.9907531738281, + "learning_rate": 4.600420692451691e-06, + "loss": 47.0831, + "step": 200440 + }, + { + "epoch": 0.8098433642941697, + "grad_norm": 365.2383728027344, + "learning_rate": 4.598639017031041e-06, + "loss": 37.0835, + "step": 200450 + }, + { + "epoch": 0.8098837655595373, + "grad_norm": 1546.6051025390625, + "learning_rate": 4.5968576418678555e-06, + "loss": 69.0256, + "step": 200460 + }, + { + "epoch": 0.809924166824905, + "grad_norm": 626.8446044921875, + "learning_rate": 4.595076566996859e-06, + "loss": 72.6378, + "step": 200470 + }, + { + "epoch": 0.8099645680902726, + "grad_norm": 755.864013671875, + "learning_rate": 4.593295792452777e-06, + "loss": 58.0876, + "step": 200480 + }, + { + "epoch": 0.8100049693556403, + "grad_norm": 686.9541015625, + "learning_rate": 4.59151531827033e-06, + "loss": 62.4934, + "step": 200490 + }, + { + "epoch": 0.8100453706210079, + "grad_norm": 847.0406494140625, + "learning_rate": 4.589735144484217e-06, + "loss": 79.7292, + "step": 200500 + }, + { + "epoch": 0.8100857718863754, + "grad_norm": 772.0776977539062, + "learning_rate": 4.587955271129154e-06, + "loss": 36.6553, + "step": 200510 + }, + { + "epoch": 0.8101261731517431, + "grad_norm": 784.9780883789062, + "learning_rate": 4.5861756982398365e-06, + "loss": 65.1115, + "step": 200520 + }, + { + "epoch": 0.8101665744171107, + "grad_norm": 477.48895263671875, + "learning_rate": 4.584396425850961e-06, + "loss": 67.9584, + "step": 200530 + }, + { + "epoch": 0.8102069756824783, + "grad_norm": 897.7119750976562, + "learning_rate": 4.582617453997218e-06, + "loss": 68.2756, + "step": 200540 + }, + { + "epoch": 0.810247376947846, + "grad_norm": 370.1058044433594, + "learning_rate": 4.580838782713273e-06, + "loss": 49.2489, + "step": 200550 + }, + { + "epoch": 0.8102877782132136, + "grad_norm": 357.29205322265625, + "learning_rate": 4.579060412033827e-06, + "loss": 53.0277, + "step": 200560 + }, + { + "epoch": 0.8103281794785813, + "grad_norm": 448.4416809082031, + "learning_rate": 4.577282341993529e-06, + "loss": 65.2503, + "step": 200570 + }, + { + "epoch": 0.8103685807439489, + "grad_norm": 1326.5113525390625, + "learning_rate": 4.5755045726270565e-06, + "loss": 115.3666, + "step": 200580 + }, + { + "epoch": 0.8104089820093165, + "grad_norm": 438.8672790527344, + "learning_rate": 4.5737271039690655e-06, + "loss": 44.4007, + "step": 200590 + }, + { + "epoch": 0.8104493832746842, + "grad_norm": 326.67181396484375, + "learning_rate": 4.571949936054197e-06, + "loss": 60.3335, + "step": 200600 + }, + { + "epoch": 
0.8104897845400518, + "grad_norm": 605.2396240234375, + "learning_rate": 4.570173068917119e-06, + "loss": 75.7388, + "step": 200610 + }, + { + "epoch": 0.8105301858054195, + "grad_norm": 1056.2252197265625, + "learning_rate": 4.568396502592453e-06, + "loss": 85.6213, + "step": 200620 + }, + { + "epoch": 0.8105705870707871, + "grad_norm": 1038.5301513671875, + "learning_rate": 4.5666202371148425e-06, + "loss": 106.559, + "step": 200630 + }, + { + "epoch": 0.8106109883361546, + "grad_norm": 469.1705322265625, + "learning_rate": 4.564844272518918e-06, + "loss": 39.2135, + "step": 200640 + }, + { + "epoch": 0.8106513896015223, + "grad_norm": 840.1749267578125, + "learning_rate": 4.563068608839305e-06, + "loss": 82.2124, + "step": 200650 + }, + { + "epoch": 0.8106917908668899, + "grad_norm": 604.8621215820312, + "learning_rate": 4.561293246110612e-06, + "loss": 118.1011, + "step": 200660 + }, + { + "epoch": 0.8107321921322576, + "grad_norm": 657.6017456054688, + "learning_rate": 4.559518184367455e-06, + "loss": 59.9515, + "step": 200670 + }, + { + "epoch": 0.8107725933976252, + "grad_norm": 799.4829711914062, + "learning_rate": 4.5577434236444405e-06, + "loss": 65.1468, + "step": 200680 + }, + { + "epoch": 0.8108129946629928, + "grad_norm": 552.7711791992188, + "learning_rate": 4.555968963976169e-06, + "loss": 48.8286, + "step": 200690 + }, + { + "epoch": 0.8108533959283605, + "grad_norm": 451.34320068359375, + "learning_rate": 4.554194805397238e-06, + "loss": 50.3721, + "step": 200700 + }, + { + "epoch": 0.8108937971937281, + "grad_norm": 908.29443359375, + "learning_rate": 4.55242094794222e-06, + "loss": 79.8299, + "step": 200710 + }, + { + "epoch": 0.8109341984590958, + "grad_norm": 674.8798828125, + "learning_rate": 4.550647391645722e-06, + "loss": 78.4844, + "step": 200720 + }, + { + "epoch": 0.8109745997244634, + "grad_norm": 631.2515869140625, + "learning_rate": 4.548874136542298e-06, + "loss": 58.4276, + "step": 200730 + }, + { + "epoch": 0.811015000989831, + "grad_norm": 1653.769287109375, + "learning_rate": 4.547101182666529e-06, + "loss": 66.6285, + "step": 200740 + }, + { + "epoch": 0.8110554022551987, + "grad_norm": 830.3623657226562, + "learning_rate": 4.54532853005298e-06, + "loss": 73.1676, + "step": 200750 + }, + { + "epoch": 0.8110958035205663, + "grad_norm": 439.56689453125, + "learning_rate": 4.5435561787362055e-06, + "loss": 61.8836, + "step": 200760 + }, + { + "epoch": 0.8111362047859338, + "grad_norm": 1337.3271484375, + "learning_rate": 4.541784128750768e-06, + "loss": 109.0282, + "step": 200770 + }, + { + "epoch": 0.8111766060513015, + "grad_norm": 264.0511169433594, + "learning_rate": 4.5400123801312005e-06, + "loss": 63.3361, + "step": 200780 + }, + { + "epoch": 0.8112170073166691, + "grad_norm": 981.15234375, + "learning_rate": 4.53824093291205e-06, + "loss": 69.0837, + "step": 200790 + }, + { + "epoch": 0.8112574085820368, + "grad_norm": 1182.881591796875, + "learning_rate": 4.536469787127855e-06, + "loss": 75.5696, + "step": 200800 + }, + { + "epoch": 0.8112978098474044, + "grad_norm": 244.80230712890625, + "learning_rate": 4.534698942813147e-06, + "loss": 71.4873, + "step": 200810 + }, + { + "epoch": 0.811338211112772, + "grad_norm": 1080.1380615234375, + "learning_rate": 4.532928400002434e-06, + "loss": 74.1821, + "step": 200820 + }, + { + "epoch": 0.8113786123781397, + "grad_norm": 397.55560302734375, + "learning_rate": 4.5311581587302576e-06, + "loss": 61.469, + "step": 200830 + }, + { + "epoch": 0.8114190136435073, + "grad_norm": 765.16162109375, + 
"learning_rate": 4.52938821903111e-06, + "loss": 59.7299, + "step": 200840 + }, + { + "epoch": 0.811459414908875, + "grad_norm": 349.1751403808594, + "learning_rate": 4.527618580939503e-06, + "loss": 67.2878, + "step": 200850 + }, + { + "epoch": 0.8114998161742426, + "grad_norm": 655.27587890625, + "learning_rate": 4.525849244489946e-06, + "loss": 65.2153, + "step": 200860 + }, + { + "epoch": 0.8115402174396102, + "grad_norm": 871.0073852539062, + "learning_rate": 4.52408020971691e-06, + "loss": 48.6929, + "step": 200870 + }, + { + "epoch": 0.8115806187049779, + "grad_norm": 390.4417419433594, + "learning_rate": 4.52231147665491e-06, + "loss": 37.3084, + "step": 200880 + }, + { + "epoch": 0.8116210199703455, + "grad_norm": 479.5315246582031, + "learning_rate": 4.520543045338413e-06, + "loss": 78.5444, + "step": 200890 + }, + { + "epoch": 0.811661421235713, + "grad_norm": 614.6495361328125, + "learning_rate": 4.518774915801896e-06, + "loss": 61.0292, + "step": 200900 + }, + { + "epoch": 0.8117018225010807, + "grad_norm": 459.2932434082031, + "learning_rate": 4.5170070880798324e-06, + "loss": 67.1813, + "step": 200910 + }, + { + "epoch": 0.8117422237664483, + "grad_norm": 895.6470947265625, + "learning_rate": 4.515239562206688e-06, + "loss": 56.7232, + "step": 200920 + }, + { + "epoch": 0.811782625031816, + "grad_norm": 726.9061279296875, + "learning_rate": 4.513472338216926e-06, + "loss": 57.9695, + "step": 200930 + }, + { + "epoch": 0.8118230262971836, + "grad_norm": 261.7510986328125, + "learning_rate": 4.511705416144987e-06, + "loss": 79.1962, + "step": 200940 + }, + { + "epoch": 0.8118634275625513, + "grad_norm": 536.12353515625, + "learning_rate": 4.509938796025326e-06, + "loss": 65.0519, + "step": 200950 + }, + { + "epoch": 0.8119038288279189, + "grad_norm": 526.437744140625, + "learning_rate": 4.508172477892383e-06, + "loss": 57.2405, + "step": 200960 + }, + { + "epoch": 0.8119442300932865, + "grad_norm": 505.5520324707031, + "learning_rate": 4.506406461780597e-06, + "loss": 47.8579, + "step": 200970 + }, + { + "epoch": 0.8119846313586542, + "grad_norm": 831.6882934570312, + "learning_rate": 4.504640747724385e-06, + "loss": 62.3471, + "step": 200980 + }, + { + "epoch": 0.8120250326240218, + "grad_norm": 707.2982788085938, + "learning_rate": 4.502875335758188e-06, + "loss": 66.2176, + "step": 200990 + }, + { + "epoch": 0.8120654338893895, + "grad_norm": 567.4088134765625, + "learning_rate": 4.501110225916409e-06, + "loss": 64.3374, + "step": 201000 + }, + { + "epoch": 0.8121058351547571, + "grad_norm": 881.3482666015625, + "learning_rate": 4.49934541823347e-06, + "loss": 75.0729, + "step": 201010 + }, + { + "epoch": 0.8121462364201246, + "grad_norm": 318.6392517089844, + "learning_rate": 4.497580912743769e-06, + "loss": 74.3705, + "step": 201020 + }, + { + "epoch": 0.8121866376854923, + "grad_norm": 517.1087036132812, + "learning_rate": 4.495816709481708e-06, + "loss": 56.3149, + "step": 201030 + }, + { + "epoch": 0.8122270389508599, + "grad_norm": 739.6838989257812, + "learning_rate": 4.4940528084816905e-06, + "loss": 81.837, + "step": 201040 + }, + { + "epoch": 0.8122674402162275, + "grad_norm": 1066.1160888671875, + "learning_rate": 4.492289209778085e-06, + "loss": 93.7868, + "step": 201050 + }, + { + "epoch": 0.8123078414815952, + "grad_norm": 517.6342163085938, + "learning_rate": 4.490525913405295e-06, + "loss": 64.3024, + "step": 201060 + }, + { + "epoch": 0.8123482427469628, + "grad_norm": 488.01715087890625, + "learning_rate": 4.488762919397682e-06, + "loss": 43.1757, + 
"step": 201070 + }, + { + "epoch": 0.8123886440123305, + "grad_norm": 538.5394287109375, + "learning_rate": 4.4870002277896215e-06, + "loss": 86.6306, + "step": 201080 + }, + { + "epoch": 0.8124290452776981, + "grad_norm": 453.9230651855469, + "learning_rate": 4.48523783861548e-06, + "loss": 75.0248, + "step": 201090 + }, + { + "epoch": 0.8124694465430657, + "grad_norm": 992.544677734375, + "learning_rate": 4.483475751909616e-06, + "loss": 41.5553, + "step": 201100 + }, + { + "epoch": 0.8125098478084334, + "grad_norm": 469.3947448730469, + "learning_rate": 4.481713967706378e-06, + "loss": 62.6309, + "step": 201110 + }, + { + "epoch": 0.812550249073801, + "grad_norm": 1201.8101806640625, + "learning_rate": 4.479952486040116e-06, + "loss": 81.5688, + "step": 201120 + }, + { + "epoch": 0.8125906503391687, + "grad_norm": 976.4193725585938, + "learning_rate": 4.4781913069451695e-06, + "loss": 64.458, + "step": 201130 + }, + { + "epoch": 0.8126310516045363, + "grad_norm": 1150.4884033203125, + "learning_rate": 4.476430430455874e-06, + "loss": 40.03, + "step": 201140 + }, + { + "epoch": 0.8126714528699038, + "grad_norm": 696.5673217773438, + "learning_rate": 4.474669856606566e-06, + "loss": 70.3904, + "step": 201150 + }, + { + "epoch": 0.8127118541352715, + "grad_norm": 505.099853515625, + "learning_rate": 4.472909585431551e-06, + "loss": 96.0026, + "step": 201160 + }, + { + "epoch": 0.8127522554006391, + "grad_norm": 1322.6907958984375, + "learning_rate": 4.471149616965169e-06, + "loss": 76.8876, + "step": 201170 + }, + { + "epoch": 0.8127926566660068, + "grad_norm": 421.96624755859375, + "learning_rate": 4.469389951241714e-06, + "loss": 64.2273, + "step": 201180 + }, + { + "epoch": 0.8128330579313744, + "grad_norm": 744.346923828125, + "learning_rate": 4.467630588295497e-06, + "loss": 76.6489, + "step": 201190 + }, + { + "epoch": 0.812873459196742, + "grad_norm": 395.4475402832031, + "learning_rate": 4.4658715281608235e-06, + "loss": 58.6112, + "step": 201200 + }, + { + "epoch": 0.8129138604621097, + "grad_norm": 686.806396484375, + "learning_rate": 4.464112770871971e-06, + "loss": 65.6655, + "step": 201210 + }, + { + "epoch": 0.8129542617274773, + "grad_norm": 815.7222900390625, + "learning_rate": 4.462354316463251e-06, + "loss": 50.3861, + "step": 201220 + }, + { + "epoch": 0.812994662992845, + "grad_norm": 657.1491088867188, + "learning_rate": 4.460596164968926e-06, + "loss": 71.1586, + "step": 201230 + }, + { + "epoch": 0.8130350642582126, + "grad_norm": 430.00238037109375, + "learning_rate": 4.458838316423279e-06, + "loss": 65.0074, + "step": 201240 + }, + { + "epoch": 0.8130754655235802, + "grad_norm": 613.675048828125, + "learning_rate": 4.4570807708605825e-06, + "loss": 86.6145, + "step": 201250 + }, + { + "epoch": 0.8131158667889479, + "grad_norm": 699.94970703125, + "learning_rate": 4.455323528315101e-06, + "loss": 90.8024, + "step": 201260 + }, + { + "epoch": 0.8131562680543155, + "grad_norm": 730.145263671875, + "learning_rate": 4.453566588821088e-06, + "loss": 70.7297, + "step": 201270 + }, + { + "epoch": 0.813196669319683, + "grad_norm": 963.6216430664062, + "learning_rate": 4.4518099524127956e-06, + "loss": 51.8181, + "step": 201280 + }, + { + "epoch": 0.8132370705850507, + "grad_norm": 353.5361022949219, + "learning_rate": 4.450053619124473e-06, + "loss": 52.6218, + "step": 201290 + }, + { + "epoch": 0.8132774718504183, + "grad_norm": 513.4462890625, + "learning_rate": 4.44829758899036e-06, + "loss": 43.8662, + "step": 201300 + }, + { + "epoch": 0.813317873115786, + "grad_norm": 
335.1729736328125, + "learning_rate": 4.4465418620447e-06, + "loss": 62.1748, + "step": 201310 + }, + { + "epoch": 0.8133582743811536, + "grad_norm": 865.79443359375, + "learning_rate": 4.444786438321702e-06, + "loss": 80.1775, + "step": 201320 + }, + { + "epoch": 0.8133986756465212, + "grad_norm": 652.9661865234375, + "learning_rate": 4.4430313178556105e-06, + "loss": 115.5593, + "step": 201330 + }, + { + "epoch": 0.8134390769118889, + "grad_norm": 1161.724609375, + "learning_rate": 4.441276500680629e-06, + "loss": 61.7082, + "step": 201340 + }, + { + "epoch": 0.8134794781772565, + "grad_norm": 1406.6531982421875, + "learning_rate": 4.43952198683097e-06, + "loss": 63.4347, + "step": 201350 + }, + { + "epoch": 0.8135198794426242, + "grad_norm": 1006.868896484375, + "learning_rate": 4.437767776340842e-06, + "loss": 85.6134, + "step": 201360 + }, + { + "epoch": 0.8135602807079918, + "grad_norm": 1848.79150390625, + "learning_rate": 4.436013869244444e-06, + "loss": 86.3189, + "step": 201370 + }, + { + "epoch": 0.8136006819733594, + "grad_norm": 827.1217651367188, + "learning_rate": 4.434260265575973e-06, + "loss": 91.9917, + "step": 201380 + }, + { + "epoch": 0.8136410832387271, + "grad_norm": 1243.6475830078125, + "learning_rate": 4.432506965369607e-06, + "loss": 80.7694, + "step": 201390 + }, + { + "epoch": 0.8136814845040947, + "grad_norm": 870.5795288085938, + "learning_rate": 4.430753968659534e-06, + "loss": 64.366, + "step": 201400 + }, + { + "epoch": 0.8137218857694622, + "grad_norm": 734.6600952148438, + "learning_rate": 4.4290012754799246e-06, + "loss": 85.7028, + "step": 201410 + }, + { + "epoch": 0.8137622870348299, + "grad_norm": 561.6251220703125, + "learning_rate": 4.427248885864959e-06, + "loss": 83.2916, + "step": 201420 + }, + { + "epoch": 0.8138026883001975, + "grad_norm": 501.4179382324219, + "learning_rate": 4.425496799848783e-06, + "loss": 62.591, + "step": 201430 + }, + { + "epoch": 0.8138430895655652, + "grad_norm": 513.661376953125, + "learning_rate": 4.423745017465577e-06, + "loss": 78.217, + "step": 201440 + }, + { + "epoch": 0.8138834908309328, + "grad_norm": 588.11962890625, + "learning_rate": 4.421993538749474e-06, + "loss": 55.5326, + "step": 201450 + }, + { + "epoch": 0.8139238920963004, + "grad_norm": 907.2653198242188, + "learning_rate": 4.420242363734628e-06, + "loss": 78.3994, + "step": 201460 + }, + { + "epoch": 0.8139642933616681, + "grad_norm": 496.221923828125, + "learning_rate": 4.418491492455181e-06, + "loss": 62.8451, + "step": 201470 + }, + { + "epoch": 0.8140046946270357, + "grad_norm": 484.19561767578125, + "learning_rate": 4.416740924945258e-06, + "loss": 84.5514, + "step": 201480 + }, + { + "epoch": 0.8140450958924034, + "grad_norm": 565.6563110351562, + "learning_rate": 4.4149906612390005e-06, + "loss": 105.5082, + "step": 201490 + }, + { + "epoch": 0.814085497157771, + "grad_norm": 444.9754638671875, + "learning_rate": 4.413240701370514e-06, + "loss": 40.6176, + "step": 201500 + }, + { + "epoch": 0.8141258984231386, + "grad_norm": 317.4707336425781, + "learning_rate": 4.411491045373937e-06, + "loss": 67.9178, + "step": 201510 + }, + { + "epoch": 0.8141662996885063, + "grad_norm": 872.8093872070312, + "learning_rate": 4.40974169328336e-06, + "loss": 56.2998, + "step": 201520 + }, + { + "epoch": 0.8142067009538739, + "grad_norm": 560.0808715820312, + "learning_rate": 4.407992645132897e-06, + "loss": 78.6556, + "step": 201530 + }, + { + "epoch": 0.8142471022192415, + "grad_norm": 632.9170532226562, + "learning_rate": 4.406243900956648e-06, + 
"loss": 46.536, + "step": 201540 + }, + { + "epoch": 0.8142875034846091, + "grad_norm": 930.1355590820312, + "learning_rate": 4.404495460788698e-06, + "loss": 62.6813, + "step": 201550 + }, + { + "epoch": 0.8143279047499767, + "grad_norm": 672.6768798828125, + "learning_rate": 4.402747324663139e-06, + "loss": 58.0816, + "step": 201560 + }, + { + "epoch": 0.8143683060153444, + "grad_norm": 521.6105346679688, + "learning_rate": 4.400999492614049e-06, + "loss": 53.6126, + "step": 201570 + }, + { + "epoch": 0.814408707280712, + "grad_norm": 914.1198120117188, + "learning_rate": 4.3992519646755064e-06, + "loss": 78.9203, + "step": 201580 + }, + { + "epoch": 0.8144491085460797, + "grad_norm": 222.87744140625, + "learning_rate": 4.397504740881577e-06, + "loss": 52.8959, + "step": 201590 + }, + { + "epoch": 0.8144895098114473, + "grad_norm": 1153.2991943359375, + "learning_rate": 4.395757821266333e-06, + "loss": 88.3473, + "step": 201600 + }, + { + "epoch": 0.8145299110768149, + "grad_norm": 249.2390594482422, + "learning_rate": 4.394011205863817e-06, + "loss": 37.3306, + "step": 201610 + }, + { + "epoch": 0.8145703123421826, + "grad_norm": 377.8114929199219, + "learning_rate": 4.392264894708087e-06, + "loss": 43.0508, + "step": 201620 + }, + { + "epoch": 0.8146107136075502, + "grad_norm": 883.5800170898438, + "learning_rate": 4.390518887833195e-06, + "loss": 95.8989, + "step": 201630 + }, + { + "epoch": 0.8146511148729179, + "grad_norm": 463.2521667480469, + "learning_rate": 4.38877318527316e-06, + "loss": 52.3598, + "step": 201640 + }, + { + "epoch": 0.8146915161382855, + "grad_norm": 322.0601806640625, + "learning_rate": 4.3870277870620416e-06, + "loss": 78.3702, + "step": 201650 + }, + { + "epoch": 0.814731917403653, + "grad_norm": 829.0099487304688, + "learning_rate": 4.385282693233843e-06, + "loss": 82.1207, + "step": 201660 + }, + { + "epoch": 0.8147723186690207, + "grad_norm": 415.2408142089844, + "learning_rate": 4.38353790382261e-06, + "loss": 39.9924, + "step": 201670 + }, + { + "epoch": 0.8148127199343883, + "grad_norm": 627.7042236328125, + "learning_rate": 4.381793418862339e-06, + "loss": 73.9785, + "step": 201680 + }, + { + "epoch": 0.814853121199756, + "grad_norm": 689.3956298828125, + "learning_rate": 4.3800492383870454e-06, + "loss": 56.4742, + "step": 201690 + }, + { + "epoch": 0.8148935224651236, + "grad_norm": 521.7947387695312, + "learning_rate": 4.378305362430735e-06, + "loss": 78.3973, + "step": 201700 + }, + { + "epoch": 0.8149339237304912, + "grad_norm": 782.8671875, + "learning_rate": 4.37656179102741e-06, + "loss": 72.4171, + "step": 201710 + }, + { + "epoch": 0.8149743249958589, + "grad_norm": 763.3424072265625, + "learning_rate": 4.374818524211048e-06, + "loss": 66.2229, + "step": 201720 + }, + { + "epoch": 0.8150147262612265, + "grad_norm": 730.6586303710938, + "learning_rate": 4.373075562015645e-06, + "loss": 81.2784, + "step": 201730 + }, + { + "epoch": 0.8150551275265941, + "grad_norm": 1165.77734375, + "learning_rate": 4.371332904475179e-06, + "loss": 78.2448, + "step": 201740 + }, + { + "epoch": 0.8150955287919618, + "grad_norm": 737.277587890625, + "learning_rate": 4.3695905516236235e-06, + "loss": 78.3483, + "step": 201750 + }, + { + "epoch": 0.8151359300573294, + "grad_norm": 498.79345703125, + "learning_rate": 4.367848503494954e-06, + "loss": 68.1771, + "step": 201760 + }, + { + "epoch": 0.8151763313226971, + "grad_norm": 609.8656616210938, + "learning_rate": 4.366106760123114e-06, + "loss": 78.2262, + "step": 201770 + }, + { + "epoch": 0.8152167325880647, + 
"grad_norm": 524.537109375, + "learning_rate": 4.364365321542083e-06, + "loss": 93.2662, + "step": 201780 + }, + { + "epoch": 0.8152571338534322, + "grad_norm": 676.1365966796875, + "learning_rate": 4.362624187785795e-06, + "loss": 52.9815, + "step": 201790 + }, + { + "epoch": 0.8152975351187999, + "grad_norm": 734.7216186523438, + "learning_rate": 4.3608833588882e-06, + "loss": 56.5757, + "step": 201800 + }, + { + "epoch": 0.8153379363841675, + "grad_norm": 2122.55810546875, + "learning_rate": 4.359142834883239e-06, + "loss": 97.9416, + "step": 201810 + }, + { + "epoch": 0.8153783376495352, + "grad_norm": 1477.7742919921875, + "learning_rate": 4.3574026158048285e-06, + "loss": 92.4672, + "step": 201820 + }, + { + "epoch": 0.8154187389149028, + "grad_norm": 578.5282592773438, + "learning_rate": 4.355662701686922e-06, + "loss": 51.0916, + "step": 201830 + }, + { + "epoch": 0.8154591401802704, + "grad_norm": 443.83782958984375, + "learning_rate": 4.353923092563417e-06, + "loss": 72.7991, + "step": 201840 + }, + { + "epoch": 0.8154995414456381, + "grad_norm": 856.5198364257812, + "learning_rate": 4.352183788468239e-06, + "loss": 89.4901, + "step": 201850 + }, + { + "epoch": 0.8155399427110057, + "grad_norm": 409.7867736816406, + "learning_rate": 4.350444789435293e-06, + "loss": 58.7051, + "step": 201860 + }, + { + "epoch": 0.8155803439763734, + "grad_norm": 727.6536865234375, + "learning_rate": 4.3487060954984875e-06, + "loss": 71.2525, + "step": 201870 + }, + { + "epoch": 0.815620745241741, + "grad_norm": 823.3516845703125, + "learning_rate": 4.3469677066917096e-06, + "loss": 67.706, + "step": 201880 + }, + { + "epoch": 0.8156611465071086, + "grad_norm": 930.4603881835938, + "learning_rate": 4.345229623048854e-06, + "loss": 74.215, + "step": 201890 + }, + { + "epoch": 0.8157015477724763, + "grad_norm": 340.3995056152344, + "learning_rate": 4.343491844603806e-06, + "loss": 87.2953, + "step": 201900 + }, + { + "epoch": 0.8157419490378439, + "grad_norm": 805.6270141601562, + "learning_rate": 4.341754371390448e-06, + "loss": 69.2234, + "step": 201910 + }, + { + "epoch": 0.8157823503032114, + "grad_norm": 618.5772094726562, + "learning_rate": 4.340017203442652e-06, + "loss": 67.7922, + "step": 201920 + }, + { + "epoch": 0.8158227515685791, + "grad_norm": 305.3576354980469, + "learning_rate": 4.3382803407942745e-06, + "loss": 119.5424, + "step": 201930 + }, + { + "epoch": 0.8158631528339467, + "grad_norm": 567.5014038085938, + "learning_rate": 4.3365437834791945e-06, + "loss": 48.3278, + "step": 201940 + }, + { + "epoch": 0.8159035540993144, + "grad_norm": 848.06005859375, + "learning_rate": 4.334807531531253e-06, + "loss": 50.3889, + "step": 201950 + }, + { + "epoch": 0.815943955364682, + "grad_norm": 964.5476684570312, + "learning_rate": 4.3330715849843034e-06, + "loss": 56.6988, + "step": 201960 + }, + { + "epoch": 0.8159843566300496, + "grad_norm": 1703.272705078125, + "learning_rate": 4.331335943872195e-06, + "loss": 46.8973, + "step": 201970 + }, + { + "epoch": 0.8160247578954173, + "grad_norm": 1062.0196533203125, + "learning_rate": 4.329600608228748e-06, + "loss": 73.3035, + "step": 201980 + }, + { + "epoch": 0.8160651591607849, + "grad_norm": 1108.6719970703125, + "learning_rate": 4.327865578087815e-06, + "loss": 43.5951, + "step": 201990 + }, + { + "epoch": 0.8161055604261526, + "grad_norm": 805.7373046875, + "learning_rate": 4.326130853483206e-06, + "loss": 75.3461, + "step": 202000 + }, + { + "epoch": 0.8161459616915202, + "grad_norm": 461.622802734375, + "learning_rate": 
4.324396434448745e-06, + "loss": 52.1879, + "step": 202010 + }, + { + "epoch": 0.8161863629568878, + "grad_norm": 704.5280151367188, + "learning_rate": 4.322662321018247e-06, + "loss": 62.4634, + "step": 202020 + }, + { + "epoch": 0.8162267642222555, + "grad_norm": 1050.8812255859375, + "learning_rate": 4.3209285132255174e-06, + "loss": 49.6509, + "step": 202030 + }, + { + "epoch": 0.8162671654876231, + "grad_norm": 1427.469970703125, + "learning_rate": 4.31919501110436e-06, + "loss": 82.3799, + "step": 202040 + }, + { + "epoch": 0.8163075667529907, + "grad_norm": 929.4609375, + "learning_rate": 4.317461814688573e-06, + "loss": 80.326, + "step": 202050 + }, + { + "epoch": 0.8163479680183583, + "grad_norm": 1305.0367431640625, + "learning_rate": 4.315728924011937e-06, + "loss": 56.8346, + "step": 202060 + }, + { + "epoch": 0.8163883692837259, + "grad_norm": 500.2276306152344, + "learning_rate": 4.31399633910824e-06, + "loss": 72.6741, + "step": 202070 + }, + { + "epoch": 0.8164287705490936, + "grad_norm": 352.0236511230469, + "learning_rate": 4.3122640600112645e-06, + "loss": 43.2406, + "step": 202080 + }, + { + "epoch": 0.8164691718144612, + "grad_norm": 626.4642333984375, + "learning_rate": 4.310532086754768e-06, + "loss": 62.7684, + "step": 202090 + }, + { + "epoch": 0.8165095730798289, + "grad_norm": 928.4041137695312, + "learning_rate": 4.308800419372536e-06, + "loss": 74.4685, + "step": 202100 + }, + { + "epoch": 0.8165499743451965, + "grad_norm": 2387.8125, + "learning_rate": 4.3070690578983095e-06, + "loss": 102.0107, + "step": 202110 + }, + { + "epoch": 0.8165903756105641, + "grad_norm": 911.839599609375, + "learning_rate": 4.3053380023658595e-06, + "loss": 74.2053, + "step": 202120 + }, + { + "epoch": 0.8166307768759318, + "grad_norm": 453.6501159667969, + "learning_rate": 4.303607252808921e-06, + "loss": 82.2763, + "step": 202130 + }, + { + "epoch": 0.8166711781412994, + "grad_norm": 880.88525390625, + "learning_rate": 4.3018768092612405e-06, + "loss": 64.415, + "step": 202140 + }, + { + "epoch": 0.816711579406667, + "grad_norm": 1100.875244140625, + "learning_rate": 4.300146671756557e-06, + "loss": 89.7472, + "step": 202150 + }, + { + "epoch": 0.8167519806720347, + "grad_norm": 503.518798828125, + "learning_rate": 4.298416840328594e-06, + "loss": 62.3704, + "step": 202160 + }, + { + "epoch": 0.8167923819374023, + "grad_norm": 608.8798828125, + "learning_rate": 4.296687315011076e-06, + "loss": 52.0714, + "step": 202170 + }, + { + "epoch": 0.8168327832027699, + "grad_norm": 1106.574951171875, + "learning_rate": 4.294958095837727e-06, + "loss": 75.3584, + "step": 202180 + }, + { + "epoch": 0.8168731844681375, + "grad_norm": 962.6588134765625, + "learning_rate": 4.293229182842253e-06, + "loss": 62.6276, + "step": 202190 + }, + { + "epoch": 0.8169135857335051, + "grad_norm": 462.022705078125, + "learning_rate": 4.291500576058363e-06, + "loss": 55.6731, + "step": 202200 + }, + { + "epoch": 0.8169539869988728, + "grad_norm": 1331.68505859375, + "learning_rate": 4.289772275519761e-06, + "loss": 63.1045, + "step": 202210 + }, + { + "epoch": 0.8169943882642404, + "grad_norm": 1263.93212890625, + "learning_rate": 4.288044281260131e-06, + "loss": 88.0366, + "step": 202220 + }, + { + "epoch": 0.8170347895296081, + "grad_norm": 919.6927490234375, + "learning_rate": 4.286316593313169e-06, + "loss": 70.4844, + "step": 202230 + }, + { + "epoch": 0.8170751907949757, + "grad_norm": 775.591796875, + "learning_rate": 4.2845892117125575e-06, + "loss": 90.2907, + "step": 202240 + }, + { + "epoch": 
0.8171155920603433, + "grad_norm": 576.2592163085938, + "learning_rate": 4.282862136491961e-06, + "loss": 65.7645, + "step": 202250 + }, + { + "epoch": 0.817155993325711, + "grad_norm": 665.7859497070312, + "learning_rate": 4.28113536768507e-06, + "loss": 72.7745, + "step": 202260 + }, + { + "epoch": 0.8171963945910786, + "grad_norm": 607.8301391601562, + "learning_rate": 4.279408905325526e-06, + "loss": 53.2035, + "step": 202270 + }, + { + "epoch": 0.8172367958564463, + "grad_norm": 505.8408508300781, + "learning_rate": 4.277682749447012e-06, + "loss": 32.4762, + "step": 202280 + }, + { + "epoch": 0.8172771971218139, + "grad_norm": 1073.9583740234375, + "learning_rate": 4.275956900083158e-06, + "loss": 56.0833, + "step": 202290 + }, + { + "epoch": 0.8173175983871814, + "grad_norm": 930.248779296875, + "learning_rate": 4.2742313572676216e-06, + "loss": 85.0025, + "step": 202300 + }, + { + "epoch": 0.8173579996525491, + "grad_norm": 1198.9141845703125, + "learning_rate": 4.2725061210340455e-06, + "loss": 83.4001, + "step": 202310 + }, + { + "epoch": 0.8173984009179167, + "grad_norm": 782.28662109375, + "learning_rate": 4.270781191416056e-06, + "loss": 67.6029, + "step": 202320 + }, + { + "epoch": 0.8174388021832844, + "grad_norm": 805.6671752929688, + "learning_rate": 4.269056568447285e-06, + "loss": 45.8591, + "step": 202330 + }, + { + "epoch": 0.817479203448652, + "grad_norm": 585.23486328125, + "learning_rate": 4.267332252161353e-06, + "loss": 77.8132, + "step": 202340 + }, + { + "epoch": 0.8175196047140196, + "grad_norm": 851.21533203125, + "learning_rate": 4.265608242591881e-06, + "loss": 59.3158, + "step": 202350 + }, + { + "epoch": 0.8175600059793873, + "grad_norm": 867.9598388671875, + "learning_rate": 4.263884539772474e-06, + "loss": 56.6606, + "step": 202360 + }, + { + "epoch": 0.8176004072447549, + "grad_norm": 669.4365844726562, + "learning_rate": 4.262161143736747e-06, + "loss": 82.3316, + "step": 202370 + }, + { + "epoch": 0.8176408085101226, + "grad_norm": 354.5205078125, + "learning_rate": 4.260438054518285e-06, + "loss": 86.2525, + "step": 202380 + }, + { + "epoch": 0.8176812097754902, + "grad_norm": 537.0974731445312, + "learning_rate": 4.258715272150686e-06, + "loss": 119.0032, + "step": 202390 + }, + { + "epoch": 0.8177216110408578, + "grad_norm": 438.22198486328125, + "learning_rate": 4.256992796667536e-06, + "loss": 65.6406, + "step": 202400 + }, + { + "epoch": 0.8177620123062255, + "grad_norm": 1090.996337890625, + "learning_rate": 4.255270628102419e-06, + "loss": 82.8368, + "step": 202410 + }, + { + "epoch": 0.8178024135715931, + "grad_norm": 581.946533203125, + "learning_rate": 4.253548766488911e-06, + "loss": 79.3624, + "step": 202420 + }, + { + "epoch": 0.8178428148369606, + "grad_norm": 891.6680908203125, + "learning_rate": 4.251827211860566e-06, + "loss": 48.4356, + "step": 202430 + }, + { + "epoch": 0.8178832161023283, + "grad_norm": 735.2766723632812, + "learning_rate": 4.250105964250968e-06, + "loss": 57.9954, + "step": 202440 + }, + { + "epoch": 0.8179236173676959, + "grad_norm": 395.760009765625, + "learning_rate": 4.2483850236936576e-06, + "loss": 44.4595, + "step": 202450 + }, + { + "epoch": 0.8179640186330636, + "grad_norm": 884.0155029296875, + "learning_rate": 4.2466643902221904e-06, + "loss": 56.7754, + "step": 202460 + }, + { + "epoch": 0.8180044198984312, + "grad_norm": 570.177978515625, + "learning_rate": 4.244944063870111e-06, + "loss": 64.9163, + "step": 202470 + }, + { + "epoch": 0.8180448211637988, + "grad_norm": 1356.488525390625, + 
"learning_rate": 4.2432240446709635e-06, + "loss": 139.1439, + "step": 202480 + }, + { + "epoch": 0.8180852224291665, + "grad_norm": 800.6863403320312, + "learning_rate": 4.241504332658271e-06, + "loss": 73.4065, + "step": 202490 + }, + { + "epoch": 0.8181256236945341, + "grad_norm": 329.8395080566406, + "learning_rate": 4.239784927865562e-06, + "loss": 54.9243, + "step": 202500 + }, + { + "epoch": 0.8181660249599018, + "grad_norm": 1031.297607421875, + "learning_rate": 4.2380658303263635e-06, + "loss": 80.4942, + "step": 202510 + }, + { + "epoch": 0.8182064262252694, + "grad_norm": 706.806884765625, + "learning_rate": 4.236347040074185e-06, + "loss": 65.8865, + "step": 202520 + }, + { + "epoch": 0.818246827490637, + "grad_norm": 662.4999389648438, + "learning_rate": 4.2346285571425415e-06, + "loss": 57.4382, + "step": 202530 + }, + { + "epoch": 0.8182872287560047, + "grad_norm": 1051.240478515625, + "learning_rate": 4.2329103815649185e-06, + "loss": 71.656, + "step": 202540 + }, + { + "epoch": 0.8183276300213723, + "grad_norm": 829.7106323242188, + "learning_rate": 4.231192513374838e-06, + "loss": 74.5911, + "step": 202550 + }, + { + "epoch": 0.8183680312867398, + "grad_norm": 747.0504760742188, + "learning_rate": 4.229474952605772e-06, + "loss": 61.4295, + "step": 202560 + }, + { + "epoch": 0.8184084325521075, + "grad_norm": 845.4904174804688, + "learning_rate": 4.22775769929121e-06, + "loss": 70.8881, + "step": 202570 + }, + { + "epoch": 0.8184488338174751, + "grad_norm": 966.598876953125, + "learning_rate": 4.22604075346464e-06, + "loss": 61.1303, + "step": 202580 + }, + { + "epoch": 0.8184892350828428, + "grad_norm": 1122.509765625, + "learning_rate": 4.224324115159513e-06, + "loss": 83.6683, + "step": 202590 + }, + { + "epoch": 0.8185296363482104, + "grad_norm": 500.9676818847656, + "learning_rate": 4.2226077844093205e-06, + "loss": 53.7782, + "step": 202600 + }, + { + "epoch": 0.818570037613578, + "grad_norm": 678.6842041015625, + "learning_rate": 4.220891761247508e-06, + "loss": 61.0555, + "step": 202610 + }, + { + "epoch": 0.8186104388789457, + "grad_norm": 1631.94873046875, + "learning_rate": 4.219176045707531e-06, + "loss": 77.893, + "step": 202620 + }, + { + "epoch": 0.8186508401443133, + "grad_norm": 522.3391723632812, + "learning_rate": 4.217460637822845e-06, + "loss": 64.4513, + "step": 202630 + }, + { + "epoch": 0.818691241409681, + "grad_norm": 757.8579711914062, + "learning_rate": 4.215745537626887e-06, + "loss": 48.5609, + "step": 202640 + }, + { + "epoch": 0.8187316426750486, + "grad_norm": 1361.6986083984375, + "learning_rate": 4.214030745153104e-06, + "loss": 56.1197, + "step": 202650 + }, + { + "epoch": 0.8187720439404162, + "grad_norm": 626.077392578125, + "learning_rate": 4.212316260434912e-06, + "loss": 62.8881, + "step": 202660 + }, + { + "epoch": 0.8188124452057839, + "grad_norm": 395.271728515625, + "learning_rate": 4.210602083505741e-06, + "loss": 38.6826, + "step": 202670 + }, + { + "epoch": 0.8188528464711515, + "grad_norm": 891.6951293945312, + "learning_rate": 4.208888214399014e-06, + "loss": 50.9728, + "step": 202680 + }, + { + "epoch": 0.8188932477365191, + "grad_norm": 571.4351196289062, + "learning_rate": 4.207174653148145e-06, + "loss": 55.327, + "step": 202690 + }, + { + "epoch": 0.8189336490018867, + "grad_norm": 471.67657470703125, + "learning_rate": 4.205461399786526e-06, + "loss": 45.0789, + "step": 202700 + }, + { + "epoch": 0.8189740502672543, + "grad_norm": 732.1365966796875, + "learning_rate": 4.203748454347578e-06, + "loss": 52.9026, + 
"step": 202710 + }, + { + "epoch": 0.819014451532622, + "grad_norm": 724.2202758789062, + "learning_rate": 4.202035816864683e-06, + "loss": 77.4242, + "step": 202720 + }, + { + "epoch": 0.8190548527979896, + "grad_norm": 137.0491943359375, + "learning_rate": 4.200323487371232e-06, + "loss": 57.0169, + "step": 202730 + }, + { + "epoch": 0.8190952540633573, + "grad_norm": 430.61956787109375, + "learning_rate": 4.198611465900611e-06, + "loss": 41.7674, + "step": 202740 + }, + { + "epoch": 0.8191356553287249, + "grad_norm": 473.8705749511719, + "learning_rate": 4.196899752486192e-06, + "loss": 52.156, + "step": 202750 + }, + { + "epoch": 0.8191760565940925, + "grad_norm": 1096.9814453125, + "learning_rate": 4.195188347161354e-06, + "loss": 73.2342, + "step": 202760 + }, + { + "epoch": 0.8192164578594602, + "grad_norm": 1016.5376586914062, + "learning_rate": 4.1934772499594525e-06, + "loss": 67.4523, + "step": 202770 + }, + { + "epoch": 0.8192568591248278, + "grad_norm": 1173.11181640625, + "learning_rate": 4.191766460913849e-06, + "loss": 63.9323, + "step": 202780 + }, + { + "epoch": 0.8192972603901955, + "grad_norm": 310.65924072265625, + "learning_rate": 4.190055980057896e-06, + "loss": 59.801, + "step": 202790 + }, + { + "epoch": 0.8193376616555631, + "grad_norm": 305.4320983886719, + "learning_rate": 4.18834580742494e-06, + "loss": 57.1772, + "step": 202800 + }, + { + "epoch": 0.8193780629209307, + "grad_norm": 1602.146728515625, + "learning_rate": 4.186635943048325e-06, + "loss": 69.6057, + "step": 202810 + }, + { + "epoch": 0.8194184641862983, + "grad_norm": 825.0625, + "learning_rate": 4.18492638696139e-06, + "loss": 61.9972, + "step": 202820 + }, + { + "epoch": 0.8194588654516659, + "grad_norm": 470.7935791015625, + "learning_rate": 4.183217139197451e-06, + "loss": 74.7176, + "step": 202830 + }, + { + "epoch": 0.8194992667170335, + "grad_norm": 700.677978515625, + "learning_rate": 4.181508199789834e-06, + "loss": 61.9144, + "step": 202840 + }, + { + "epoch": 0.8195396679824012, + "grad_norm": 928.5784301757812, + "learning_rate": 4.1797995687718676e-06, + "loss": 52.6431, + "step": 202850 + }, + { + "epoch": 0.8195800692477688, + "grad_norm": 681.140869140625, + "learning_rate": 4.178091246176841e-06, + "loss": 52.4899, + "step": 202860 + }, + { + "epoch": 0.8196204705131365, + "grad_norm": 407.27984619140625, + "learning_rate": 4.1763832320380814e-06, + "loss": 45.6163, + "step": 202870 + }, + { + "epoch": 0.8196608717785041, + "grad_norm": 612.0280151367188, + "learning_rate": 4.174675526388867e-06, + "loss": 56.9259, + "step": 202880 + }, + { + "epoch": 0.8197012730438717, + "grad_norm": 758.974609375, + "learning_rate": 4.172968129262514e-06, + "loss": 64.8957, + "step": 202890 + }, + { + "epoch": 0.8197416743092394, + "grad_norm": 400.32733154296875, + "learning_rate": 4.171261040692287e-06, + "loss": 67.0429, + "step": 202900 + }, + { + "epoch": 0.819782075574607, + "grad_norm": 524.123046875, + "learning_rate": 4.169554260711475e-06, + "loss": 40.697, + "step": 202910 + }, + { + "epoch": 0.8198224768399747, + "grad_norm": 518.7138671875, + "learning_rate": 4.167847789353361e-06, + "loss": 68.234, + "step": 202920 + }, + { + "epoch": 0.8198628781053423, + "grad_norm": 1183.4178466796875, + "learning_rate": 4.166141626651197e-06, + "loss": 53.7478, + "step": 202930 + }, + { + "epoch": 0.8199032793707098, + "grad_norm": 652.0435180664062, + "learning_rate": 4.1644357726382555e-06, + "loss": 77.2102, + "step": 202940 + }, + { + "epoch": 0.8199436806360775, + "grad_norm": 
333.60235595703125, + "learning_rate": 4.162730227347791e-06, + "loss": 66.1985, + "step": 202950 + }, + { + "epoch": 0.8199840819014451, + "grad_norm": 1225.7625732421875, + "learning_rate": 4.161024990813054e-06, + "loss": 82.7061, + "step": 202960 + }, + { + "epoch": 0.8200244831668128, + "grad_norm": 2229.330810546875, + "learning_rate": 4.159320063067289e-06, + "loss": 107.0624, + "step": 202970 + }, + { + "epoch": 0.8200648844321804, + "grad_norm": 1055.973388671875, + "learning_rate": 4.157615444143741e-06, + "loss": 79.4312, + "step": 202980 + }, + { + "epoch": 0.820105285697548, + "grad_norm": 965.6340942382812, + "learning_rate": 4.15591113407563e-06, + "loss": 55.837, + "step": 202990 + }, + { + "epoch": 0.8201456869629157, + "grad_norm": 794.1283569335938, + "learning_rate": 4.154207132896189e-06, + "loss": 65.4609, + "step": 203000 + }, + { + "epoch": 0.8201860882282833, + "grad_norm": 1127.6075439453125, + "learning_rate": 4.152503440638638e-06, + "loss": 47.2164, + "step": 203010 + }, + { + "epoch": 0.820226489493651, + "grad_norm": 439.9280700683594, + "learning_rate": 4.150800057336188e-06, + "loss": 76.0418, + "step": 203020 + }, + { + "epoch": 0.8202668907590186, + "grad_norm": 525.7040405273438, + "learning_rate": 4.1490969830220605e-06, + "loss": 48.3175, + "step": 203030 + }, + { + "epoch": 0.8203072920243862, + "grad_norm": 738.9090576171875, + "learning_rate": 4.147394217729434e-06, + "loss": 49.4532, + "step": 203040 + }, + { + "epoch": 0.8203476932897539, + "grad_norm": 410.5454406738281, + "learning_rate": 4.145691761491532e-06, + "loss": 70.7972, + "step": 203050 + }, + { + "epoch": 0.8203880945551215, + "grad_norm": 494.97314453125, + "learning_rate": 4.143989614341526e-06, + "loss": 47.6452, + "step": 203060 + }, + { + "epoch": 0.820428495820489, + "grad_norm": 513.0502319335938, + "learning_rate": 4.142287776312603e-06, + "loss": 36.7736, + "step": 203070 + }, + { + "epoch": 0.8204688970858567, + "grad_norm": 597.7318725585938, + "learning_rate": 4.140586247437948e-06, + "loss": 77.0351, + "step": 203080 + }, + { + "epoch": 0.8205092983512243, + "grad_norm": 357.81793212890625, + "learning_rate": 4.138885027750726e-06, + "loss": 54.951, + "step": 203090 + }, + { + "epoch": 0.820549699616592, + "grad_norm": 337.25152587890625, + "learning_rate": 4.1371841172841125e-06, + "loss": 55.4656, + "step": 203100 + }, + { + "epoch": 0.8205901008819596, + "grad_norm": 533.76953125, + "learning_rate": 4.1354835160712545e-06, + "loss": 63.1981, + "step": 203110 + }, + { + "epoch": 0.8206305021473272, + "grad_norm": 704.6478881835938, + "learning_rate": 4.1337832241453155e-06, + "loss": 73.1109, + "step": 203120 + }, + { + "epoch": 0.8206709034126949, + "grad_norm": 863.1463623046875, + "learning_rate": 4.13208324153944e-06, + "loss": 45.1918, + "step": 203130 + }, + { + "epoch": 0.8207113046780625, + "grad_norm": 779.7010498046875, + "learning_rate": 4.130383568286776e-06, + "loss": 106.8248, + "step": 203140 + }, + { + "epoch": 0.8207517059434302, + "grad_norm": 508.7459716796875, + "learning_rate": 4.128684204420443e-06, + "loss": 44.0803, + "step": 203150 + }, + { + "epoch": 0.8207921072087978, + "grad_norm": 638.2647094726562, + "learning_rate": 4.126985149973595e-06, + "loss": 57.0576, + "step": 203160 + }, + { + "epoch": 0.8208325084741654, + "grad_norm": 517.2372436523438, + "learning_rate": 4.1252864049793365e-06, + "loss": 100.9686, + "step": 203170 + }, + { + "epoch": 0.8208729097395331, + "grad_norm": 1405.5762939453125, + "learning_rate": 
4.123587969470795e-06, + "loss": 84.905, + "step": 203180 + }, + { + "epoch": 0.8209133110049007, + "grad_norm": 1009.2142333984375, + "learning_rate": 4.121889843481082e-06, + "loss": 80.1197, + "step": 203190 + }, + { + "epoch": 0.8209537122702683, + "grad_norm": 445.126708984375, + "learning_rate": 4.120192027043293e-06, + "loss": 49.195, + "step": 203200 + }, + { + "epoch": 0.8209941135356359, + "grad_norm": 470.849365234375, + "learning_rate": 4.1184945201905435e-06, + "loss": 55.3073, + "step": 203210 + }, + { + "epoch": 0.8210345148010035, + "grad_norm": 781.3931274414062, + "learning_rate": 4.116797322955917e-06, + "loss": 76.3041, + "step": 203220 + }, + { + "epoch": 0.8210749160663712, + "grad_norm": 412.7049560546875, + "learning_rate": 4.115100435372503e-06, + "loss": 66.2245, + "step": 203230 + }, + { + "epoch": 0.8211153173317388, + "grad_norm": 619.7646484375, + "learning_rate": 4.113403857473386e-06, + "loss": 50.8339, + "step": 203240 + }, + { + "epoch": 0.8211557185971065, + "grad_norm": 470.26214599609375, + "learning_rate": 4.111707589291635e-06, + "loss": 55.7227, + "step": 203250 + }, + { + "epoch": 0.8211961198624741, + "grad_norm": 704.4564819335938, + "learning_rate": 4.110011630860335e-06, + "loss": 65.2877, + "step": 203260 + }, + { + "epoch": 0.8212365211278417, + "grad_norm": 389.7925720214844, + "learning_rate": 4.1083159822125314e-06, + "loss": 64.6969, + "step": 203270 + }, + { + "epoch": 0.8212769223932094, + "grad_norm": 844.0777587890625, + "learning_rate": 4.1066206433812896e-06, + "loss": 76.0595, + "step": 203280 + }, + { + "epoch": 0.821317323658577, + "grad_norm": 635.1431274414062, + "learning_rate": 4.10492561439966e-06, + "loss": 104.1085, + "step": 203290 + }, + { + "epoch": 0.8213577249239447, + "grad_norm": 240.35679626464844, + "learning_rate": 4.103230895300694e-06, + "loss": 54.5709, + "step": 203300 + }, + { + "epoch": 0.8213981261893123, + "grad_norm": 1353.5472412109375, + "learning_rate": 4.101536486117417e-06, + "loss": 104.4096, + "step": 203310 + }, + { + "epoch": 0.8214385274546799, + "grad_norm": 892.1799926757812, + "learning_rate": 4.099842386882881e-06, + "loss": 68.4861, + "step": 203320 + }, + { + "epoch": 0.8214789287200475, + "grad_norm": 458.0665283203125, + "learning_rate": 4.098148597630098e-06, + "loss": 58.0125, + "step": 203330 + }, + { + "epoch": 0.8215193299854151, + "grad_norm": 1594.962890625, + "learning_rate": 4.0964551183920955e-06, + "loss": 95.3562, + "step": 203340 + }, + { + "epoch": 0.8215597312507827, + "grad_norm": 667.8057250976562, + "learning_rate": 4.094761949201889e-06, + "loss": 74.2207, + "step": 203350 + }, + { + "epoch": 0.8216001325161504, + "grad_norm": 830.0928344726562, + "learning_rate": 4.093069090092485e-06, + "loss": 62.2057, + "step": 203360 + }, + { + "epoch": 0.821640533781518, + "grad_norm": 977.8357543945312, + "learning_rate": 4.0913765410968945e-06, + "loss": 62.9506, + "step": 203370 + }, + { + "epoch": 0.8216809350468857, + "grad_norm": 1137.858154296875, + "learning_rate": 4.089684302248103e-06, + "loss": 82.074, + "step": 203380 + }, + { + "epoch": 0.8217213363122533, + "grad_norm": 655.5372314453125, + "learning_rate": 4.087992373579109e-06, + "loss": 93.7762, + "step": 203390 + }, + { + "epoch": 0.8217617375776209, + "grad_norm": 774.5426025390625, + "learning_rate": 4.086300755122891e-06, + "loss": 76.9394, + "step": 203400 + }, + { + "epoch": 0.8218021388429886, + "grad_norm": 418.4342041015625, + "learning_rate": 4.084609446912438e-06, + "loss": 103.3534, + "step": 203410 + 
}, + { + "epoch": 0.8218425401083562, + "grad_norm": 393.9885559082031, + "learning_rate": 4.082918448980715e-06, + "loss": 47.7037, + "step": 203420 + }, + { + "epoch": 0.8218829413737239, + "grad_norm": 575.7022094726562, + "learning_rate": 4.081227761360697e-06, + "loss": 70.9595, + "step": 203430 + }, + { + "epoch": 0.8219233426390915, + "grad_norm": 277.14923095703125, + "learning_rate": 4.079537384085335e-06, + "loss": 53.1348, + "step": 203440 + }, + { + "epoch": 0.8219637439044591, + "grad_norm": 872.6604614257812, + "learning_rate": 4.077847317187589e-06, + "loss": 86.7438, + "step": 203450 + }, + { + "epoch": 0.8220041451698267, + "grad_norm": 429.4429626464844, + "learning_rate": 4.076157560700409e-06, + "loss": 66.6064, + "step": 203460 + }, + { + "epoch": 0.8220445464351943, + "grad_norm": 601.5440673828125, + "learning_rate": 4.0744681146567265e-06, + "loss": 58.2926, + "step": 203470 + }, + { + "epoch": 0.822084947700562, + "grad_norm": 418.5809020996094, + "learning_rate": 4.072778979089498e-06, + "loss": 82.9981, + "step": 203480 + }, + { + "epoch": 0.8221253489659296, + "grad_norm": 355.09478759765625, + "learning_rate": 4.071090154031634e-06, + "loss": 38.4025, + "step": 203490 + }, + { + "epoch": 0.8221657502312972, + "grad_norm": 319.6790466308594, + "learning_rate": 4.069401639516075e-06, + "loss": 44.534, + "step": 203500 + }, + { + "epoch": 0.8222061514966649, + "grad_norm": 928.7694091796875, + "learning_rate": 4.067713435575731e-06, + "loss": 55.6365, + "step": 203510 + }, + { + "epoch": 0.8222465527620325, + "grad_norm": 159.58648681640625, + "learning_rate": 4.066025542243515e-06, + "loss": 63.3258, + "step": 203520 + }, + { + "epoch": 0.8222869540274002, + "grad_norm": 1510.0184326171875, + "learning_rate": 4.064337959552338e-06, + "loss": 82.0264, + "step": 203530 + }, + { + "epoch": 0.8223273552927678, + "grad_norm": 397.55120849609375, + "learning_rate": 4.062650687535095e-06, + "loss": 80.4299, + "step": 203540 + }, + { + "epoch": 0.8223677565581354, + "grad_norm": 350.57647705078125, + "learning_rate": 4.0609637262246785e-06, + "loss": 40.692, + "step": 203550 + }, + { + "epoch": 0.8224081578235031, + "grad_norm": 393.658203125, + "learning_rate": 4.05927707565398e-06, + "loss": 81.0842, + "step": 203560 + }, + { + "epoch": 0.8224485590888707, + "grad_norm": 745.2801513671875, + "learning_rate": 4.057590735855885e-06, + "loss": 53.4222, + "step": 203570 + }, + { + "epoch": 0.8224889603542382, + "grad_norm": 456.558837890625, + "learning_rate": 4.055904706863263e-06, + "loss": 62.6553, + "step": 203580 + }, + { + "epoch": 0.8225293616196059, + "grad_norm": 339.56036376953125, + "learning_rate": 4.054218988708995e-06, + "loss": 61.2569, + "step": 203590 + }, + { + "epoch": 0.8225697628849735, + "grad_norm": 736.6005249023438, + "learning_rate": 4.05253358142593e-06, + "loss": 51.354, + "step": 203600 + }, + { + "epoch": 0.8226101641503412, + "grad_norm": 345.00323486328125, + "learning_rate": 4.050848485046934e-06, + "loss": 48.7259, + "step": 203610 + }, + { + "epoch": 0.8226505654157088, + "grad_norm": 657.6856689453125, + "learning_rate": 4.049163699604859e-06, + "loss": 78.7273, + "step": 203620 + }, + { + "epoch": 0.8226909666810764, + "grad_norm": 816.178466796875, + "learning_rate": 4.047479225132549e-06, + "loss": 75.5536, + "step": 203630 + }, + { + "epoch": 0.8227313679464441, + "grad_norm": 1030.3809814453125, + "learning_rate": 4.045795061662849e-06, + "loss": 58.8488, + "step": 203640 + }, + { + "epoch": 0.8227717692118117, + "grad_norm": 
873.8626098632812, + "learning_rate": 4.044111209228578e-06, + "loss": 62.4727, + "step": 203650 + }, + { + "epoch": 0.8228121704771794, + "grad_norm": 1189.3658447265625, + "learning_rate": 4.0424276678625855e-06, + "loss": 64.8521, + "step": 203660 + }, + { + "epoch": 0.822852571742547, + "grad_norm": 975.9833374023438, + "learning_rate": 4.040744437597673e-06, + "loss": 83.2356, + "step": 203670 + }, + { + "epoch": 0.8228929730079146, + "grad_norm": 1040.594970703125, + "learning_rate": 4.039061518466665e-06, + "loss": 70.2418, + "step": 203680 + }, + { + "epoch": 0.8229333742732823, + "grad_norm": 820.2346801757812, + "learning_rate": 4.037378910502372e-06, + "loss": 62.0103, + "step": 203690 + }, + { + "epoch": 0.8229737755386499, + "grad_norm": 881.4517211914062, + "learning_rate": 4.035696613737594e-06, + "loss": 47.3766, + "step": 203700 + }, + { + "epoch": 0.8230141768040174, + "grad_norm": 979.148681640625, + "learning_rate": 4.034014628205134e-06, + "loss": 59.4124, + "step": 203710 + }, + { + "epoch": 0.8230545780693851, + "grad_norm": 574.5164794921875, + "learning_rate": 4.032332953937775e-06, + "loss": 80.5395, + "step": 203720 + }, + { + "epoch": 0.8230949793347527, + "grad_norm": 812.2247314453125, + "learning_rate": 4.0306515909683045e-06, + "loss": 75.1718, + "step": 203730 + }, + { + "epoch": 0.8231353806001204, + "grad_norm": 504.342529296875, + "learning_rate": 4.028970539329502e-06, + "loss": 57.1937, + "step": 203740 + }, + { + "epoch": 0.823175781865488, + "grad_norm": 907.7604370117188, + "learning_rate": 4.027289799054148e-06, + "loss": 100.8764, + "step": 203750 + }, + { + "epoch": 0.8232161831308556, + "grad_norm": 557.8759155273438, + "learning_rate": 4.025609370174992e-06, + "loss": 53.7404, + "step": 203760 + }, + { + "epoch": 0.8232565843962233, + "grad_norm": 695.177490234375, + "learning_rate": 4.023929252724814e-06, + "loss": 44.8597, + "step": 203770 + }, + { + "epoch": 0.8232969856615909, + "grad_norm": 627.762939453125, + "learning_rate": 4.022249446736357e-06, + "loss": 50.2508, + "step": 203780 + }, + { + "epoch": 0.8233373869269586, + "grad_norm": 1312.428466796875, + "learning_rate": 4.020569952242372e-06, + "loss": 62.5557, + "step": 203790 + }, + { + "epoch": 0.8233777881923262, + "grad_norm": 593.6773681640625, + "learning_rate": 4.01889076927561e-06, + "loss": 58.6091, + "step": 203800 + }, + { + "epoch": 0.8234181894576938, + "grad_norm": 906.6593017578125, + "learning_rate": 4.017211897868787e-06, + "loss": 61.9634, + "step": 203810 + }, + { + "epoch": 0.8234585907230615, + "grad_norm": 538.0030517578125, + "learning_rate": 4.015533338054658e-06, + "loss": 39.5136, + "step": 203820 + }, + { + "epoch": 0.8234989919884291, + "grad_norm": 571.141845703125, + "learning_rate": 4.013855089865933e-06, + "loss": 53.6486, + "step": 203830 + }, + { + "epoch": 0.8235393932537967, + "grad_norm": 615.5213012695312, + "learning_rate": 4.01217715333533e-06, + "loss": 56.7484, + "step": 203840 + }, + { + "epoch": 0.8235797945191643, + "grad_norm": 363.0811767578125, + "learning_rate": 4.010499528495566e-06, + "loss": 81.7641, + "step": 203850 + }, + { + "epoch": 0.8236201957845319, + "grad_norm": 750.9855346679688, + "learning_rate": 4.008822215379344e-06, + "loss": 64.7928, + "step": 203860 + }, + { + "epoch": 0.8236605970498996, + "grad_norm": 1120.28466796875, + "learning_rate": 4.0071452140193725e-06, + "loss": 67.1215, + "step": 203870 + }, + { + "epoch": 0.8237009983152672, + "grad_norm": 269.43341064453125, + "learning_rate": 4.005468524448333e-06, + 
"loss": 87.9866, + "step": 203880 + }, + { + "epoch": 0.8237413995806349, + "grad_norm": 1242.7177734375, + "learning_rate": 4.00379214669892e-06, + "loss": 68.1443, + "step": 203890 + }, + { + "epoch": 0.8237818008460025, + "grad_norm": 494.302978515625, + "learning_rate": 4.002116080803813e-06, + "loss": 47.2778, + "step": 203900 + }, + { + "epoch": 0.8238222021113701, + "grad_norm": 724.1763305664062, + "learning_rate": 4.000440326795696e-06, + "loss": 73.2355, + "step": 203910 + }, + { + "epoch": 0.8238626033767378, + "grad_norm": 846.3582763671875, + "learning_rate": 3.99876488470722e-06, + "loss": 46.0492, + "step": 203920 + }, + { + "epoch": 0.8239030046421054, + "grad_norm": 990.3980712890625, + "learning_rate": 3.997089754571071e-06, + "loss": 79.9652, + "step": 203930 + }, + { + "epoch": 0.8239434059074731, + "grad_norm": 538.9202880859375, + "learning_rate": 3.995414936419893e-06, + "loss": 46.9971, + "step": 203940 + }, + { + "epoch": 0.8239838071728407, + "grad_norm": 374.8364562988281, + "learning_rate": 3.993740430286339e-06, + "loss": 36.9022, + "step": 203950 + }, + { + "epoch": 0.8240242084382083, + "grad_norm": 414.5154113769531, + "learning_rate": 3.99206623620306e-06, + "loss": 78.5598, + "step": 203960 + }, + { + "epoch": 0.8240646097035759, + "grad_norm": 472.0915832519531, + "learning_rate": 3.990392354202683e-06, + "loss": 40.2922, + "step": 203970 + }, + { + "epoch": 0.8241050109689435, + "grad_norm": 765.1566772460938, + "learning_rate": 3.9887187843178576e-06, + "loss": 74.1222, + "step": 203980 + }, + { + "epoch": 0.8241454122343111, + "grad_norm": 1212.2232666015625, + "learning_rate": 3.987045526581199e-06, + "loss": 77.7429, + "step": 203990 + }, + { + "epoch": 0.8241858134996788, + "grad_norm": 452.0293884277344, + "learning_rate": 3.985372581025333e-06, + "loss": 74.0759, + "step": 204000 + }, + { + "epoch": 0.8242262147650464, + "grad_norm": 518.6043701171875, + "learning_rate": 3.983699947682871e-06, + "loss": 57.4858, + "step": 204010 + }, + { + "epoch": 0.8242666160304141, + "grad_norm": 568.9951171875, + "learning_rate": 3.982027626586424e-06, + "loss": 69.1579, + "step": 204020 + }, + { + "epoch": 0.8243070172957817, + "grad_norm": 1102.97119140625, + "learning_rate": 3.980355617768596e-06, + "loss": 71.3683, + "step": 204030 + }, + { + "epoch": 0.8243474185611493, + "grad_norm": 753.8814697265625, + "learning_rate": 3.97868392126199e-06, + "loss": 70.173, + "step": 204040 + }, + { + "epoch": 0.824387819826517, + "grad_norm": 705.6229248046875, + "learning_rate": 3.977012537099181e-06, + "loss": 53.3602, + "step": 204050 + }, + { + "epoch": 0.8244282210918846, + "grad_norm": 453.16632080078125, + "learning_rate": 3.975341465312763e-06, + "loss": 50.9337, + "step": 204060 + }, + { + "epoch": 0.8244686223572523, + "grad_norm": 939.870849609375, + "learning_rate": 3.97367070593532e-06, + "loss": 65.8549, + "step": 204070 + }, + { + "epoch": 0.8245090236226199, + "grad_norm": 1385.5714111328125, + "learning_rate": 3.972000258999404e-06, + "loss": 97.2303, + "step": 204080 + }, + { + "epoch": 0.8245494248879874, + "grad_norm": 711.6849365234375, + "learning_rate": 3.970330124537607e-06, + "loss": 60.5277, + "step": 204090 + }, + { + "epoch": 0.8245898261533551, + "grad_norm": 1279.734375, + "learning_rate": 3.968660302582466e-06, + "loss": 98.9602, + "step": 204100 + }, + { + "epoch": 0.8246302274187227, + "grad_norm": 972.3397827148438, + "learning_rate": 3.966990793166557e-06, + "loss": 56.4958, + "step": 204110 + }, + { + "epoch": 0.8246706286840904, + 
"grad_norm": 1436.2969970703125, + "learning_rate": 3.965321596322411e-06, + "loss": 72.8183, + "step": 204120 + }, + { + "epoch": 0.824711029949458, + "grad_norm": 329.451904296875, + "learning_rate": 3.963652712082575e-06, + "loss": 48.6122, + "step": 204130 + }, + { + "epoch": 0.8247514312148256, + "grad_norm": 244.27247619628906, + "learning_rate": 3.961984140479591e-06, + "loss": 31.4307, + "step": 204140 + }, + { + "epoch": 0.8247918324801933, + "grad_norm": 592.1140747070312, + "learning_rate": 3.960315881545973e-06, + "loss": 82.0254, + "step": 204150 + }, + { + "epoch": 0.8248322337455609, + "grad_norm": 556.4722290039062, + "learning_rate": 3.958647935314266e-06, + "loss": 57.5765, + "step": 204160 + }, + { + "epoch": 0.8248726350109286, + "grad_norm": 628.7799072265625, + "learning_rate": 3.956980301816971e-06, + "loss": 86.5817, + "step": 204170 + }, + { + "epoch": 0.8249130362762962, + "grad_norm": 883.5150756835938, + "learning_rate": 3.955312981086603e-06, + "loss": 73.3022, + "step": 204180 + }, + { + "epoch": 0.8249534375416638, + "grad_norm": 824.1716918945312, + "learning_rate": 3.953645973155669e-06, + "loss": 74.7838, + "step": 204190 + }, + { + "epoch": 0.8249938388070315, + "grad_norm": 464.9634704589844, + "learning_rate": 3.9519792780566725e-06, + "loss": 64.9454, + "step": 204200 + }, + { + "epoch": 0.8250342400723991, + "grad_norm": 1088.5291748046875, + "learning_rate": 3.9503128958221e-06, + "loss": 90.2049, + "step": 204210 + }, + { + "epoch": 0.8250746413377666, + "grad_norm": 632.5283813476562, + "learning_rate": 3.948646826484437e-06, + "loss": 65.5226, + "step": 204220 + }, + { + "epoch": 0.8251150426031343, + "grad_norm": 892.033447265625, + "learning_rate": 3.94698107007617e-06, + "loss": 42.6477, + "step": 204230 + }, + { + "epoch": 0.8251554438685019, + "grad_norm": 836.266357421875, + "learning_rate": 3.9453156266297706e-06, + "loss": 71.8309, + "step": 204240 + }, + { + "epoch": 0.8251958451338696, + "grad_norm": 999.6056518554688, + "learning_rate": 3.9436504961777135e-06, + "loss": 54.7349, + "step": 204250 + }, + { + "epoch": 0.8252362463992372, + "grad_norm": 447.24688720703125, + "learning_rate": 3.941985678752447e-06, + "loss": 92.9419, + "step": 204260 + }, + { + "epoch": 0.8252766476646048, + "grad_norm": 381.0337219238281, + "learning_rate": 3.940321174386448e-06, + "loss": 65.0392, + "step": 204270 + }, + { + "epoch": 0.8253170489299725, + "grad_norm": 894.3434448242188, + "learning_rate": 3.938656983112148e-06, + "loss": 78.7584, + "step": 204280 + }, + { + "epoch": 0.8253574501953401, + "grad_norm": 781.9259033203125, + "learning_rate": 3.936993104962002e-06, + "loss": 60.9049, + "step": 204290 + }, + { + "epoch": 0.8253978514607078, + "grad_norm": 643.867919921875, + "learning_rate": 3.935329539968449e-06, + "loss": 63.1497, + "step": 204300 + }, + { + "epoch": 0.8254382527260754, + "grad_norm": 715.306884765625, + "learning_rate": 3.933666288163907e-06, + "loss": 46.5962, + "step": 204310 + }, + { + "epoch": 0.825478653991443, + "grad_norm": 1718.504150390625, + "learning_rate": 3.9320033495808215e-06, + "loss": 103.3249, + "step": 204320 + }, + { + "epoch": 0.8255190552568107, + "grad_norm": 666.7586669921875, + "learning_rate": 3.930340724251598e-06, + "loss": 62.2959, + "step": 204330 + }, + { + "epoch": 0.8255594565221783, + "grad_norm": 449.8785705566406, + "learning_rate": 3.9286784122086575e-06, + "loss": 81.9351, + "step": 204340 + }, + { + "epoch": 0.8255998577875459, + "grad_norm": 501.755859375, + "learning_rate": 
3.927016413484404e-06, + "loss": 72.333, + "step": 204350 + }, + { + "epoch": 0.8256402590529135, + "grad_norm": 751.1004638671875, + "learning_rate": 3.925354728111246e-06, + "loss": 60.4454, + "step": 204360 + }, + { + "epoch": 0.8256806603182811, + "grad_norm": 2201.372802734375, + "learning_rate": 3.9236933561215605e-06, + "loss": 74.5162, + "step": 204370 + }, + { + "epoch": 0.8257210615836488, + "grad_norm": 383.67767333984375, + "learning_rate": 3.922032297547762e-06, + "loss": 45.1116, + "step": 204380 + }, + { + "epoch": 0.8257614628490164, + "grad_norm": 458.2007141113281, + "learning_rate": 3.920371552422217e-06, + "loss": 129.2995, + "step": 204390 + }, + { + "epoch": 0.825801864114384, + "grad_norm": 398.5634460449219, + "learning_rate": 3.918711120777308e-06, + "loss": 77.2209, + "step": 204400 + }, + { + "epoch": 0.8258422653797517, + "grad_norm": 769.456298828125, + "learning_rate": 3.917051002645407e-06, + "loss": 87.9833, + "step": 204410 + }, + { + "epoch": 0.8258826666451193, + "grad_norm": 532.5527954101562, + "learning_rate": 3.915391198058869e-06, + "loss": 67.3597, + "step": 204420 + }, + { + "epoch": 0.825923067910487, + "grad_norm": 658.0646362304688, + "learning_rate": 3.913731707050068e-06, + "loss": 61.9221, + "step": 204430 + }, + { + "epoch": 0.8259634691758546, + "grad_norm": 348.19451904296875, + "learning_rate": 3.912072529651347e-06, + "loss": 46.0781, + "step": 204440 + }, + { + "epoch": 0.8260038704412223, + "grad_norm": 521.810302734375, + "learning_rate": 3.910413665895052e-06, + "loss": 39.5764, + "step": 204450 + }, + { + "epoch": 0.8260442717065899, + "grad_norm": 897.446044921875, + "learning_rate": 3.908755115813527e-06, + "loss": 79.2441, + "step": 204460 + }, + { + "epoch": 0.8260846729719575, + "grad_norm": 1157.9791259765625, + "learning_rate": 3.9070968794391074e-06, + "loss": 78.5607, + "step": 204470 + }, + { + "epoch": 0.8261250742373251, + "grad_norm": 563.425048828125, + "learning_rate": 3.905438956804122e-06, + "loss": 75.8558, + "step": 204480 + }, + { + "epoch": 0.8261654755026927, + "grad_norm": 559.4025268554688, + "learning_rate": 3.903781347940885e-06, + "loss": 42.2715, + "step": 204490 + }, + { + "epoch": 0.8262058767680603, + "grad_norm": 1075.1285400390625, + "learning_rate": 3.90212405288172e-06, + "loss": 81.7774, + "step": 204500 + }, + { + "epoch": 0.826246278033428, + "grad_norm": 349.31365966796875, + "learning_rate": 3.900467071658931e-06, + "loss": 54.4991, + "step": 204510 + }, + { + "epoch": 0.8262866792987956, + "grad_norm": 450.9018859863281, + "learning_rate": 3.8988104043048335e-06, + "loss": 64.1686, + "step": 204520 + }, + { + "epoch": 0.8263270805641633, + "grad_norm": 1111.8343505859375, + "learning_rate": 3.897154050851704e-06, + "loss": 71.3912, + "step": 204530 + }, + { + "epoch": 0.8263674818295309, + "grad_norm": 623.166015625, + "learning_rate": 3.895498011331857e-06, + "loss": 65.7385, + "step": 204540 + }, + { + "epoch": 0.8264078830948985, + "grad_norm": 826.7272338867188, + "learning_rate": 3.8938422857775625e-06, + "loss": 62.597, + "step": 204550 + }, + { + "epoch": 0.8264482843602662, + "grad_norm": 400.330810546875, + "learning_rate": 3.892186874221105e-06, + "loss": 48.3574, + "step": 204560 + }, + { + "epoch": 0.8264886856256338, + "grad_norm": 795.3296508789062, + "learning_rate": 3.890531776694764e-06, + "loss": 118.6493, + "step": 204570 + }, + { + "epoch": 0.8265290868910015, + "grad_norm": 891.739013671875, + "learning_rate": 3.888876993230786e-06, + "loss": 62.1963, + "step": 204580 + 
}, + { + "epoch": 0.8265694881563691, + "grad_norm": 1050.269775390625, + "learning_rate": 3.887222523861458e-06, + "loss": 82.9359, + "step": 204590 + }, + { + "epoch": 0.8266098894217367, + "grad_norm": 556.0502319335938, + "learning_rate": 3.885568368619013e-06, + "loss": 52.484, + "step": 204600 + }, + { + "epoch": 0.8266502906871043, + "grad_norm": 439.3572692871094, + "learning_rate": 3.883914527535717e-06, + "loss": 81.4961, + "step": 204610 + }, + { + "epoch": 0.8266906919524719, + "grad_norm": 327.6829528808594, + "learning_rate": 3.882261000643801e-06, + "loss": 43.9108, + "step": 204620 + }, + { + "epoch": 0.8267310932178396, + "grad_norm": 776.0484619140625, + "learning_rate": 3.8806077879755034e-06, + "loss": 55.6605, + "step": 204630 + }, + { + "epoch": 0.8267714944832072, + "grad_norm": 493.49017333984375, + "learning_rate": 3.87895488956306e-06, + "loss": 50.8813, + "step": 204640 + }, + { + "epoch": 0.8268118957485748, + "grad_norm": 1241.678955078125, + "learning_rate": 3.877302305438688e-06, + "loss": 50.7592, + "step": 204650 + }, + { + "epoch": 0.8268522970139425, + "grad_norm": 436.13128662109375, + "learning_rate": 3.875650035634606e-06, + "loss": 47.3074, + "step": 204660 + }, + { + "epoch": 0.8268926982793101, + "grad_norm": 607.0848388671875, + "learning_rate": 3.87399808018303e-06, + "loss": 62.6406, + "step": 204670 + }, + { + "epoch": 0.8269330995446778, + "grad_norm": 708.3833618164062, + "learning_rate": 3.8723464391161636e-06, + "loss": 42.5327, + "step": 204680 + }, + { + "epoch": 0.8269735008100454, + "grad_norm": 746.2989501953125, + "learning_rate": 3.870695112466205e-06, + "loss": 48.0605, + "step": 204690 + }, + { + "epoch": 0.827013902075413, + "grad_norm": 761.16259765625, + "learning_rate": 3.869044100265356e-06, + "loss": 85.8021, + "step": 204700 + }, + { + "epoch": 0.8270543033407807, + "grad_norm": 369.38641357421875, + "learning_rate": 3.867393402545785e-06, + "loss": 45.979, + "step": 204710 + }, + { + "epoch": 0.8270947046061483, + "grad_norm": 943.9013671875, + "learning_rate": 3.865743019339696e-06, + "loss": 54.1836, + "step": 204720 + }, + { + "epoch": 0.8271351058715158, + "grad_norm": 994.6936645507812, + "learning_rate": 3.864092950679248e-06, + "loss": 78.5662, + "step": 204730 + }, + { + "epoch": 0.8271755071368835, + "grad_norm": 1002.3038940429688, + "learning_rate": 3.8624431965966145e-06, + "loss": 76.6578, + "step": 204740 + }, + { + "epoch": 0.8272159084022511, + "grad_norm": 755.2556762695312, + "learning_rate": 3.860793757123966e-06, + "loss": 55.3265, + "step": 204750 + }, + { + "epoch": 0.8272563096676188, + "grad_norm": 1022.126220703125, + "learning_rate": 3.859144632293441e-06, + "loss": 69.8393, + "step": 204760 + }, + { + "epoch": 0.8272967109329864, + "grad_norm": 762.2265014648438, + "learning_rate": 3.857495822137212e-06, + "loss": 76.9408, + "step": 204770 + }, + { + "epoch": 0.827337112198354, + "grad_norm": 876.8093872070312, + "learning_rate": 3.855847326687407e-06, + "loss": 122.3534, + "step": 204780 + }, + { + "epoch": 0.8273775134637217, + "grad_norm": 643.7639770507812, + "learning_rate": 3.85419914597617e-06, + "loss": 62.4863, + "step": 204790 + }, + { + "epoch": 0.8274179147290893, + "grad_norm": 459.33978271484375, + "learning_rate": 3.852551280035633e-06, + "loss": 87.3506, + "step": 204800 + }, + { + "epoch": 0.827458315994457, + "grad_norm": 511.9626159667969, + "learning_rate": 3.850903728897928e-06, + "loss": 61.5661, + "step": 204810 + }, + { + "epoch": 0.8274987172598246, + "grad_norm": 
671.6671752929688, + "learning_rate": 3.849256492595162e-06, + "loss": 54.622, + "step": 204820 + }, + { + "epoch": 0.8275391185251922, + "grad_norm": 742.3109741210938, + "learning_rate": 3.847609571159456e-06, + "loss": 60.2177, + "step": 204830 + }, + { + "epoch": 0.8275795197905599, + "grad_norm": 768.5794677734375, + "learning_rate": 3.84596296462292e-06, + "loss": 76.2251, + "step": 204840 + }, + { + "epoch": 0.8276199210559275, + "grad_norm": 460.94573974609375, + "learning_rate": 3.844316673017649e-06, + "loss": 94.431, + "step": 204850 + }, + { + "epoch": 0.827660322321295, + "grad_norm": 475.81396484375, + "learning_rate": 3.84267069637575e-06, + "loss": 93.0966, + "step": 204860 + }, + { + "epoch": 0.8277007235866627, + "grad_norm": 482.79937744140625, + "learning_rate": 3.841025034729293e-06, + "loss": 131.7757, + "step": 204870 + }, + { + "epoch": 0.8277411248520303, + "grad_norm": 547.8726196289062, + "learning_rate": 3.83937968811038e-06, + "loss": 33.6466, + "step": 204880 + }, + { + "epoch": 0.827781526117398, + "grad_norm": 468.816650390625, + "learning_rate": 3.837734656551078e-06, + "loss": 54.5125, + "step": 204890 + }, + { + "epoch": 0.8278219273827656, + "grad_norm": 1624.579345703125, + "learning_rate": 3.836089940083458e-06, + "loss": 109.9194, + "step": 204900 + }, + { + "epoch": 0.8278623286481332, + "grad_norm": 1351.5826416015625, + "learning_rate": 3.834445538739593e-06, + "loss": 50.6905, + "step": 204910 + }, + { + "epoch": 0.8279027299135009, + "grad_norm": 1161.532470703125, + "learning_rate": 3.832801452551524e-06, + "loss": 72.409, + "step": 204920 + }, + { + "epoch": 0.8279431311788685, + "grad_norm": 831.1387329101562, + "learning_rate": 3.831157681551325e-06, + "loss": 78.9654, + "step": 204930 + }, + { + "epoch": 0.8279835324442362, + "grad_norm": 594.3485717773438, + "learning_rate": 3.829514225771025e-06, + "loss": 67.1376, + "step": 204940 + }, + { + "epoch": 0.8280239337096038, + "grad_norm": 671.1150512695312, + "learning_rate": 3.827871085242669e-06, + "loss": 63.6794, + "step": 204950 + }, + { + "epoch": 0.8280643349749714, + "grad_norm": 652.6165161132812, + "learning_rate": 3.826228259998294e-06, + "loss": 78.6303, + "step": 204960 + }, + { + "epoch": 0.8281047362403391, + "grad_norm": 570.1580810546875, + "learning_rate": 3.824585750069931e-06, + "loss": 43.3808, + "step": 204970 + }, + { + "epoch": 0.8281451375057067, + "grad_norm": 742.7274780273438, + "learning_rate": 3.822943555489591e-06, + "loss": 40.8406, + "step": 204980 + }, + { + "epoch": 0.8281855387710743, + "grad_norm": 967.5043334960938, + "learning_rate": 3.8213016762892955e-06, + "loss": 73.1009, + "step": 204990 + }, + { + "epoch": 0.8282259400364419, + "grad_norm": 605.3741455078125, + "learning_rate": 3.819660112501053e-06, + "loss": 47.7953, + "step": 205000 + }, + { + "epoch": 0.8282663413018095, + "grad_norm": 338.8573303222656, + "learning_rate": 3.8180188641568675e-06, + "loss": 52.4129, + "step": 205010 + }, + { + "epoch": 0.8283067425671772, + "grad_norm": 774.953369140625, + "learning_rate": 3.816377931288739e-06, + "loss": 58.8931, + "step": 205020 + }, + { + "epoch": 0.8283471438325448, + "grad_norm": 454.5758361816406, + "learning_rate": 3.814737313928645e-06, + "loss": 34.4444, + "step": 205030 + }, + { + "epoch": 0.8283875450979125, + "grad_norm": 414.4952392578125, + "learning_rate": 3.8130970121085885e-06, + "loss": 58.2653, + "step": 205040 + }, + { + "epoch": 0.8284279463632801, + "grad_norm": 835.7203979492188, + "learning_rate": 3.8114570258605365e-06, 
+ "loss": 90.9831, + "step": 205050 + }, + { + "epoch": 0.8284683476286477, + "grad_norm": 520.2459716796875, + "learning_rate": 3.809817355216463e-06, + "loss": 94.914, + "step": 205060 + }, + { + "epoch": 0.8285087488940154, + "grad_norm": 179.45571899414062, + "learning_rate": 3.808178000208336e-06, + "loss": 44.4982, + "step": 205070 + }, + { + "epoch": 0.828549150159383, + "grad_norm": 397.7594299316406, + "learning_rate": 3.8065389608681137e-06, + "loss": 82.3385, + "step": 205080 + }, + { + "epoch": 0.8285895514247507, + "grad_norm": 653.4888305664062, + "learning_rate": 3.8049002372277575e-06, + "loss": 70.0517, + "step": 205090 + }, + { + "epoch": 0.8286299526901183, + "grad_norm": 350.9671325683594, + "learning_rate": 3.8032618293192048e-06, + "loss": 50.377, + "step": 205100 + }, + { + "epoch": 0.8286703539554859, + "grad_norm": 346.6308288574219, + "learning_rate": 3.8016237371744005e-06, + "loss": 50.0464, + "step": 205110 + }, + { + "epoch": 0.8287107552208535, + "grad_norm": 481.1168212890625, + "learning_rate": 3.799985960825281e-06, + "loss": 58.4021, + "step": 205120 + }, + { + "epoch": 0.8287511564862211, + "grad_norm": 347.2828063964844, + "learning_rate": 3.7983485003037747e-06, + "loss": 61.6095, + "step": 205130 + }, + { + "epoch": 0.8287915577515887, + "grad_norm": 494.54150390625, + "learning_rate": 3.7967113556418043e-06, + "loss": 126.7375, + "step": 205140 + }, + { + "epoch": 0.8288319590169564, + "grad_norm": 300.3056640625, + "learning_rate": 3.7950745268712962e-06, + "loss": 41.7117, + "step": 205150 + }, + { + "epoch": 0.828872360282324, + "grad_norm": 662.928466796875, + "learning_rate": 3.793438014024147e-06, + "loss": 50.3526, + "step": 205160 + }, + { + "epoch": 0.8289127615476917, + "grad_norm": 776.7771606445312, + "learning_rate": 3.791801817132268e-06, + "loss": 69.4264, + "step": 205170 + }, + { + "epoch": 0.8289531628130593, + "grad_norm": 750.98193359375, + "learning_rate": 3.790165936227561e-06, + "loss": 78.9131, + "step": 205180 + }, + { + "epoch": 0.828993564078427, + "grad_norm": 732.28759765625, + "learning_rate": 3.788530371341903e-06, + "loss": 68.6626, + "step": 205190 + }, + { + "epoch": 0.8290339653437946, + "grad_norm": 644.3612060546875, + "learning_rate": 3.7868951225072037e-06, + "loss": 80.6455, + "step": 205200 + }, + { + "epoch": 0.8290743666091622, + "grad_norm": 708.3941040039062, + "learning_rate": 3.785260189755322e-06, + "loss": 74.334, + "step": 205210 + }, + { + "epoch": 0.8291147678745299, + "grad_norm": 438.8287353515625, + "learning_rate": 3.783625573118148e-06, + "loss": 51.6794, + "step": 205220 + }, + { + "epoch": 0.8291551691398975, + "grad_norm": 619.4467163085938, + "learning_rate": 3.781991272627539e-06, + "loss": 66.8069, + "step": 205230 + }, + { + "epoch": 0.8291955704052651, + "grad_norm": 636.0877685546875, + "learning_rate": 3.7803572883153596e-06, + "loss": 54.6417, + "step": 205240 + }, + { + "epoch": 0.8292359716706327, + "grad_norm": 715.3419189453125, + "learning_rate": 3.7787236202134714e-06, + "loss": 61.203, + "step": 205250 + }, + { + "epoch": 0.8292763729360003, + "grad_norm": 347.56890869140625, + "learning_rate": 3.7770902683537113e-06, + "loss": 71.1388, + "step": 205260 + }, + { + "epoch": 0.829316774201368, + "grad_norm": 1141.3382568359375, + "learning_rate": 3.7754572327679272e-06, + "loss": 98.8043, + "step": 205270 + }, + { + "epoch": 0.8293571754667356, + "grad_norm": 1044.984619140625, + "learning_rate": 3.7738245134879605e-06, + "loss": 124.0922, + "step": 205280 + }, + { + "epoch": 
0.8293975767321032, + "grad_norm": 1197.4644775390625, + "learning_rate": 3.7721921105456362e-06, + "loss": 61.0714, + "step": 205290 + }, + { + "epoch": 0.8294379779974709, + "grad_norm": 518.3431396484375, + "learning_rate": 3.7705600239727825e-06, + "loss": 54.4297, + "step": 205300 + }, + { + "epoch": 0.8294783792628385, + "grad_norm": 672.818359375, + "learning_rate": 3.7689282538012207e-06, + "loss": 83.0008, + "step": 205310 + }, + { + "epoch": 0.8295187805282062, + "grad_norm": 471.1274719238281, + "learning_rate": 3.7672968000627543e-06, + "loss": 65.7165, + "step": 205320 + }, + { + "epoch": 0.8295591817935738, + "grad_norm": 1339.2938232421875, + "learning_rate": 3.765665662789193e-06, + "loss": 69.0904, + "step": 205330 + }, + { + "epoch": 0.8295995830589414, + "grad_norm": 730.4114379882812, + "learning_rate": 3.7640348420123386e-06, + "loss": 76.6059, + "step": 205340 + }, + { + "epoch": 0.8296399843243091, + "grad_norm": 488.1377868652344, + "learning_rate": 3.7624043377639828e-06, + "loss": 47.7173, + "step": 205350 + }, + { + "epoch": 0.8296803855896767, + "grad_norm": 572.6521606445312, + "learning_rate": 3.760774150075921e-06, + "loss": 45.0083, + "step": 205360 + }, + { + "epoch": 0.8297207868550442, + "grad_norm": 239.3827362060547, + "learning_rate": 3.759144278979916e-06, + "loss": 67.3381, + "step": 205370 + }, + { + "epoch": 0.8297611881204119, + "grad_norm": 933.4474487304688, + "learning_rate": 3.757514724507767e-06, + "loss": 61.5813, + "step": 205380 + }, + { + "epoch": 0.8298015893857795, + "grad_norm": 723.388427734375, + "learning_rate": 3.755885486691224e-06, + "loss": 65.7657, + "step": 205390 + }, + { + "epoch": 0.8298419906511472, + "grad_norm": 382.1856994628906, + "learning_rate": 3.7542565655620556e-06, + "loss": 46.5661, + "step": 205400 + }, + { + "epoch": 0.8298823919165148, + "grad_norm": 965.1820678710938, + "learning_rate": 3.75262796115202e-06, + "loss": 63.4158, + "step": 205410 + }, + { + "epoch": 0.8299227931818824, + "grad_norm": 747.7642211914062, + "learning_rate": 3.750999673492872e-06, + "loss": 80.3177, + "step": 205420 + }, + { + "epoch": 0.8299631944472501, + "grad_norm": 1038.1309814453125, + "learning_rate": 3.7493717026163466e-06, + "loss": 61.5329, + "step": 205430 + }, + { + "epoch": 0.8300035957126177, + "grad_norm": 1173.098388671875, + "learning_rate": 3.747744048554185e-06, + "loss": 52.2948, + "step": 205440 + }, + { + "epoch": 0.8300439969779854, + "grad_norm": 1063.2769775390625, + "learning_rate": 3.7461167113381213e-06, + "loss": 66.7191, + "step": 205450 + }, + { + "epoch": 0.830084398243353, + "grad_norm": 1257.1883544921875, + "learning_rate": 3.7444896909998817e-06, + "loss": 53.0945, + "step": 205460 + }, + { + "epoch": 0.8301247995087206, + "grad_norm": 775.4519653320312, + "learning_rate": 3.7428629875711876e-06, + "loss": 114.1547, + "step": 205470 + }, + { + "epoch": 0.8301652007740883, + "grad_norm": 873.256103515625, + "learning_rate": 3.7412366010837398e-06, + "loss": 78.5698, + "step": 205480 + }, + { + "epoch": 0.8302056020394559, + "grad_norm": 613.4527587890625, + "learning_rate": 3.7396105315692645e-06, + "loss": 50.2289, + "step": 205490 + }, + { + "epoch": 0.8302460033048235, + "grad_norm": 580.9442138671875, + "learning_rate": 3.7379847790594494e-06, + "loss": 37.0775, + "step": 205500 + }, + { + "epoch": 0.8302864045701911, + "grad_norm": 808.6475830078125, + "learning_rate": 3.7363593435859913e-06, + "loss": 56.0229, + "step": 205510 + }, + { + "epoch": 0.8303268058355587, + "grad_norm": 
398.3319396972656, + "learning_rate": 3.7347342251805874e-06, + "loss": 51.8984, + "step": 205520 + }, + { + "epoch": 0.8303672071009264, + "grad_norm": 812.0173950195312, + "learning_rate": 3.733109423874903e-06, + "loss": 67.3749, + "step": 205530 + }, + { + "epoch": 0.830407608366294, + "grad_norm": 542.0194091796875, + "learning_rate": 3.731484939700636e-06, + "loss": 68.6937, + "step": 205540 + }, + { + "epoch": 0.8304480096316617, + "grad_norm": 689.1860961914062, + "learning_rate": 3.72986077268944e-06, + "loss": 57.5804, + "step": 205550 + }, + { + "epoch": 0.8304884108970293, + "grad_norm": 384.5655517578125, + "learning_rate": 3.7282369228729852e-06, + "loss": 72.9229, + "step": 205560 + }, + { + "epoch": 0.8305288121623969, + "grad_norm": 1008.1188354492188, + "learning_rate": 3.7266133902829294e-06, + "loss": 51.0397, + "step": 205570 + }, + { + "epoch": 0.8305692134277646, + "grad_norm": 1109.7452392578125, + "learning_rate": 3.724990174950929e-06, + "loss": 73.2767, + "step": 205580 + }, + { + "epoch": 0.8306096146931322, + "grad_norm": 814.7535400390625, + "learning_rate": 3.723367276908616e-06, + "loss": 57.0444, + "step": 205590 + }, + { + "epoch": 0.8306500159584999, + "grad_norm": 688.2266845703125, + "learning_rate": 3.7217446961876413e-06, + "loss": 66.2713, + "step": 205600 + }, + { + "epoch": 0.8306904172238675, + "grad_norm": 657.0770263671875, + "learning_rate": 3.720122432819633e-06, + "loss": 76.499, + "step": 205610 + }, + { + "epoch": 0.8307308184892351, + "grad_norm": 534.5582885742188, + "learning_rate": 3.7185004868362184e-06, + "loss": 44.0142, + "step": 205620 + }, + { + "epoch": 0.8307712197546027, + "grad_norm": 533.6033325195312, + "learning_rate": 3.716878858269024e-06, + "loss": 56.7096, + "step": 205630 + }, + { + "epoch": 0.8308116210199703, + "grad_norm": 734.3758544921875, + "learning_rate": 3.715257547149651e-06, + "loss": 64.5679, + "step": 205640 + }, + { + "epoch": 0.8308520222853379, + "grad_norm": 693.151123046875, + "learning_rate": 3.713636553509725e-06, + "loss": 62.1897, + "step": 205650 + }, + { + "epoch": 0.8308924235507056, + "grad_norm": 338.1446533203125, + "learning_rate": 3.7120158773808346e-06, + "loss": 44.4169, + "step": 205660 + }, + { + "epoch": 0.8309328248160732, + "grad_norm": 339.9268798828125, + "learning_rate": 3.7103955187945806e-06, + "loss": 47.9953, + "step": 205670 + }, + { + "epoch": 0.8309732260814409, + "grad_norm": 336.430908203125, + "learning_rate": 3.708775477782551e-06, + "loss": 63.2029, + "step": 205680 + }, + { + "epoch": 0.8310136273468085, + "grad_norm": 1453.341552734375, + "learning_rate": 3.7071557543763305e-06, + "loss": 118.6448, + "step": 205690 + }, + { + "epoch": 0.8310540286121761, + "grad_norm": 939.2996826171875, + "learning_rate": 3.7055363486075035e-06, + "loss": 84.3909, + "step": 205700 + }, + { + "epoch": 0.8310944298775438, + "grad_norm": 568.6019897460938, + "learning_rate": 3.70391726050763e-06, + "loss": 77.9086, + "step": 205710 + }, + { + "epoch": 0.8311348311429114, + "grad_norm": 535.1437377929688, + "learning_rate": 3.7022984901082757e-06, + "loss": 65.3204, + "step": 205720 + }, + { + "epoch": 0.8311752324082791, + "grad_norm": 1293.41796875, + "learning_rate": 3.7006800374410067e-06, + "loss": 71.436, + "step": 205730 + }, + { + "epoch": 0.8312156336736467, + "grad_norm": 634.4947509765625, + "learning_rate": 3.6990619025373708e-06, + "loss": 75.6619, + "step": 205740 + }, + { + "epoch": 0.8312560349390143, + "grad_norm": 709.7557983398438, + "learning_rate": 
3.697444085428914e-06, + "loss": 78.1711, + "step": 205750 + }, + { + "epoch": 0.8312964362043819, + "grad_norm": 231.06875610351562, + "learning_rate": 3.695826586147184e-06, + "loss": 83.0764, + "step": 205760 + }, + { + "epoch": 0.8313368374697495, + "grad_norm": 572.2237548828125, + "learning_rate": 3.694209404723703e-06, + "loss": 76.2663, + "step": 205770 + }, + { + "epoch": 0.8313772387351172, + "grad_norm": 813.826416015625, + "learning_rate": 3.6925925411900055e-06, + "loss": 64.1273, + "step": 205780 + }, + { + "epoch": 0.8314176400004848, + "grad_norm": 1119.666748046875, + "learning_rate": 3.690975995577617e-06, + "loss": 88.1615, + "step": 205790 + }, + { + "epoch": 0.8314580412658524, + "grad_norm": 889.9226684570312, + "learning_rate": 3.6893597679180372e-06, + "loss": 113.3542, + "step": 205800 + }, + { + "epoch": 0.8314984425312201, + "grad_norm": 1082.51513671875, + "learning_rate": 3.687743858242796e-06, + "loss": 81.4625, + "step": 205810 + }, + { + "epoch": 0.8315388437965877, + "grad_norm": 849.8379516601562, + "learning_rate": 3.6861282665833753e-06, + "loss": 56.8804, + "step": 205820 + }, + { + "epoch": 0.8315792450619554, + "grad_norm": 479.277099609375, + "learning_rate": 3.6845129929712962e-06, + "loss": 47.55, + "step": 205830 + }, + { + "epoch": 0.831619646327323, + "grad_norm": 831.70361328125, + "learning_rate": 3.6828980374380273e-06, + "loss": 50.7108, + "step": 205840 + }, + { + "epoch": 0.8316600475926906, + "grad_norm": 1034.928955078125, + "learning_rate": 3.6812834000150655e-06, + "loss": 61.8897, + "step": 205850 + }, + { + "epoch": 0.8317004488580583, + "grad_norm": 759.6297607421875, + "learning_rate": 3.6796690807338873e-06, + "loss": 49.7315, + "step": 205860 + }, + { + "epoch": 0.8317408501234259, + "grad_norm": 1360.1875, + "learning_rate": 3.6780550796259597e-06, + "loss": 64.9887, + "step": 205870 + }, + { + "epoch": 0.8317812513887936, + "grad_norm": 987.2146606445312, + "learning_rate": 3.67644139672275e-06, + "loss": 86.234, + "step": 205880 + }, + { + "epoch": 0.8318216526541611, + "grad_norm": 671.8296508789062, + "learning_rate": 3.6748280320557195e-06, + "loss": 50.3949, + "step": 205890 + }, + { + "epoch": 0.8318620539195287, + "grad_norm": 2849.333984375, + "learning_rate": 3.6732149856563217e-06, + "loss": 128.2703, + "step": 205900 + }, + { + "epoch": 0.8319024551848964, + "grad_norm": 427.9268798828125, + "learning_rate": 3.671602257556002e-06, + "loss": 53.5723, + "step": 205910 + }, + { + "epoch": 0.831942856450264, + "grad_norm": 497.85076904296875, + "learning_rate": 3.6699898477862085e-06, + "loss": 55.2742, + "step": 205920 + }, + { + "epoch": 0.8319832577156316, + "grad_norm": 872.7532348632812, + "learning_rate": 3.668377756378367e-06, + "loss": 72.9755, + "step": 205930 + }, + { + "epoch": 0.8320236589809993, + "grad_norm": 448.9393310546875, + "learning_rate": 3.6667659833639067e-06, + "loss": 68.5742, + "step": 205940 + }, + { + "epoch": 0.8320640602463669, + "grad_norm": 611.2103881835938, + "learning_rate": 3.6651545287742528e-06, + "loss": 57.3888, + "step": 205950 + }, + { + "epoch": 0.8321044615117346, + "grad_norm": 539.62451171875, + "learning_rate": 3.663543392640823e-06, + "loss": 56.0222, + "step": 205960 + }, + { + "epoch": 0.8321448627771022, + "grad_norm": 931.1737060546875, + "learning_rate": 3.6619325749950285e-06, + "loss": 101.8406, + "step": 205970 + }, + { + "epoch": 0.8321852640424698, + "grad_norm": 753.3583984375, + "learning_rate": 3.6603220758682613e-06, + "loss": 75.9662, + "step": 205980 + }, + 
{ + "epoch": 0.8322256653078375, + "grad_norm": 1061.3914794921875, + "learning_rate": 3.6587118952919353e-06, + "loss": 76.9191, + "step": 205990 + }, + { + "epoch": 0.8322660665732051, + "grad_norm": 661.611572265625, + "learning_rate": 3.6571020332974307e-06, + "loss": 102.9998, + "step": 206000 + }, + { + "epoch": 0.8323064678385727, + "grad_norm": 614.0242919921875, + "learning_rate": 3.655492489916135e-06, + "loss": 80.515, + "step": 206010 + }, + { + "epoch": 0.8323468691039403, + "grad_norm": 442.4114074707031, + "learning_rate": 3.65388326517943e-06, + "loss": 80.6294, + "step": 206020 + }, + { + "epoch": 0.8323872703693079, + "grad_norm": 666.2330932617188, + "learning_rate": 3.6522743591186884e-06, + "loss": 65.6263, + "step": 206030 + }, + { + "epoch": 0.8324276716346756, + "grad_norm": 450.52154541015625, + "learning_rate": 3.6506657717652715e-06, + "loss": 57.2056, + "step": 206040 + }, + { + "epoch": 0.8324680729000432, + "grad_norm": 628.9532470703125, + "learning_rate": 3.6490575031505415e-06, + "loss": 49.4109, + "step": 206050 + }, + { + "epoch": 0.8325084741654108, + "grad_norm": 632.1691284179688, + "learning_rate": 3.647449553305853e-06, + "loss": 64.8307, + "step": 206060 + }, + { + "epoch": 0.8325488754307785, + "grad_norm": 1057.015869140625, + "learning_rate": 3.645841922262556e-06, + "loss": 67.2966, + "step": 206070 + }, + { + "epoch": 0.8325892766961461, + "grad_norm": 615.6918334960938, + "learning_rate": 3.6442346100519953e-06, + "loss": 57.4216, + "step": 206080 + }, + { + "epoch": 0.8326296779615138, + "grad_norm": 495.1897277832031, + "learning_rate": 3.64262761670549e-06, + "loss": 67.0357, + "step": 206090 + }, + { + "epoch": 0.8326700792268814, + "grad_norm": 666.5139770507812, + "learning_rate": 3.641020942254392e-06, + "loss": 41.4153, + "step": 206100 + }, + { + "epoch": 0.832710480492249, + "grad_norm": 314.83514404296875, + "learning_rate": 3.6394145867300056e-06, + "loss": 60.906, + "step": 206110 + }, + { + "epoch": 0.8327508817576167, + "grad_norm": 964.3408203125, + "learning_rate": 3.6378085501636574e-06, + "loss": 80.0057, + "step": 206120 + }, + { + "epoch": 0.8327912830229843, + "grad_norm": 1189.7855224609375, + "learning_rate": 3.6362028325866594e-06, + "loss": 71.8842, + "step": 206130 + }, + { + "epoch": 0.8328316842883519, + "grad_norm": 446.4092102050781, + "learning_rate": 3.6345974340303027e-06, + "loss": 55.3721, + "step": 206140 + }, + { + "epoch": 0.8328720855537195, + "grad_norm": 1121.00244140625, + "learning_rate": 3.632992354525904e-06, + "loss": 74.6067, + "step": 206150 + }, + { + "epoch": 0.8329124868190871, + "grad_norm": 710.0554809570312, + "learning_rate": 3.6313875941047405e-06, + "loss": 37.5284, + "step": 206160 + }, + { + "epoch": 0.8329528880844548, + "grad_norm": 911.734375, + "learning_rate": 3.629783152798101e-06, + "loss": 70.7403, + "step": 206170 + }, + { + "epoch": 0.8329932893498224, + "grad_norm": 596.2750854492188, + "learning_rate": 3.628179030637269e-06, + "loss": 105.8906, + "step": 206180 + }, + { + "epoch": 0.8330336906151901, + "grad_norm": 1095.090576171875, + "learning_rate": 3.626575227653515e-06, + "loss": 83.8621, + "step": 206190 + }, + { + "epoch": 0.8330740918805577, + "grad_norm": 577.0567016601562, + "learning_rate": 3.624971743878112e-06, + "loss": 56.8284, + "step": 206200 + }, + { + "epoch": 0.8331144931459253, + "grad_norm": 622.8999633789062, + "learning_rate": 3.62336857934231e-06, + "loss": 57.4628, + "step": 206210 + }, + { + "epoch": 0.833154894411293, + "grad_norm": 
544.2757568359375, + "learning_rate": 3.6217657340773695e-06, + "loss": 52.9365, + "step": 206220 + }, + { + "epoch": 0.8331952956766606, + "grad_norm": 691.8588256835938, + "learning_rate": 3.6201632081145375e-06, + "loss": 79.1491, + "step": 206230 + }, + { + "epoch": 0.8332356969420283, + "grad_norm": 1191.447021484375, + "learning_rate": 3.6185610014850613e-06, + "loss": 65.5795, + "step": 206240 + }, + { + "epoch": 0.8332760982073959, + "grad_norm": 752.5836181640625, + "learning_rate": 3.616959114220162e-06, + "loss": 88.5146, + "step": 206250 + }, + { + "epoch": 0.8333164994727635, + "grad_norm": 757.5531616210938, + "learning_rate": 3.61535754635109e-06, + "loss": 91.3429, + "step": 206260 + }, + { + "epoch": 0.8333569007381311, + "grad_norm": 756.7160034179688, + "learning_rate": 3.6137562979090546e-06, + "loss": 70.4678, + "step": 206270 + }, + { + "epoch": 0.8333973020034987, + "grad_norm": 575.6732788085938, + "learning_rate": 3.612155368925274e-06, + "loss": 75.0833, + "step": 206280 + }, + { + "epoch": 0.8334377032688663, + "grad_norm": 538.6513061523438, + "learning_rate": 3.6105547594309667e-06, + "loss": 54.8182, + "step": 206290 + }, + { + "epoch": 0.833478104534234, + "grad_norm": 397.3924865722656, + "learning_rate": 3.608954469457322e-06, + "loss": 66.7881, + "step": 206300 + }, + { + "epoch": 0.8335185057996016, + "grad_norm": 387.76092529296875, + "learning_rate": 3.6073544990355603e-06, + "loss": 31.5982, + "step": 206310 + }, + { + "epoch": 0.8335589070649693, + "grad_norm": 754.8309326171875, + "learning_rate": 3.6057548481968563e-06, + "loss": 86.5101, + "step": 206320 + }, + { + "epoch": 0.8335993083303369, + "grad_norm": 556.043212890625, + "learning_rate": 3.6041555169724007e-06, + "loss": 63.3544, + "step": 206330 + }, + { + "epoch": 0.8336397095957045, + "grad_norm": 395.0321044921875, + "learning_rate": 3.6025565053933754e-06, + "loss": 53.4466, + "step": 206340 + }, + { + "epoch": 0.8336801108610722, + "grad_norm": 396.8856506347656, + "learning_rate": 3.6009578134909527e-06, + "loss": 50.2746, + "step": 206350 + }, + { + "epoch": 0.8337205121264398, + "grad_norm": 1350.7470703125, + "learning_rate": 3.599359441296302e-06, + "loss": 75.3368, + "step": 206360 + }, + { + "epoch": 0.8337609133918075, + "grad_norm": 818.5650024414062, + "learning_rate": 3.597761388840586e-06, + "loss": 54.8554, + "step": 206370 + }, + { + "epoch": 0.8338013146571751, + "grad_norm": 581.5230712890625, + "learning_rate": 3.5961636561549497e-06, + "loss": 62.685, + "step": 206380 + }, + { + "epoch": 0.8338417159225427, + "grad_norm": 537.6826782226562, + "learning_rate": 3.59456624327055e-06, + "loss": 73.7453, + "step": 206390 + }, + { + "epoch": 0.8338821171879103, + "grad_norm": 965.7396850585938, + "learning_rate": 3.5929691502185327e-06, + "loss": 59.4426, + "step": 206400 + }, + { + "epoch": 0.8339225184532779, + "grad_norm": 451.5664978027344, + "learning_rate": 3.591372377030018e-06, + "loss": 51.6717, + "step": 206410 + }, + { + "epoch": 0.8339629197186456, + "grad_norm": 576.7664184570312, + "learning_rate": 3.5897759237361563e-06, + "loss": 69.4534, + "step": 206420 + }, + { + "epoch": 0.8340033209840132, + "grad_norm": 785.3343505859375, + "learning_rate": 3.5881797903680515e-06, + "loss": 72.2123, + "step": 206430 + }, + { + "epoch": 0.8340437222493808, + "grad_norm": 570.1082153320312, + "learning_rate": 3.586583976956843e-06, + "loss": 76.0437, + "step": 206440 + }, + { + "epoch": 0.8340841235147485, + "grad_norm": 362.0150451660156, + "learning_rate": 
3.5849884835336225e-06, + "loss": 69.2545, + "step": 206450 + }, + { + "epoch": 0.8341245247801161, + "grad_norm": 899.4418334960938, + "learning_rate": 3.5833933101295037e-06, + "loss": 77.8516, + "step": 206460 + }, + { + "epoch": 0.8341649260454838, + "grad_norm": 1051.14990234375, + "learning_rate": 3.581798456775589e-06, + "loss": 69.2076, + "step": 206470 + }, + { + "epoch": 0.8342053273108514, + "grad_norm": 1344.1231689453125, + "learning_rate": 3.580203923502963e-06, + "loss": 73.8734, + "step": 206480 + }, + { + "epoch": 0.834245728576219, + "grad_norm": 473.0298767089844, + "learning_rate": 3.578609710342713e-06, + "loss": 56.0464, + "step": 206490 + }, + { + "epoch": 0.8342861298415867, + "grad_norm": 704.240966796875, + "learning_rate": 3.5770158173259195e-06, + "loss": 78.4156, + "step": 206500 + }, + { + "epoch": 0.8343265311069543, + "grad_norm": 597.7078857421875, + "learning_rate": 3.5754222444836595e-06, + "loss": 66.9745, + "step": 206510 + }, + { + "epoch": 0.8343669323723218, + "grad_norm": 919.501708984375, + "learning_rate": 3.5738289918469993e-06, + "loss": 38.6486, + "step": 206520 + }, + { + "epoch": 0.8344073336376895, + "grad_norm": 799.5490112304688, + "learning_rate": 3.572236059447005e-06, + "loss": 65.84, + "step": 206530 + }, + { + "epoch": 0.8344477349030571, + "grad_norm": 354.2010192871094, + "learning_rate": 3.570643447314719e-06, + "loss": 53.8969, + "step": 206540 + }, + { + "epoch": 0.8344881361684248, + "grad_norm": 462.3268737792969, + "learning_rate": 3.5690511554812e-06, + "loss": 46.9538, + "step": 206550 + }, + { + "epoch": 0.8345285374337924, + "grad_norm": 740.3003540039062, + "learning_rate": 3.567459183977491e-06, + "loss": 77.2031, + "step": 206560 + }, + { + "epoch": 0.83456893869916, + "grad_norm": 569.7315673828125, + "learning_rate": 3.565867532834617e-06, + "loss": 59.827, + "step": 206570 + }, + { + "epoch": 0.8346093399645277, + "grad_norm": 825.3013916015625, + "learning_rate": 3.564276202083625e-06, + "loss": 61.7715, + "step": 206580 + }, + { + "epoch": 0.8346497412298953, + "grad_norm": 657.6808471679688, + "learning_rate": 3.5626851917555195e-06, + "loss": 60.3794, + "step": 206590 + }, + { + "epoch": 0.834690142495263, + "grad_norm": 735.840576171875, + "learning_rate": 3.561094501881339e-06, + "loss": 71.1363, + "step": 206600 + }, + { + "epoch": 0.8347305437606306, + "grad_norm": 873.139404296875, + "learning_rate": 3.5595041324920822e-06, + "loss": 73.8286, + "step": 206610 + }, + { + "epoch": 0.8347709450259982, + "grad_norm": 731.7775268554688, + "learning_rate": 3.5579140836187544e-06, + "loss": 78.8304, + "step": 206620 + }, + { + "epoch": 0.8348113462913659, + "grad_norm": 1141.6910400390625, + "learning_rate": 3.55632435529236e-06, + "loss": 90.5889, + "step": 206630 + }, + { + "epoch": 0.8348517475567335, + "grad_norm": 169.7196807861328, + "learning_rate": 3.5547349475438852e-06, + "loss": 47.4354, + "step": 206640 + }, + { + "epoch": 0.834892148822101, + "grad_norm": 1395.6865234375, + "learning_rate": 3.553145860404319e-06, + "loss": 85.078, + "step": 206650 + }, + { + "epoch": 0.8349325500874687, + "grad_norm": 723.7252807617188, + "learning_rate": 3.5515570939046406e-06, + "loss": 55.9318, + "step": 206660 + }, + { + "epoch": 0.8349729513528363, + "grad_norm": 271.974609375, + "learning_rate": 3.5499686480758256e-06, + "loss": 46.7281, + "step": 206670 + }, + { + "epoch": 0.835013352618204, + "grad_norm": 474.4967346191406, + "learning_rate": 3.54838052294884e-06, + "loss": 48.754, + "step": 206680 + }, + { + 
"epoch": 0.8350537538835716, + "grad_norm": 530.2744140625, + "learning_rate": 3.5467927185546526e-06, + "loss": 73.4227, + "step": 206690 + }, + { + "epoch": 0.8350941551489393, + "grad_norm": 410.1844787597656, + "learning_rate": 3.5452052349242007e-06, + "loss": 50.7103, + "step": 206700 + }, + { + "epoch": 0.8351345564143069, + "grad_norm": 1368.9107666015625, + "learning_rate": 3.543618072088455e-06, + "loss": 65.0583, + "step": 206710 + }, + { + "epoch": 0.8351749576796745, + "grad_norm": 791.0393676757812, + "learning_rate": 3.54203123007834e-06, + "loss": 71.3154, + "step": 206720 + }, + { + "epoch": 0.8352153589450422, + "grad_norm": 878.8546752929688, + "learning_rate": 3.5404447089248016e-06, + "loss": 77.6026, + "step": 206730 + }, + { + "epoch": 0.8352557602104098, + "grad_norm": 509.93646240234375, + "learning_rate": 3.5388585086587714e-06, + "loss": 70.7454, + "step": 206740 + }, + { + "epoch": 0.8352961614757775, + "grad_norm": 707.8140869140625, + "learning_rate": 3.5372726293111615e-06, + "loss": 54.0695, + "step": 206750 + }, + { + "epoch": 0.8353365627411451, + "grad_norm": 612.4566040039062, + "learning_rate": 3.5356870709129056e-06, + "loss": 64.851, + "step": 206760 + }, + { + "epoch": 0.8353769640065127, + "grad_norm": 855.1297607421875, + "learning_rate": 3.5341018334949005e-06, + "loss": 81.118, + "step": 206770 + }, + { + "epoch": 0.8354173652718803, + "grad_norm": 721.7530517578125, + "learning_rate": 3.5325169170880604e-06, + "loss": 87.7254, + "step": 206780 + }, + { + "epoch": 0.8354577665372479, + "grad_norm": 542.6119384765625, + "learning_rate": 3.5309323217232795e-06, + "loss": 73.5958, + "step": 206790 + }, + { + "epoch": 0.8354981678026155, + "grad_norm": 533.9613647460938, + "learning_rate": 3.529348047431451e-06, + "loss": 66.3965, + "step": 206800 + }, + { + "epoch": 0.8355385690679832, + "grad_norm": 517.8533935546875, + "learning_rate": 3.5277640942434664e-06, + "loss": 53.5081, + "step": 206810 + }, + { + "epoch": 0.8355789703333508, + "grad_norm": 656.4945068359375, + "learning_rate": 3.5261804621901986e-06, + "loss": 51.912, + "step": 206820 + }, + { + "epoch": 0.8356193715987185, + "grad_norm": 732.1359252929688, + "learning_rate": 3.5245971513025225e-06, + "loss": 65.9672, + "step": 206830 + }, + { + "epoch": 0.8356597728640861, + "grad_norm": 478.84161376953125, + "learning_rate": 3.5230141616113066e-06, + "loss": 108.8698, + "step": 206840 + }, + { + "epoch": 0.8357001741294537, + "grad_norm": 605.939208984375, + "learning_rate": 3.521431493147418e-06, + "loss": 50.4907, + "step": 206850 + }, + { + "epoch": 0.8357405753948214, + "grad_norm": 624.0269165039062, + "learning_rate": 3.5198491459416982e-06, + "loss": 57.9866, + "step": 206860 + }, + { + "epoch": 0.835780976660189, + "grad_norm": 725.5579833984375, + "learning_rate": 3.5182671200250116e-06, + "loss": 64.5091, + "step": 206870 + }, + { + "epoch": 0.8358213779255567, + "grad_norm": 416.8826599121094, + "learning_rate": 3.5166854154281894e-06, + "loss": 43.8291, + "step": 206880 + }, + { + "epoch": 0.8358617791909243, + "grad_norm": 737.301513671875, + "learning_rate": 3.5151040321820683e-06, + "loss": 58.848, + "step": 206890 + }, + { + "epoch": 0.8359021804562919, + "grad_norm": 671.8460083007812, + "learning_rate": 3.5135229703174887e-06, + "loss": 61.5925, + "step": 206900 + }, + { + "epoch": 0.8359425817216595, + "grad_norm": 1051.4453125, + "learning_rate": 3.511942229865255e-06, + "loss": 62.3561, + "step": 206910 + }, + { + "epoch": 0.8359829829870271, + "grad_norm": 
898.0015869140625, + "learning_rate": 3.510361810856209e-06, + "loss": 80.796, + "step": 206920 + }, + { + "epoch": 0.8360233842523948, + "grad_norm": 755.3038940429688, + "learning_rate": 3.508781713321141e-06, + "loss": 75.5909, + "step": 206930 + }, + { + "epoch": 0.8360637855177624, + "grad_norm": 552.0639038085938, + "learning_rate": 3.507201937290865e-06, + "loss": 37.3303, + "step": 206940 + }, + { + "epoch": 0.83610418678313, + "grad_norm": 299.9013671875, + "learning_rate": 3.5056224827961783e-06, + "loss": 77.7777, + "step": 206950 + }, + { + "epoch": 0.8361445880484977, + "grad_norm": 532.4324340820312, + "learning_rate": 3.5040433498678715e-06, + "loss": 68.927, + "step": 206960 + }, + { + "epoch": 0.8361849893138653, + "grad_norm": 444.0167541503906, + "learning_rate": 3.5024645385367405e-06, + "loss": 50.9656, + "step": 206970 + }, + { + "epoch": 0.836225390579233, + "grad_norm": 759.0509033203125, + "learning_rate": 3.5008860488335495e-06, + "loss": 77.7239, + "step": 206980 + }, + { + "epoch": 0.8362657918446006, + "grad_norm": 407.7332458496094, + "learning_rate": 3.49930788078908e-06, + "loss": 83.4969, + "step": 206990 + }, + { + "epoch": 0.8363061931099682, + "grad_norm": 477.5789489746094, + "learning_rate": 3.497730034434099e-06, + "loss": 52.3867, + "step": 207000 + }, + { + "epoch": 0.8363465943753359, + "grad_norm": 579.8424072265625, + "learning_rate": 3.496152509799371e-06, + "loss": 82.1228, + "step": 207010 + }, + { + "epoch": 0.8363869956407035, + "grad_norm": 499.09771728515625, + "learning_rate": 3.4945753069156354e-06, + "loss": 73.6266, + "step": 207020 + }, + { + "epoch": 0.8364273969060712, + "grad_norm": 1120.424560546875, + "learning_rate": 3.4929984258136653e-06, + "loss": 98.4846, + "step": 207030 + }, + { + "epoch": 0.8364677981714387, + "grad_norm": 449.077392578125, + "learning_rate": 3.4914218665241762e-06, + "loss": 53.1594, + "step": 207040 + }, + { + "epoch": 0.8365081994368063, + "grad_norm": 710.2581176757812, + "learning_rate": 3.489845629077928e-06, + "loss": 45.2376, + "step": 207050 + }, + { + "epoch": 0.836548600702174, + "grad_norm": 647.9352416992188, + "learning_rate": 3.4882697135056345e-06, + "loss": 81.4907, + "step": 207060 + }, + { + "epoch": 0.8365890019675416, + "grad_norm": 586.7609252929688, + "learning_rate": 3.4866941198380234e-06, + "loss": 69.6997, + "step": 207070 + }, + { + "epoch": 0.8366294032329092, + "grad_norm": 565.03515625, + "learning_rate": 3.4851188481058174e-06, + "loss": 59.8026, + "step": 207080 + }, + { + "epoch": 0.8366698044982769, + "grad_norm": 392.1166076660156, + "learning_rate": 3.483543898339716e-06, + "loss": 64.0419, + "step": 207090 + }, + { + "epoch": 0.8367102057636445, + "grad_norm": 463.5969543457031, + "learning_rate": 3.481969270570429e-06, + "loss": 62.5362, + "step": 207100 + }, + { + "epoch": 0.8367506070290122, + "grad_norm": 1151.1402587890625, + "learning_rate": 3.480394964828657e-06, + "loss": 51.3491, + "step": 207110 + }, + { + "epoch": 0.8367910082943798, + "grad_norm": 272.35321044921875, + "learning_rate": 3.47882098114509e-06, + "loss": 48.4927, + "step": 207120 + }, + { + "epoch": 0.8368314095597474, + "grad_norm": 551.7793579101562, + "learning_rate": 3.477247319550412e-06, + "loss": 68.8062, + "step": 207130 + }, + { + "epoch": 0.8368718108251151, + "grad_norm": 516.769287109375, + "learning_rate": 3.4756739800753115e-06, + "loss": 74.3483, + "step": 207140 + }, + { + "epoch": 0.8369122120904827, + "grad_norm": 878.7603759765625, + "learning_rate": 3.4741009627504464e-06, 
+ "loss": 77.3181, + "step": 207150 + }, + { + "epoch": 0.8369526133558503, + "grad_norm": 618.9376831054688, + "learning_rate": 3.472528267606492e-06, + "loss": 72.2822, + "step": 207160 + }, + { + "epoch": 0.8369930146212179, + "grad_norm": 670.5582885742188, + "learning_rate": 3.4709558946741084e-06, + "loss": 55.5917, + "step": 207170 + }, + { + "epoch": 0.8370334158865855, + "grad_norm": 1085.524169921875, + "learning_rate": 3.4693838439839488e-06, + "loss": 45.8579, + "step": 207180 + }, + { + "epoch": 0.8370738171519532, + "grad_norm": 804.9151611328125, + "learning_rate": 3.467812115566667e-06, + "loss": 60.6111, + "step": 207190 + }, + { + "epoch": 0.8371142184173208, + "grad_norm": 646.5005493164062, + "learning_rate": 3.4662407094528904e-06, + "loss": 60.5895, + "step": 207200 + }, + { + "epoch": 0.8371546196826885, + "grad_norm": 468.0220642089844, + "learning_rate": 3.4646696256732716e-06, + "loss": 56.977, + "step": 207210 + }, + { + "epoch": 0.8371950209480561, + "grad_norm": 1369.2379150390625, + "learning_rate": 3.4630988642584273e-06, + "loss": 89.2603, + "step": 207220 + }, + { + "epoch": 0.8372354222134237, + "grad_norm": 924.2322998046875, + "learning_rate": 3.4615284252389847e-06, + "loss": 93.8177, + "step": 207230 + }, + { + "epoch": 0.8372758234787914, + "grad_norm": 661.7150268554688, + "learning_rate": 3.4599583086455635e-06, + "loss": 59.296, + "step": 207240 + }, + { + "epoch": 0.837316224744159, + "grad_norm": 560.3021240234375, + "learning_rate": 3.4583885145087613e-06, + "loss": 66.9729, + "step": 207250 + }, + { + "epoch": 0.8373566260095267, + "grad_norm": 992.9057006835938, + "learning_rate": 3.456819042859203e-06, + "loss": 67.2869, + "step": 207260 + }, + { + "epoch": 0.8373970272748943, + "grad_norm": 1030.796630859375, + "learning_rate": 3.4552498937274658e-06, + "loss": 94.0989, + "step": 207270 + }, + { + "epoch": 0.8374374285402619, + "grad_norm": 708.08251953125, + "learning_rate": 3.4536810671441524e-06, + "loss": 74.9758, + "step": 207280 + }, + { + "epoch": 0.8374778298056295, + "grad_norm": 2371.642578125, + "learning_rate": 3.4521125631398446e-06, + "loss": 74.2902, + "step": 207290 + }, + { + "epoch": 0.8375182310709971, + "grad_norm": 1295.576416015625, + "learning_rate": 3.4505443817451266e-06, + "loss": 84.8724, + "step": 207300 + }, + { + "epoch": 0.8375586323363647, + "grad_norm": 913.6698608398438, + "learning_rate": 3.4489765229905615e-06, + "loss": 80.9072, + "step": 207310 + }, + { + "epoch": 0.8375990336017324, + "grad_norm": 487.251953125, + "learning_rate": 3.4474089869067196e-06, + "loss": 47.48, + "step": 207320 + }, + { + "epoch": 0.8376394348671, + "grad_norm": 291.40777587890625, + "learning_rate": 3.4458417735241634e-06, + "loss": 53.4211, + "step": 207330 + }, + { + "epoch": 0.8376798361324677, + "grad_norm": 1523.2333984375, + "learning_rate": 3.4442748828734417e-06, + "loss": 68.6639, + "step": 207340 + }, + { + "epoch": 0.8377202373978353, + "grad_norm": 691.394287109375, + "learning_rate": 3.4427083149851103e-06, + "loss": 59.2945, + "step": 207350 + }, + { + "epoch": 0.8377606386632029, + "grad_norm": 1728.5185546875, + "learning_rate": 3.441142069889696e-06, + "loss": 71.7049, + "step": 207360 + }, + { + "epoch": 0.8378010399285706, + "grad_norm": 456.6267395019531, + "learning_rate": 3.4395761476177513e-06, + "loss": 62.144, + "step": 207370 + }, + { + "epoch": 0.8378414411939382, + "grad_norm": 1174.771484375, + "learning_rate": 3.4380105481997906e-06, + "loss": 54.6145, + "step": 207380 + }, + { + "epoch": 
0.8378818424593059, + "grad_norm": 1359.4735107421875, + "learning_rate": 3.43644527166634e-06, + "loss": 49.1379, + "step": 207390 + }, + { + "epoch": 0.8379222437246735, + "grad_norm": 541.1575317382812, + "learning_rate": 3.4348803180479174e-06, + "loss": 42.6462, + "step": 207400 + }, + { + "epoch": 0.8379626449900411, + "grad_norm": 660.9223022460938, + "learning_rate": 3.4333156873750294e-06, + "loss": 143.7099, + "step": 207410 + }, + { + "epoch": 0.8380030462554087, + "grad_norm": 676.4644775390625, + "learning_rate": 3.4317513796781878e-06, + "loss": 48.5901, + "step": 207420 + }, + { + "epoch": 0.8380434475207763, + "grad_norm": 381.83392333984375, + "learning_rate": 3.4301873949878784e-06, + "loss": 75.7044, + "step": 207430 + }, + { + "epoch": 0.838083848786144, + "grad_norm": 848.6807250976562, + "learning_rate": 3.428623733334595e-06, + "loss": 75.1713, + "step": 207440 + }, + { + "epoch": 0.8381242500515116, + "grad_norm": 446.1241149902344, + "learning_rate": 3.4270603947488244e-06, + "loss": 98.5851, + "step": 207450 + }, + { + "epoch": 0.8381646513168792, + "grad_norm": 804.875732421875, + "learning_rate": 3.425497379261049e-06, + "loss": 72.4386, + "step": 207460 + }, + { + "epoch": 0.8382050525822469, + "grad_norm": 803.0800170898438, + "learning_rate": 3.4239346869017265e-06, + "loss": 66.1259, + "step": 207470 + }, + { + "epoch": 0.8382454538476145, + "grad_norm": 793.05029296875, + "learning_rate": 3.422372317701339e-06, + "loss": 102.4979, + "step": 207480 + }, + { + "epoch": 0.8382858551129821, + "grad_norm": 540.7957763671875, + "learning_rate": 3.4208102716903336e-06, + "loss": 44.0655, + "step": 207490 + }, + { + "epoch": 0.8383262563783498, + "grad_norm": 470.6645202636719, + "learning_rate": 3.419248548899168e-06, + "loss": 70.0691, + "step": 207500 + }, + { + "epoch": 0.8383666576437174, + "grad_norm": 338.07513427734375, + "learning_rate": 3.417687149358291e-06, + "loss": 82.9096, + "step": 207510 + }, + { + "epoch": 0.8384070589090851, + "grad_norm": 624.0934448242188, + "learning_rate": 3.4161260730981315e-06, + "loss": 42.734, + "step": 207520 + }, + { + "epoch": 0.8384474601744527, + "grad_norm": 393.3742370605469, + "learning_rate": 3.4145653201491437e-06, + "loss": 54.4049, + "step": 207530 + }, + { + "epoch": 0.8384878614398203, + "grad_norm": 604.0125732421875, + "learning_rate": 3.413004890541738e-06, + "loss": 78.9069, + "step": 207540 + }, + { + "epoch": 0.8385282627051879, + "grad_norm": 919.8623046875, + "learning_rate": 3.4114447843063412e-06, + "loss": 83.576, + "step": 207550 + }, + { + "epoch": 0.8385686639705555, + "grad_norm": 541.9603881835938, + "learning_rate": 3.4098850014733676e-06, + "loss": 67.5839, + "step": 207560 + }, + { + "epoch": 0.8386090652359232, + "grad_norm": 725.0718994140625, + "learning_rate": 3.4083255420732274e-06, + "loss": 36.5268, + "step": 207570 + }, + { + "epoch": 0.8386494665012908, + "grad_norm": 730.4462890625, + "learning_rate": 3.4067664061363304e-06, + "loss": 57.6753, + "step": 207580 + }, + { + "epoch": 0.8386898677666584, + "grad_norm": 749.0494995117188, + "learning_rate": 3.4052075936930563e-06, + "loss": 59.712, + "step": 207590 + }, + { + "epoch": 0.8387302690320261, + "grad_norm": 1004.422119140625, + "learning_rate": 3.4036491047738075e-06, + "loss": 74.9926, + "step": 207600 + }, + { + "epoch": 0.8387706702973937, + "grad_norm": 721.5634765625, + "learning_rate": 3.4020909394089618e-06, + "loss": 87.5822, + "step": 207610 + }, + { + "epoch": 0.8388110715627614, + "grad_norm": 554.6776123046875, 
+ "learning_rate": 3.400533097628902e-06, + "loss": 87.4478, + "step": 207620 + }, + { + "epoch": 0.838851472828129, + "grad_norm": 577.367919921875, + "learning_rate": 3.3989755794639877e-06, + "loss": 71.7075, + "step": 207630 + }, + { + "epoch": 0.8388918740934966, + "grad_norm": 669.9638061523438, + "learning_rate": 3.3974183849445998e-06, + "loss": 50.4924, + "step": 207640 + }, + { + "epoch": 0.8389322753588643, + "grad_norm": 826.5859375, + "learning_rate": 3.3958615141010844e-06, + "loss": 120.5171, + "step": 207650 + }, + { + "epoch": 0.8389726766242319, + "grad_norm": 830.2062377929688, + "learning_rate": 3.394304966963795e-06, + "loss": 84.7499, + "step": 207660 + }, + { + "epoch": 0.8390130778895996, + "grad_norm": 863.4185180664062, + "learning_rate": 3.3927487435630813e-06, + "loss": 59.3707, + "step": 207670 + }, + { + "epoch": 0.8390534791549671, + "grad_norm": 1992.4669189453125, + "learning_rate": 3.391192843929281e-06, + "loss": 77.5359, + "step": 207680 + }, + { + "epoch": 0.8390938804203347, + "grad_norm": 414.238037109375, + "learning_rate": 3.3896372680927313e-06, + "loss": 59.671, + "step": 207690 + }, + { + "epoch": 0.8391342816857024, + "grad_norm": 736.580322265625, + "learning_rate": 3.3880820160837447e-06, + "loss": 53.5795, + "step": 207700 + }, + { + "epoch": 0.83917468295107, + "grad_norm": 500.7801208496094, + "learning_rate": 3.3865270879326627e-06, + "loss": 76.5419, + "step": 207710 + }, + { + "epoch": 0.8392150842164376, + "grad_norm": 576.613525390625, + "learning_rate": 3.3849724836697816e-06, + "loss": 59.5728, + "step": 207720 + }, + { + "epoch": 0.8392554854818053, + "grad_norm": 648.8595581054688, + "learning_rate": 3.383418203325417e-06, + "loss": 81.0679, + "step": 207730 + }, + { + "epoch": 0.8392958867471729, + "grad_norm": 1234.98681640625, + "learning_rate": 3.381864246929869e-06, + "loss": 69.3161, + "step": 207740 + }, + { + "epoch": 0.8393362880125406, + "grad_norm": 465.4539794921875, + "learning_rate": 3.3803106145134377e-06, + "loss": 58.8058, + "step": 207750 + }, + { + "epoch": 0.8393766892779082, + "grad_norm": 704.2286987304688, + "learning_rate": 3.378757306106402e-06, + "loss": 64.5802, + "step": 207760 + }, + { + "epoch": 0.8394170905432758, + "grad_norm": 667.9896240234375, + "learning_rate": 3.3772043217390514e-06, + "loss": 68.8729, + "step": 207770 + }, + { + "epoch": 0.8394574918086435, + "grad_norm": 790.4489135742188, + "learning_rate": 3.375651661441659e-06, + "loss": 78.6392, + "step": 207780 + }, + { + "epoch": 0.8394978930740111, + "grad_norm": 646.698974609375, + "learning_rate": 3.3740993252444978e-06, + "loss": 59.7444, + "step": 207790 + }, + { + "epoch": 0.8395382943393787, + "grad_norm": 1053.682861328125, + "learning_rate": 3.372547313177834e-06, + "loss": 60.4208, + "step": 207800 + }, + { + "epoch": 0.8395786956047463, + "grad_norm": 800.1911010742188, + "learning_rate": 3.3709956252719112e-06, + "loss": 73.9747, + "step": 207810 + }, + { + "epoch": 0.8396190968701139, + "grad_norm": 446.1049499511719, + "learning_rate": 3.3694442615569977e-06, + "loss": 74.1524, + "step": 207820 + }, + { + "epoch": 0.8396594981354816, + "grad_norm": 615.0440063476562, + "learning_rate": 3.3678932220633277e-06, + "loss": 60.991, + "step": 207830 + }, + { + "epoch": 0.8396998994008492, + "grad_norm": 530.890380859375, + "learning_rate": 3.366342506821141e-06, + "loss": 75.4675, + "step": 207840 + }, + { + "epoch": 0.8397403006662169, + "grad_norm": 638.2509155273438, + "learning_rate": 3.3647921158606755e-06, + "loss": 
54.0676, + "step": 207850 + }, + { + "epoch": 0.8397807019315845, + "grad_norm": 568.9007568359375, + "learning_rate": 3.363242049212143e-06, + "loss": 55.8666, + "step": 207860 + }, + { + "epoch": 0.8398211031969521, + "grad_norm": 1816.7598876953125, + "learning_rate": 3.3616923069057816e-06, + "loss": 73.111, + "step": 207870 + }, + { + "epoch": 0.8398615044623198, + "grad_norm": 724.9201049804688, + "learning_rate": 3.360142888971789e-06, + "loss": 74.9734, + "step": 207880 + }, + { + "epoch": 0.8399019057276874, + "grad_norm": 1360.15478515625, + "learning_rate": 3.358593795440379e-06, + "loss": 81.9969, + "step": 207890 + }, + { + "epoch": 0.839942306993055, + "grad_norm": 701.8643798828125, + "learning_rate": 3.3570450263417497e-06, + "loss": 84.1747, + "step": 207900 + }, + { + "epoch": 0.8399827082584227, + "grad_norm": 475.3885498046875, + "learning_rate": 3.3554965817061016e-06, + "loss": 89.9288, + "step": 207910 + }, + { + "epoch": 0.8400231095237903, + "grad_norm": 540.217529296875, + "learning_rate": 3.35394846156361e-06, + "loss": 110.3829, + "step": 207920 + }, + { + "epoch": 0.8400635107891579, + "grad_norm": 792.7518310546875, + "learning_rate": 3.3524006659444642e-06, + "loss": 66.3115, + "step": 207930 + }, + { + "epoch": 0.8401039120545255, + "grad_norm": 760.2859497070312, + "learning_rate": 3.35085319487884e-06, + "loss": 72.8702, + "step": 207940 + }, + { + "epoch": 0.8401443133198931, + "grad_norm": 438.9905090332031, + "learning_rate": 3.3493060483969008e-06, + "loss": 60.7861, + "step": 207950 + }, + { + "epoch": 0.8401847145852608, + "grad_norm": 754.8431396484375, + "learning_rate": 3.3477592265288197e-06, + "loss": 82.0207, + "step": 207960 + }, + { + "epoch": 0.8402251158506284, + "grad_norm": 438.2242126464844, + "learning_rate": 3.3462127293047364e-06, + "loss": 71.1685, + "step": 207970 + }, + { + "epoch": 0.8402655171159961, + "grad_norm": 568.0570678710938, + "learning_rate": 3.3446665567548187e-06, + "loss": 74.3396, + "step": 207980 + }, + { + "epoch": 0.8403059183813637, + "grad_norm": 680.380126953125, + "learning_rate": 3.3431207089091954e-06, + "loss": 69.6871, + "step": 207990 + }, + { + "epoch": 0.8403463196467313, + "grad_norm": 1093.2723388671875, + "learning_rate": 3.3415751857980118e-06, + "loss": 69.6418, + "step": 208000 + }, + { + "epoch": 0.840386720912099, + "grad_norm": 546.8598022460938, + "learning_rate": 3.3400299874513943e-06, + "loss": 54.7796, + "step": 208010 + }, + { + "epoch": 0.8404271221774666, + "grad_norm": 418.8149108886719, + "learning_rate": 3.33848511389947e-06, + "loss": 84.7413, + "step": 208020 + }, + { + "epoch": 0.8404675234428343, + "grad_norm": 472.0920715332031, + "learning_rate": 3.3369405651723596e-06, + "loss": 65.3629, + "step": 208030 + }, + { + "epoch": 0.8405079247082019, + "grad_norm": 565.1979370117188, + "learning_rate": 3.33539634130017e-06, + "loss": 78.8701, + "step": 208040 + }, + { + "epoch": 0.8405483259735695, + "grad_norm": 734.640869140625, + "learning_rate": 3.333852442313006e-06, + "loss": 73.7953, + "step": 208050 + }, + { + "epoch": 0.8405887272389371, + "grad_norm": 641.9617309570312, + "learning_rate": 3.3323088682409697e-06, + "loss": 58.6587, + "step": 208060 + }, + { + "epoch": 0.8406291285043047, + "grad_norm": 1138.0428466796875, + "learning_rate": 3.330765619114158e-06, + "loss": 47.8879, + "step": 208070 + }, + { + "epoch": 0.8406695297696724, + "grad_norm": 469.6271057128906, + "learning_rate": 3.3292226949626417e-06, + "loss": 93.0633, + "step": 208080 + }, + { + "epoch": 
0.84070993103504, + "grad_norm": 676.5130004882812, + "learning_rate": 3.3276800958165214e-06, + "loss": 81.9935, + "step": 208090 + }, + { + "epoch": 0.8407503323004076, + "grad_norm": 424.1094055175781, + "learning_rate": 3.3261378217058572e-06, + "loss": 62.3971, + "step": 208100 + }, + { + "epoch": 0.8407907335657753, + "grad_norm": 793.9649658203125, + "learning_rate": 3.324595872660721e-06, + "loss": 73.4729, + "step": 208110 + }, + { + "epoch": 0.8408311348311429, + "grad_norm": 547.1851196289062, + "learning_rate": 3.3230542487111774e-06, + "loss": 75.4201, + "step": 208120 + }, + { + "epoch": 0.8408715360965106, + "grad_norm": 769.9069213867188, + "learning_rate": 3.3215129498872667e-06, + "loss": 77.0699, + "step": 208130 + }, + { + "epoch": 0.8409119373618782, + "grad_norm": 1020.541259765625, + "learning_rate": 3.3199719762190584e-06, + "loss": 69.7594, + "step": 208140 + }, + { + "epoch": 0.8409523386272458, + "grad_norm": 1323.514404296875, + "learning_rate": 3.3184313277365774e-06, + "loss": 71.916, + "step": 208150 + }, + { + "epoch": 0.8409927398926135, + "grad_norm": 2194.29931640625, + "learning_rate": 3.3168910044698686e-06, + "loss": 104.4535, + "step": 208160 + }, + { + "epoch": 0.8410331411579811, + "grad_norm": 774.9472045898438, + "learning_rate": 3.315351006448957e-06, + "loss": 79.281, + "step": 208170 + }, + { + "epoch": 0.8410735424233488, + "grad_norm": 346.4601135253906, + "learning_rate": 3.313811333703867e-06, + "loss": 42.4987, + "step": 208180 + }, + { + "epoch": 0.8411139436887163, + "grad_norm": 1051.0504150390625, + "learning_rate": 3.3122719862646214e-06, + "loss": 58.3766, + "step": 208190 + }, + { + "epoch": 0.8411543449540839, + "grad_norm": 528.8670043945312, + "learning_rate": 3.3107329641612205e-06, + "loss": 73.074, + "step": 208200 + }, + { + "epoch": 0.8411947462194516, + "grad_norm": 971.3807373046875, + "learning_rate": 3.309194267423672e-06, + "loss": 57.5378, + "step": 208210 + }, + { + "epoch": 0.8412351474848192, + "grad_norm": 684.7073974609375, + "learning_rate": 3.307655896081974e-06, + "loss": 82.4158, + "step": 208220 + }, + { + "epoch": 0.8412755487501868, + "grad_norm": 541.134765625, + "learning_rate": 3.3061178501661194e-06, + "loss": 71.3699, + "step": 208230 + }, + { + "epoch": 0.8413159500155545, + "grad_norm": 333.71649169921875, + "learning_rate": 3.30458012970609e-06, + "loss": 63.2444, + "step": 208240 + }, + { + "epoch": 0.8413563512809221, + "grad_norm": 507.67742919921875, + "learning_rate": 3.303042734731872e-06, + "loss": 89.0056, + "step": 208250 + }, + { + "epoch": 0.8413967525462898, + "grad_norm": 532.140380859375, + "learning_rate": 3.301505665273428e-06, + "loss": 66.9714, + "step": 208260 + }, + { + "epoch": 0.8414371538116574, + "grad_norm": 1119.0196533203125, + "learning_rate": 3.2999689213607255e-06, + "loss": 71.9588, + "step": 208270 + }, + { + "epoch": 0.841477555077025, + "grad_norm": 726.047119140625, + "learning_rate": 3.2984325030237295e-06, + "loss": 86.9445, + "step": 208280 + }, + { + "epoch": 0.8415179563423927, + "grad_norm": 377.6922302246094, + "learning_rate": 3.2968964102923895e-06, + "loss": 58.9312, + "step": 208290 + }, + { + "epoch": 0.8415583576077603, + "grad_norm": 415.1799621582031, + "learning_rate": 3.295360643196659e-06, + "loss": 83.6848, + "step": 208300 + }, + { + "epoch": 0.841598758873128, + "grad_norm": 957.2704467773438, + "learning_rate": 3.2938252017664606e-06, + "loss": 45.2974, + "step": 208310 + }, + { + "epoch": 0.8416391601384955, + "grad_norm": 949.0193481445312, 
+ "learning_rate": 3.292290086031753e-06, + "loss": 99.9311, + "step": 208320 + }, + { + "epoch": 0.8416795614038631, + "grad_norm": 250.8604736328125, + "learning_rate": 3.2907552960224455e-06, + "loss": 54.0585, + "step": 208330 + }, + { + "epoch": 0.8417199626692308, + "grad_norm": 841.0300903320312, + "learning_rate": 3.289220831768467e-06, + "loss": 58.6888, + "step": 208340 + }, + { + "epoch": 0.8417603639345984, + "grad_norm": 554.4972534179688, + "learning_rate": 3.287686693299732e-06, + "loss": 60.8177, + "step": 208350 + }, + { + "epoch": 0.841800765199966, + "grad_norm": 854.8633422851562, + "learning_rate": 3.2861528806461516e-06, + "loss": 60.5687, + "step": 208360 + }, + { + "epoch": 0.8418411664653337, + "grad_norm": 660.091064453125, + "learning_rate": 3.2846193938376226e-06, + "loss": 53.7638, + "step": 208370 + }, + { + "epoch": 0.8418815677307013, + "grad_norm": 950.1257934570312, + "learning_rate": 3.2830862329040446e-06, + "loss": 73.3438, + "step": 208380 + }, + { + "epoch": 0.841921968996069, + "grad_norm": 1758.550048828125, + "learning_rate": 3.2815533978753055e-06, + "loss": 83.9993, + "step": 208390 + }, + { + "epoch": 0.8419623702614366, + "grad_norm": 676.259765625, + "learning_rate": 3.2800208887812925e-06, + "loss": 63.9166, + "step": 208400 + }, + { + "epoch": 0.8420027715268043, + "grad_norm": 849.3679809570312, + "learning_rate": 3.2784887056518834e-06, + "loss": 58.5239, + "step": 208410 + }, + { + "epoch": 0.8420431727921719, + "grad_norm": 1193.5679931640625, + "learning_rate": 3.2769568485169343e-06, + "loss": 90.7288, + "step": 208420 + }, + { + "epoch": 0.8420835740575395, + "grad_norm": 2159.9453125, + "learning_rate": 3.2754253174063334e-06, + "loss": 104.0492, + "step": 208430 + }, + { + "epoch": 0.8421239753229071, + "grad_norm": 847.5103149414062, + "learning_rate": 3.2738941123499203e-06, + "loss": 58.1034, + "step": 208440 + }, + { + "epoch": 0.8421643765882747, + "grad_norm": 623.2222290039062, + "learning_rate": 3.2723632333775536e-06, + "loss": 77.4863, + "step": 208450 + }, + { + "epoch": 0.8422047778536423, + "grad_norm": 907.5113525390625, + "learning_rate": 3.2708326805190826e-06, + "loss": 74.3104, + "step": 208460 + }, + { + "epoch": 0.84224517911901, + "grad_norm": 780.1671142578125, + "learning_rate": 3.2693024538043307e-06, + "loss": 48.8403, + "step": 208470 + }, + { + "epoch": 0.8422855803843776, + "grad_norm": 456.8218078613281, + "learning_rate": 3.267772553263151e-06, + "loss": 63.935, + "step": 208480 + }, + { + "epoch": 0.8423259816497453, + "grad_norm": 627.5384521484375, + "learning_rate": 3.266242978925356e-06, + "loss": 52.7799, + "step": 208490 + }, + { + "epoch": 0.8423663829151129, + "grad_norm": 537.2757568359375, + "learning_rate": 3.2647137308207676e-06, + "loss": 65.2094, + "step": 208500 + }, + { + "epoch": 0.8424067841804805, + "grad_norm": 692.0653686523438, + "learning_rate": 3.2631848089792005e-06, + "loss": 50.6209, + "step": 208510 + }, + { + "epoch": 0.8424471854458482, + "grad_norm": 527.7442016601562, + "learning_rate": 3.2616562134304684e-06, + "loss": 69.5838, + "step": 208520 + }, + { + "epoch": 0.8424875867112158, + "grad_norm": 448.4066467285156, + "learning_rate": 3.260127944204361e-06, + "loss": 73.5021, + "step": 208530 + }, + { + "epoch": 0.8425279879765835, + "grad_norm": 458.5916748046875, + "learning_rate": 3.258600001330676e-06, + "loss": 35.2617, + "step": 208540 + }, + { + "epoch": 0.8425683892419511, + "grad_norm": 487.3787536621094, + "learning_rate": 3.2570723848392063e-06, + "loss": 
51.9757, + "step": 208550 + }, + { + "epoch": 0.8426087905073187, + "grad_norm": 1023.3383178710938, + "learning_rate": 3.2555450947597266e-06, + "loss": 57.874, + "step": 208560 + }, + { + "epoch": 0.8426491917726863, + "grad_norm": 800.5846557617188, + "learning_rate": 3.254018131122023e-06, + "loss": 55.1119, + "step": 208570 + }, + { + "epoch": 0.8426895930380539, + "grad_norm": 448.1301574707031, + "learning_rate": 3.252491493955847e-06, + "loss": 51.7938, + "step": 208580 + }, + { + "epoch": 0.8427299943034215, + "grad_norm": 762.2635498046875, + "learning_rate": 3.2509651832909815e-06, + "loss": 64.6694, + "step": 208590 + }, + { + "epoch": 0.8427703955687892, + "grad_norm": 393.4840087890625, + "learning_rate": 3.249439199157167e-06, + "loss": 67.824, + "step": 208600 + }, + { + "epoch": 0.8428107968341568, + "grad_norm": 811.9481201171875, + "learning_rate": 3.2479135415841602e-06, + "loss": 75.1419, + "step": 208610 + }, + { + "epoch": 0.8428511980995245, + "grad_norm": 653.08984375, + "learning_rate": 3.246388210601703e-06, + "loss": 67.878, + "step": 208620 + }, + { + "epoch": 0.8428915993648921, + "grad_norm": 981.3873291015625, + "learning_rate": 3.2448632062395326e-06, + "loss": 56.4616, + "step": 208630 + }, + { + "epoch": 0.8429320006302597, + "grad_norm": 634.5328369140625, + "learning_rate": 3.2433385285273868e-06, + "loss": 41.4416, + "step": 208640 + }, + { + "epoch": 0.8429724018956274, + "grad_norm": 617.4448852539062, + "learning_rate": 3.2418141774949752e-06, + "loss": 58.2411, + "step": 208650 + }, + { + "epoch": 0.843012803160995, + "grad_norm": 686.1888427734375, + "learning_rate": 3.2402901531720255e-06, + "loss": 82.1729, + "step": 208660 + }, + { + "epoch": 0.8430532044263627, + "grad_norm": 496.2076416015625, + "learning_rate": 3.2387664555882493e-06, + "loss": 64.9347, + "step": 208670 + }, + { + "epoch": 0.8430936056917303, + "grad_norm": 599.830078125, + "learning_rate": 3.237243084773354e-06, + "loss": 49.8175, + "step": 208680 + }, + { + "epoch": 0.843134006957098, + "grad_norm": 293.838134765625, + "learning_rate": 3.2357200407570243e-06, + "loss": 60.3509, + "step": 208690 + }, + { + "epoch": 0.8431744082224655, + "grad_norm": 722.0048217773438, + "learning_rate": 3.234197323568973e-06, + "loss": 43.349, + "step": 208700 + }, + { + "epoch": 0.8432148094878331, + "grad_norm": 903.9891967773438, + "learning_rate": 3.232674933238873e-06, + "loss": 74.1543, + "step": 208710 + }, + { + "epoch": 0.8432552107532008, + "grad_norm": 594.497314453125, + "learning_rate": 3.2311528697964055e-06, + "loss": 65.2138, + "step": 208720 + }, + { + "epoch": 0.8432956120185684, + "grad_norm": 611.4979248046875, + "learning_rate": 3.229631133271254e-06, + "loss": 72.0906, + "step": 208730 + }, + { + "epoch": 0.843336013283936, + "grad_norm": 561.435302734375, + "learning_rate": 3.2281097236930647e-06, + "loss": 48.5446, + "step": 208740 + }, + { + "epoch": 0.8433764145493037, + "grad_norm": 891.5610961914062, + "learning_rate": 3.2265886410915214e-06, + "loss": 43.3047, + "step": 208750 + }, + { + "epoch": 0.8434168158146713, + "grad_norm": 861.835693359375, + "learning_rate": 3.22506788549626e-06, + "loss": 118.8604, + "step": 208760 + }, + { + "epoch": 0.843457217080039, + "grad_norm": 1102.69677734375, + "learning_rate": 3.2235474569369463e-06, + "loss": 78.6831, + "step": 208770 + }, + { + "epoch": 0.8434976183454066, + "grad_norm": 1148.3504638671875, + "learning_rate": 3.2220273554432045e-06, + "loss": 86.8069, + "step": 208780 + }, + { + "epoch": 
0.8435380196107742, + "grad_norm": 813.3003540039062, + "learning_rate": 3.2205075810446782e-06, + "loss": 76.9229, + "step": 208790 + }, + { + "epoch": 0.8435784208761419, + "grad_norm": 613.9979248046875, + "learning_rate": 3.2189881337710016e-06, + "loss": 106.4013, + "step": 208800 + }, + { + "epoch": 0.8436188221415095, + "grad_norm": 1215.6361083984375, + "learning_rate": 3.217469013651786e-06, + "loss": 68.1876, + "step": 208810 + }, + { + "epoch": 0.8436592234068772, + "grad_norm": 813.9932250976562, + "learning_rate": 3.2159502207166505e-06, + "loss": 66.9121, + "step": 208820 + }, + { + "epoch": 0.8436996246722447, + "grad_norm": 471.5795593261719, + "learning_rate": 3.2144317549952064e-06, + "loss": 58.5896, + "step": 208830 + }, + { + "epoch": 0.8437400259376123, + "grad_norm": 767.2179565429688, + "learning_rate": 3.2129136165170594e-06, + "loss": 119.3986, + "step": 208840 + }, + { + "epoch": 0.84378042720298, + "grad_norm": 1210.81640625, + "learning_rate": 3.211395805311801e-06, + "loss": 84.9577, + "step": 208850 + }, + { + "epoch": 0.8438208284683476, + "grad_norm": 1263.48486328125, + "learning_rate": 3.209878321409032e-06, + "loss": 93.5042, + "step": 208860 + }, + { + "epoch": 0.8438612297337152, + "grad_norm": 533.448974609375, + "learning_rate": 3.2083611648383227e-06, + "loss": 49.2014, + "step": 208870 + }, + { + "epoch": 0.8439016309990829, + "grad_norm": 508.18353271484375, + "learning_rate": 3.20684433562926e-06, + "loss": 60.1614, + "step": 208880 + }, + { + "epoch": 0.8439420322644505, + "grad_norm": 588.3695068359375, + "learning_rate": 3.2053278338114157e-06, + "loss": 64.9238, + "step": 208890 + }, + { + "epoch": 0.8439824335298182, + "grad_norm": 706.7536010742188, + "learning_rate": 3.203811659414342e-06, + "loss": 64.8204, + "step": 208900 + }, + { + "epoch": 0.8440228347951858, + "grad_norm": 459.6016845703125, + "learning_rate": 3.2022958124676175e-06, + "loss": 54.4275, + "step": 208910 + }, + { + "epoch": 0.8440632360605534, + "grad_norm": 619.5484008789062, + "learning_rate": 3.2007802930007737e-06, + "loss": 70.5453, + "step": 208920 + }, + { + "epoch": 0.8441036373259211, + "grad_norm": 614.35791015625, + "learning_rate": 3.199265101043376e-06, + "loss": 86.987, + "step": 208930 + }, + { + "epoch": 0.8441440385912887, + "grad_norm": 1581.759521484375, + "learning_rate": 3.1977502366249502e-06, + "loss": 65.7584, + "step": 208940 + }, + { + "epoch": 0.8441844398566564, + "grad_norm": 1106.330078125, + "learning_rate": 3.1962356997750364e-06, + "loss": 51.4597, + "step": 208950 + }, + { + "epoch": 0.8442248411220239, + "grad_norm": 297.1744689941406, + "learning_rate": 3.1947214905231605e-06, + "loss": 58.4479, + "step": 208960 + }, + { + "epoch": 0.8442652423873915, + "grad_norm": 594.2415771484375, + "learning_rate": 3.1932076088988386e-06, + "loss": 55.124, + "step": 208970 + }, + { + "epoch": 0.8443056436527592, + "grad_norm": 608.5470581054688, + "learning_rate": 3.1916940549315843e-06, + "loss": 59.0694, + "step": 208980 + }, + { + "epoch": 0.8443460449181268, + "grad_norm": 3423.928955078125, + "learning_rate": 3.190180828650911e-06, + "loss": 69.9118, + "step": 208990 + }, + { + "epoch": 0.8443864461834945, + "grad_norm": 672.8638916015625, + "learning_rate": 3.1886679300863156e-06, + "loss": 88.3564, + "step": 209000 + }, + { + "epoch": 0.8444268474488621, + "grad_norm": 1036.3592529296875, + "learning_rate": 3.1871553592672932e-06, + "loss": 50.3297, + "step": 209010 + }, + { + "epoch": 0.8444672487142297, + "grad_norm": 263.0315246582031, 
+ "learning_rate": 3.185643116223338e-06, + "loss": 59.4567, + "step": 209020 + }, + { + "epoch": 0.8445076499795974, + "grad_norm": 1038.7952880859375, + "learning_rate": 3.1841312009839175e-06, + "loss": 80.6959, + "step": 209030 + }, + { + "epoch": 0.844548051244965, + "grad_norm": 895.684326171875, + "learning_rate": 3.1826196135785258e-06, + "loss": 51.8236, + "step": 209040 + }, + { + "epoch": 0.8445884525103327, + "grad_norm": 528.5926513671875, + "learning_rate": 3.1811083540366193e-06, + "loss": 58.9764, + "step": 209050 + }, + { + "epoch": 0.8446288537757003, + "grad_norm": 469.8354797363281, + "learning_rate": 3.1795974223876635e-06, + "loss": 61.286, + "step": 209060 + }, + { + "epoch": 0.8446692550410679, + "grad_norm": 882.0556030273438, + "learning_rate": 3.1780868186611214e-06, + "loss": 62.868, + "step": 209070 + }, + { + "epoch": 0.8447096563064355, + "grad_norm": 507.1385498046875, + "learning_rate": 3.1765765428864272e-06, + "loss": 75.0054, + "step": 209080 + }, + { + "epoch": 0.8447500575718031, + "grad_norm": 1011.0353393554688, + "learning_rate": 3.1750665950930436e-06, + "loss": 86.4785, + "step": 209090 + }, + { + "epoch": 0.8447904588371707, + "grad_norm": 2833.973876953125, + "learning_rate": 3.1735569753103947e-06, + "loss": 80.4849, + "step": 209100 + }, + { + "epoch": 0.8448308601025384, + "grad_norm": 873.5023803710938, + "learning_rate": 3.1720476835679137e-06, + "loss": 44.409, + "step": 209110 + }, + { + "epoch": 0.844871261367906, + "grad_norm": 612.8682861328125, + "learning_rate": 3.1705387198950287e-06, + "loss": 60.8897, + "step": 209120 + }, + { + "epoch": 0.8449116626332737, + "grad_norm": 576.4819946289062, + "learning_rate": 3.1690300843211584e-06, + "loss": 94.4704, + "step": 209130 + }, + { + "epoch": 0.8449520638986413, + "grad_norm": 197.09432983398438, + "learning_rate": 3.1675217768757084e-06, + "loss": 73.0657, + "step": 209140 + }, + { + "epoch": 0.8449924651640089, + "grad_norm": 922.6818237304688, + "learning_rate": 3.166013797588088e-06, + "loss": 53.4894, + "step": 209150 + }, + { + "epoch": 0.8450328664293766, + "grad_norm": 541.3510131835938, + "learning_rate": 3.1645061464876936e-06, + "loss": 61.8437, + "step": 209160 + }, + { + "epoch": 0.8450732676947442, + "grad_norm": 792.8218994140625, + "learning_rate": 3.1629988236039223e-06, + "loss": 64.1433, + "step": 209170 + }, + { + "epoch": 0.8451136689601119, + "grad_norm": 512.6209716796875, + "learning_rate": 3.161491828966159e-06, + "loss": 46.6555, + "step": 209180 + }, + { + "epoch": 0.8451540702254795, + "grad_norm": 626.2440185546875, + "learning_rate": 3.1599851626037735e-06, + "loss": 39.0873, + "step": 209190 + }, + { + "epoch": 0.8451944714908471, + "grad_norm": 267.4899597167969, + "learning_rate": 3.158478824546156e-06, + "loss": 75.5345, + "step": 209200 + }, + { + "epoch": 0.8452348727562147, + "grad_norm": 720.9505615234375, + "learning_rate": 3.1569728148226607e-06, + "loss": 86.3453, + "step": 209210 + }, + { + "epoch": 0.8452752740215823, + "grad_norm": 1308.795166015625, + "learning_rate": 3.155467133462651e-06, + "loss": 67.822, + "step": 209220 + }, + { + "epoch": 0.84531567528695, + "grad_norm": 2015.7830810546875, + "learning_rate": 3.1539617804954847e-06, + "loss": 99.7054, + "step": 209230 + }, + { + "epoch": 0.8453560765523176, + "grad_norm": 984.2779541015625, + "learning_rate": 3.152456755950499e-06, + "loss": 55.6955, + "step": 209240 + }, + { + "epoch": 0.8453964778176852, + "grad_norm": 521.5396728515625, + "learning_rate": 3.150952059857051e-06, + 
"loss": 49.2134, + "step": 209250 + }, + { + "epoch": 0.8454368790830529, + "grad_norm": 600.5093383789062, + "learning_rate": 3.1494476922444627e-06, + "loss": 62.8779, + "step": 209260 + }, + { + "epoch": 0.8454772803484205, + "grad_norm": 624.6716918945312, + "learning_rate": 3.147943653142067e-06, + "loss": 67.3765, + "step": 209270 + }, + { + "epoch": 0.8455176816137882, + "grad_norm": 546.228515625, + "learning_rate": 3.146439942579187e-06, + "loss": 59.209, + "step": 209280 + }, + { + "epoch": 0.8455580828791558, + "grad_norm": 673.7874755859375, + "learning_rate": 3.144936560585137e-06, + "loss": 63.4131, + "step": 209290 + }, + { + "epoch": 0.8455984841445234, + "grad_norm": 889.4406127929688, + "learning_rate": 3.1434335071892284e-06, + "loss": 51.8259, + "step": 209300 + }, + { + "epoch": 0.8456388854098911, + "grad_norm": 1041.2979736328125, + "learning_rate": 3.141930782420759e-06, + "loss": 63.4135, + "step": 209310 + }, + { + "epoch": 0.8456792866752587, + "grad_norm": 606.8555908203125, + "learning_rate": 3.140428386309029e-06, + "loss": 106.7905, + "step": 209320 + }, + { + "epoch": 0.8457196879406264, + "grad_norm": 519.7428588867188, + "learning_rate": 3.1389263188833263e-06, + "loss": 63.5454, + "step": 209330 + }, + { + "epoch": 0.8457600892059939, + "grad_norm": 509.1273498535156, + "learning_rate": 3.137424580172941e-06, + "loss": 45.5877, + "step": 209340 + }, + { + "epoch": 0.8458004904713615, + "grad_norm": 730.9439697265625, + "learning_rate": 3.1359231702071338e-06, + "loss": 66.47, + "step": 209350 + }, + { + "epoch": 0.8458408917367292, + "grad_norm": 512.4368286132812, + "learning_rate": 3.1344220890151987e-06, + "loss": 41.4836, + "step": 209360 + }, + { + "epoch": 0.8458812930020968, + "grad_norm": 497.9188537597656, + "learning_rate": 3.1329213366263754e-06, + "loss": 60.9, + "step": 209370 + }, + { + "epoch": 0.8459216942674644, + "grad_norm": 903.7112426757812, + "learning_rate": 3.131420913069947e-06, + "loss": 68.5756, + "step": 209380 + }, + { + "epoch": 0.8459620955328321, + "grad_norm": 774.0703125, + "learning_rate": 3.129920818375145e-06, + "loss": 80.4822, + "step": 209390 + }, + { + "epoch": 0.8460024967981997, + "grad_norm": 703.6175537109375, + "learning_rate": 3.1284210525712243e-06, + "loss": 50.7595, + "step": 209400 + }, + { + "epoch": 0.8460428980635674, + "grad_norm": 630.6016235351562, + "learning_rate": 3.126921615687424e-06, + "loss": 67.12, + "step": 209410 + }, + { + "epoch": 0.846083299328935, + "grad_norm": 718.1817626953125, + "learning_rate": 3.125422507752971e-06, + "loss": 60.7771, + "step": 209420 + }, + { + "epoch": 0.8461237005943026, + "grad_norm": 739.6641235351562, + "learning_rate": 3.1239237287970914e-06, + "loss": 67.3418, + "step": 209430 + }, + { + "epoch": 0.8461641018596703, + "grad_norm": 377.2901306152344, + "learning_rate": 3.122425278849008e-06, + "loss": 45.6695, + "step": 209440 + }, + { + "epoch": 0.8462045031250379, + "grad_norm": 915.4920043945312, + "learning_rate": 3.120927157937934e-06, + "loss": 66.5098, + "step": 209450 + }, + { + "epoch": 0.8462449043904056, + "grad_norm": 549.3414916992188, + "learning_rate": 3.1194293660930743e-06, + "loss": 64.4454, + "step": 209460 + }, + { + "epoch": 0.8462853056557731, + "grad_norm": 405.20196533203125, + "learning_rate": 3.117931903343634e-06, + "loss": 70.5482, + "step": 209470 + }, + { + "epoch": 0.8463257069211407, + "grad_norm": 512.661865234375, + "learning_rate": 3.116434769718799e-06, + "loss": 92.5761, + "step": 209480 + }, + { + "epoch": 
0.8463661081865084, + "grad_norm": 357.98095703125, + "learning_rate": 3.1149379652477597e-06, + "loss": 64.8814, + "step": 209490 + }, + { + "epoch": 0.846406509451876, + "grad_norm": 512.452392578125, + "learning_rate": 3.1134414899597033e-06, + "loss": 28.9175, + "step": 209500 + }, + { + "epoch": 0.8464469107172437, + "grad_norm": 673.2363891601562, + "learning_rate": 3.1119453438837887e-06, + "loss": 53.7514, + "step": 209510 + }, + { + "epoch": 0.8464873119826113, + "grad_norm": 559.218994140625, + "learning_rate": 3.1104495270492042e-06, + "loss": 66.3591, + "step": 209520 + }, + { + "epoch": 0.8465277132479789, + "grad_norm": 1562.1456298828125, + "learning_rate": 3.1089540394850924e-06, + "loss": 62.3677, + "step": 209530 + }, + { + "epoch": 0.8465681145133466, + "grad_norm": 591.0670776367188, + "learning_rate": 3.1074588812206264e-06, + "loss": 59.4713, + "step": 209540 + }, + { + "epoch": 0.8466085157787142, + "grad_norm": 977.609619140625, + "learning_rate": 3.10596405228494e-06, + "loss": 92.3434, + "step": 209550 + }, + { + "epoch": 0.8466489170440819, + "grad_norm": 436.8194274902344, + "learning_rate": 3.1044695527071833e-06, + "loss": 102.1792, + "step": 209560 + }, + { + "epoch": 0.8466893183094495, + "grad_norm": 1379.49267578125, + "learning_rate": 3.102975382516498e-06, + "loss": 60.6514, + "step": 209570 + }, + { + "epoch": 0.8467297195748171, + "grad_norm": 446.8595886230469, + "learning_rate": 3.1014815417419997e-06, + "loss": 76.1756, + "step": 209580 + }, + { + "epoch": 0.8467701208401847, + "grad_norm": 595.1066284179688, + "learning_rate": 3.0999880304128192e-06, + "loss": 77.1614, + "step": 209590 + }, + { + "epoch": 0.8468105221055523, + "grad_norm": 704.0342407226562, + "learning_rate": 3.0984948485580736e-06, + "loss": 57.3854, + "step": 209600 + }, + { + "epoch": 0.8468509233709199, + "grad_norm": 467.90582275390625, + "learning_rate": 3.09700199620687e-06, + "loss": 99.8732, + "step": 209610 + }, + { + "epoch": 0.8468913246362876, + "grad_norm": 768.0868530273438, + "learning_rate": 3.095509473388316e-06, + "loss": 93.3747, + "step": 209620 + }, + { + "epoch": 0.8469317259016552, + "grad_norm": 588.6419677734375, + "learning_rate": 3.0940172801315137e-06, + "loss": 67.185, + "step": 209630 + }, + { + "epoch": 0.8469721271670229, + "grad_norm": 997.0317993164062, + "learning_rate": 3.092525416465544e-06, + "loss": 76.5221, + "step": 209640 + }, + { + "epoch": 0.8470125284323905, + "grad_norm": 540.0697021484375, + "learning_rate": 3.091033882419494e-06, + "loss": 47.8714, + "step": 209650 + }, + { + "epoch": 0.8470529296977581, + "grad_norm": 556.3004150390625, + "learning_rate": 3.0895426780224437e-06, + "loss": 58.3431, + "step": 209660 + }, + { + "epoch": 0.8470933309631258, + "grad_norm": 761.0225219726562, + "learning_rate": 3.088051803303467e-06, + "loss": 76.2264, + "step": 209670 + }, + { + "epoch": 0.8471337322284934, + "grad_norm": 446.44488525390625, + "learning_rate": 3.0865612582916293e-06, + "loss": 66.4168, + "step": 209680 + }, + { + "epoch": 0.8471741334938611, + "grad_norm": 804.018798828125, + "learning_rate": 3.0850710430159793e-06, + "loss": 60.1533, + "step": 209690 + }, + { + "epoch": 0.8472145347592287, + "grad_norm": 691.5404663085938, + "learning_rate": 3.0835811575055886e-06, + "loss": 50.7642, + "step": 209700 + }, + { + "epoch": 0.8472549360245963, + "grad_norm": 1622.4486083984375, + "learning_rate": 3.082091601789485e-06, + "loss": 88.452, + "step": 209710 + }, + { + "epoch": 0.8472953372899639, + "grad_norm": 
493.63916015625, + "learning_rate": 3.0806023758967174e-06, + "loss": 88.0158, + "step": 209720 + }, + { + "epoch": 0.8473357385553315, + "grad_norm": 402.5296630859375, + "learning_rate": 3.079113479856317e-06, + "loss": 44.3716, + "step": 209730 + }, + { + "epoch": 0.8473761398206991, + "grad_norm": 409.9522705078125, + "learning_rate": 3.077624913697315e-06, + "loss": 112.3349, + "step": 209740 + }, + { + "epoch": 0.8474165410860668, + "grad_norm": 653.3150634765625, + "learning_rate": 3.076136677448722e-06, + "loss": 74.8094, + "step": 209750 + }, + { + "epoch": 0.8474569423514344, + "grad_norm": 603.5338134765625, + "learning_rate": 3.074648771139559e-06, + "loss": 68.6105, + "step": 209760 + }, + { + "epoch": 0.8474973436168021, + "grad_norm": 789.7094116210938, + "learning_rate": 3.0731611947988305e-06, + "loss": 62.1776, + "step": 209770 + }, + { + "epoch": 0.8475377448821697, + "grad_norm": 747.8154296875, + "learning_rate": 3.0716739484555404e-06, + "loss": 46.2296, + "step": 209780 + }, + { + "epoch": 0.8475781461475373, + "grad_norm": 415.0138854980469, + "learning_rate": 3.0701870321386875e-06, + "loss": 87.5247, + "step": 209790 + }, + { + "epoch": 0.847618547412905, + "grad_norm": 847.2984008789062, + "learning_rate": 3.0687004458772417e-06, + "loss": 87.6701, + "step": 209800 + }, + { + "epoch": 0.8476589486782726, + "grad_norm": 628.9144897460938, + "learning_rate": 3.0672141897002093e-06, + "loss": 94.2591, + "step": 209810 + }, + { + "epoch": 0.8476993499436403, + "grad_norm": 765.0391845703125, + "learning_rate": 3.0657282636365494e-06, + "loss": 66.1948, + "step": 209820 + }, + { + "epoch": 0.8477397512090079, + "grad_norm": 340.5775146484375, + "learning_rate": 3.064242667715236e-06, + "loss": 57.8473, + "step": 209830 + }, + { + "epoch": 0.8477801524743755, + "grad_norm": 508.4102478027344, + "learning_rate": 3.0627574019652327e-06, + "loss": 66.1962, + "step": 209840 + }, + { + "epoch": 0.8478205537397431, + "grad_norm": 861.8114624023438, + "learning_rate": 3.061272466415486e-06, + "loss": 102.9253, + "step": 209850 + }, + { + "epoch": 0.8478609550051107, + "grad_norm": 568.8466796875, + "learning_rate": 3.059787861094963e-06, + "loss": 89.9772, + "step": 209860 + }, + { + "epoch": 0.8479013562704784, + "grad_norm": 246.28253173828125, + "learning_rate": 3.0583035860325914e-06, + "loss": 75.1322, + "step": 209870 + }, + { + "epoch": 0.847941757535846, + "grad_norm": 1134.117431640625, + "learning_rate": 3.056819641257316e-06, + "loss": 76.632, + "step": 209880 + }, + { + "epoch": 0.8479821588012136, + "grad_norm": 771.6000366210938, + "learning_rate": 3.0553360267980616e-06, + "loss": 61.0703, + "step": 209890 + }, + { + "epoch": 0.8480225600665813, + "grad_norm": 511.015625, + "learning_rate": 3.053852742683756e-06, + "loss": 93.6006, + "step": 209900 + }, + { + "epoch": 0.8480629613319489, + "grad_norm": 594.7653198242188, + "learning_rate": 3.0523697889433214e-06, + "loss": 45.5632, + "step": 209910 + }, + { + "epoch": 0.8481033625973166, + "grad_norm": 766.3318481445312, + "learning_rate": 3.050887165605656e-06, + "loss": 67.2986, + "step": 209920 + }, + { + "epoch": 0.8481437638626842, + "grad_norm": 509.18408203125, + "learning_rate": 3.049404872699673e-06, + "loss": 67.6859, + "step": 209930 + }, + { + "epoch": 0.8481841651280518, + "grad_norm": 1128.76708984375, + "learning_rate": 3.0479229102542687e-06, + "loss": 71.4092, + "step": 209940 + }, + { + "epoch": 0.8482245663934195, + "grad_norm": 430.65692138671875, + "learning_rate": 3.04644127829834e-06, + 
"loss": 69.8132, + "step": 209950 + }, + { + "epoch": 0.8482649676587871, + "grad_norm": 663.0028076171875, + "learning_rate": 3.0449599768607576e-06, + "loss": 72.5856, + "step": 209960 + }, + { + "epoch": 0.8483053689241548, + "grad_norm": 835.616455078125, + "learning_rate": 3.043479005970418e-06, + "loss": 53.7462, + "step": 209970 + }, + { + "epoch": 0.8483457701895223, + "grad_norm": 522.2583618164062, + "learning_rate": 3.04199836565618e-06, + "loss": 79.3443, + "step": 209980 + }, + { + "epoch": 0.8483861714548899, + "grad_norm": 711.4619140625, + "learning_rate": 3.040518055946915e-06, + "loss": 70.0315, + "step": 209990 + }, + { + "epoch": 0.8484265727202576, + "grad_norm": 424.2403259277344, + "learning_rate": 3.039038076871481e-06, + "loss": 57.1587, + "step": 210000 + }, + { + "epoch": 0.8484669739856252, + "grad_norm": 663.3494262695312, + "learning_rate": 3.0375584284587314e-06, + "loss": 91.4195, + "step": 210010 + }, + { + "epoch": 0.8485073752509928, + "grad_norm": 865.486572265625, + "learning_rate": 3.03607911073752e-06, + "loss": 53.4294, + "step": 210020 + }, + { + "epoch": 0.8485477765163605, + "grad_norm": 402.5242919921875, + "learning_rate": 3.034600123736673e-06, + "loss": 48.8564, + "step": 210030 + }, + { + "epoch": 0.8485881777817281, + "grad_norm": 637.0618286132812, + "learning_rate": 3.0331214674850317e-06, + "loss": 39.1362, + "step": 210040 + }, + { + "epoch": 0.8486285790470958, + "grad_norm": 434.2763671875, + "learning_rate": 3.0316431420114247e-06, + "loss": 76.1609, + "step": 210050 + }, + { + "epoch": 0.8486689803124634, + "grad_norm": 741.5997314453125, + "learning_rate": 3.030165147344668e-06, + "loss": 70.4879, + "step": 210060 + }, + { + "epoch": 0.848709381577831, + "grad_norm": 378.96197509765625, + "learning_rate": 3.0286874835135793e-06, + "loss": 45.0001, + "step": 210070 + }, + { + "epoch": 0.8487497828431987, + "grad_norm": 373.2505187988281, + "learning_rate": 3.0272101505469687e-06, + "loss": 58.43, + "step": 210080 + }, + { + "epoch": 0.8487901841085663, + "grad_norm": 935.0189208984375, + "learning_rate": 3.025733148473631e-06, + "loss": 73.9743, + "step": 210090 + }, + { + "epoch": 0.848830585373934, + "grad_norm": 715.925537109375, + "learning_rate": 3.0242564773223646e-06, + "loss": 72.9449, + "step": 210100 + }, + { + "epoch": 0.8488709866393015, + "grad_norm": 573.5641479492188, + "learning_rate": 3.0227801371219634e-06, + "loss": 68.183, + "step": 210110 + }, + { + "epoch": 0.8489113879046691, + "grad_norm": 506.4945983886719, + "learning_rate": 3.0213041279011944e-06, + "loss": 62.9217, + "step": 210120 + }, + { + "epoch": 0.8489517891700368, + "grad_norm": 1035.5145263671875, + "learning_rate": 3.019828449688851e-06, + "loss": 76.4438, + "step": 210130 + }, + { + "epoch": 0.8489921904354044, + "grad_norm": 528.3836669921875, + "learning_rate": 3.018353102513685e-06, + "loss": 39.1607, + "step": 210140 + }, + { + "epoch": 0.849032591700772, + "grad_norm": 826.573974609375, + "learning_rate": 3.0168780864044754e-06, + "loss": 74.3518, + "step": 210150 + }, + { + "epoch": 0.8490729929661397, + "grad_norm": 402.9294738769531, + "learning_rate": 3.0154034013899692e-06, + "loss": 54.3841, + "step": 210160 + }, + { + "epoch": 0.8491133942315073, + "grad_norm": 1054.663818359375, + "learning_rate": 3.0139290474989156e-06, + "loss": 80.621, + "step": 210170 + }, + { + "epoch": 0.849153795496875, + "grad_norm": 492.2187805175781, + "learning_rate": 3.012455024760068e-06, + "loss": 63.1263, + "step": 210180 + }, + { + "epoch": 
0.8491941967622426, + "grad_norm": 811.7714233398438, + "learning_rate": 3.0109813332021474e-06, + "loss": 45.1416, + "step": 210190 + }, + { + "epoch": 0.8492345980276103, + "grad_norm": 831.8464965820312, + "learning_rate": 3.0095079728538955e-06, + "loss": 71.1601, + "step": 210200 + }, + { + "epoch": 0.8492749992929779, + "grad_norm": 675.057373046875, + "learning_rate": 3.008034943744029e-06, + "loss": 58.6954, + "step": 210210 + }, + { + "epoch": 0.8493154005583455, + "grad_norm": 444.53875732421875, + "learning_rate": 3.006562245901272e-06, + "loss": 66.389, + "step": 210220 + }, + { + "epoch": 0.8493558018237131, + "grad_norm": 610.64208984375, + "learning_rate": 3.0050898793543326e-06, + "loss": 39.7422, + "step": 210230 + }, + { + "epoch": 0.8493962030890807, + "grad_norm": 391.890625, + "learning_rate": 3.003617844131921e-06, + "loss": 68.2742, + "step": 210240 + }, + { + "epoch": 0.8494366043544483, + "grad_norm": 833.98828125, + "learning_rate": 3.002146140262725e-06, + "loss": 51.4428, + "step": 210250 + }, + { + "epoch": 0.849477005619816, + "grad_norm": 611.6705322265625, + "learning_rate": 3.00067476777544e-06, + "loss": 67.6774, + "step": 210260 + }, + { + "epoch": 0.8495174068851836, + "grad_norm": 734.0313110351562, + "learning_rate": 2.9992037266987516e-06, + "loss": 76.2628, + "step": 210270 + }, + { + "epoch": 0.8495578081505513, + "grad_norm": 985.18017578125, + "learning_rate": 2.9977330170613395e-06, + "loss": 76.9275, + "step": 210280 + }, + { + "epoch": 0.8495982094159189, + "grad_norm": 746.89990234375, + "learning_rate": 2.9962626388918827e-06, + "loss": 98.0585, + "step": 210290 + }, + { + "epoch": 0.8496386106812865, + "grad_norm": 535.1170043945312, + "learning_rate": 2.994792592219027e-06, + "loss": 98.1932, + "step": 210300 + }, + { + "epoch": 0.8496790119466542, + "grad_norm": 269.4791259765625, + "learning_rate": 2.993322877071456e-06, + "loss": 65.5149, + "step": 210310 + }, + { + "epoch": 0.8497194132120218, + "grad_norm": 457.9655456542969, + "learning_rate": 2.9918534934778077e-06, + "loss": 59.4835, + "step": 210320 + }, + { + "epoch": 0.8497598144773895, + "grad_norm": 546.13720703125, + "learning_rate": 2.9903844414667295e-06, + "loss": 69.9183, + "step": 210330 + }, + { + "epoch": 0.8498002157427571, + "grad_norm": 759.4251708984375, + "learning_rate": 2.9889157210668653e-06, + "loss": 57.7752, + "step": 210340 + }, + { + "epoch": 0.8498406170081247, + "grad_norm": 393.01617431640625, + "learning_rate": 2.9874473323068455e-06, + "loss": 59.8933, + "step": 210350 + }, + { + "epoch": 0.8498810182734923, + "grad_norm": 565.8057861328125, + "learning_rate": 2.985979275215305e-06, + "loss": 48.9967, + "step": 210360 + }, + { + "epoch": 0.8499214195388599, + "grad_norm": 476.7004089355469, + "learning_rate": 2.9845115498208523e-06, + "loss": 69.0477, + "step": 210370 + }, + { + "epoch": 0.8499618208042276, + "grad_norm": 1163.6187744140625, + "learning_rate": 2.983044156152106e-06, + "loss": 53.1004, + "step": 210380 + }, + { + "epoch": 0.8500022220695952, + "grad_norm": 526.5293579101562, + "learning_rate": 2.9815770942376754e-06, + "loss": 66.1084, + "step": 210390 + }, + { + "epoch": 0.8500426233349628, + "grad_norm": 709.1412963867188, + "learning_rate": 2.980110364106166e-06, + "loss": 69.6691, + "step": 210400 + }, + { + "epoch": 0.8500830246003305, + "grad_norm": 1194.279541015625, + "learning_rate": 2.9786439657861587e-06, + "loss": 69.1793, + "step": 210410 + }, + { + "epoch": 0.8501234258656981, + "grad_norm": 570.5025634765625, + 
"learning_rate": 2.9771778993062605e-06, + "loss": 59.3821, + "step": 210420 + }, + { + "epoch": 0.8501638271310658, + "grad_norm": 439.6074523925781, + "learning_rate": 2.975712164695037e-06, + "loss": 57.8821, + "step": 210430 + }, + { + "epoch": 0.8502042283964334, + "grad_norm": 899.7724609375, + "learning_rate": 2.97424676198107e-06, + "loss": 45.7406, + "step": 210440 + }, + { + "epoch": 0.850244629661801, + "grad_norm": 1323.18798828125, + "learning_rate": 2.972781691192932e-06, + "loss": 67.3628, + "step": 210450 + }, + { + "epoch": 0.8502850309271687, + "grad_norm": 785.9102172851562, + "learning_rate": 2.9713169523591732e-06, + "loss": 81.4898, + "step": 210460 + }, + { + "epoch": 0.8503254321925363, + "grad_norm": 320.5450439453125, + "learning_rate": 2.969852545508365e-06, + "loss": 68.2714, + "step": 210470 + }, + { + "epoch": 0.850365833457904, + "grad_norm": 483.0361328125, + "learning_rate": 2.9683884706690456e-06, + "loss": 64.7558, + "step": 210480 + }, + { + "epoch": 0.8504062347232715, + "grad_norm": 714.6460571289062, + "learning_rate": 2.96692472786976e-06, + "loss": 81.0499, + "step": 210490 + }, + { + "epoch": 0.8504466359886391, + "grad_norm": 686.2628784179688, + "learning_rate": 2.965461317139047e-06, + "loss": 65.3511, + "step": 210500 + }, + { + "epoch": 0.8504870372540068, + "grad_norm": 141.5917205810547, + "learning_rate": 2.9639982385054365e-06, + "loss": 56.146, + "step": 210510 + }, + { + "epoch": 0.8505274385193744, + "grad_norm": 438.7379150390625, + "learning_rate": 2.962535491997456e-06, + "loss": 86.524, + "step": 210520 + }, + { + "epoch": 0.850567839784742, + "grad_norm": 402.8752136230469, + "learning_rate": 2.9610730776436103e-06, + "loss": 75.1524, + "step": 210530 + }, + { + "epoch": 0.8506082410501097, + "grad_norm": 1478.2071533203125, + "learning_rate": 2.9596109954724197e-06, + "loss": 72.1444, + "step": 210540 + }, + { + "epoch": 0.8506486423154773, + "grad_norm": 509.8668212890625, + "learning_rate": 2.958149245512385e-06, + "loss": 100.3779, + "step": 210550 + }, + { + "epoch": 0.850689043580845, + "grad_norm": 407.91241455078125, + "learning_rate": 2.9566878277920084e-06, + "loss": 59.8139, + "step": 210560 + }, + { + "epoch": 0.8507294448462126, + "grad_norm": 575.3163452148438, + "learning_rate": 2.9552267423397673e-06, + "loss": 75.0929, + "step": 210570 + }, + { + "epoch": 0.8507698461115802, + "grad_norm": 1603.8231201171875, + "learning_rate": 2.9537659891841676e-06, + "loss": 110.1446, + "step": 210580 + }, + { + "epoch": 0.8508102473769479, + "grad_norm": 865.5337524414062, + "learning_rate": 2.9523055683536684e-06, + "loss": 52.9035, + "step": 210590 + }, + { + "epoch": 0.8508506486423155, + "grad_norm": 489.96002197265625, + "learning_rate": 2.9508454798767516e-06, + "loss": 53.1287, + "step": 210600 + }, + { + "epoch": 0.8508910499076832, + "grad_norm": 406.34820556640625, + "learning_rate": 2.9493857237818792e-06, + "loss": 59.7023, + "step": 210610 + }, + { + "epoch": 0.8509314511730507, + "grad_norm": 1407.731201171875, + "learning_rate": 2.9479263000975078e-06, + "loss": 58.5641, + "step": 210620 + }, + { + "epoch": 0.8509718524384183, + "grad_norm": 809.0394287109375, + "learning_rate": 2.9464672088520996e-06, + "loss": 77.9141, + "step": 210630 + }, + { + "epoch": 0.851012253703786, + "grad_norm": 984.0232543945312, + "learning_rate": 2.945008450074087e-06, + "loss": 74.9387, + "step": 210640 + }, + { + "epoch": 0.8510526549691536, + "grad_norm": 879.7416381835938, + "learning_rate": 2.9435500237919167e-06, + "loss": 
51.562, + "step": 210650 + }, + { + "epoch": 0.8510930562345213, + "grad_norm": 425.63580322265625, + "learning_rate": 2.942091930034019e-06, + "loss": 65.9339, + "step": 210660 + }, + { + "epoch": 0.8511334574998889, + "grad_norm": 1188.41845703125, + "learning_rate": 2.9406341688288197e-06, + "loss": 87.1894, + "step": 210670 + }, + { + "epoch": 0.8511738587652565, + "grad_norm": 603.1038208007812, + "learning_rate": 2.939176740204741e-06, + "loss": 63.7787, + "step": 210680 + }, + { + "epoch": 0.8512142600306242, + "grad_norm": 784.6964721679688, + "learning_rate": 2.9377196441902025e-06, + "loss": 66.6404, + "step": 210690 + }, + { + "epoch": 0.8512546612959918, + "grad_norm": 608.3253173828125, + "learning_rate": 2.936262880813596e-06, + "loss": 70.4195, + "step": 210700 + }, + { + "epoch": 0.8512950625613595, + "grad_norm": 768.5592651367188, + "learning_rate": 2.9348064501033293e-06, + "loss": 77.1009, + "step": 210710 + }, + { + "epoch": 0.8513354638267271, + "grad_norm": 577.6045532226562, + "learning_rate": 2.9333503520878026e-06, + "loss": 119.1697, + "step": 210720 + }, + { + "epoch": 0.8513758650920947, + "grad_norm": 698.369140625, + "learning_rate": 2.9318945867953875e-06, + "loss": 60.5559, + "step": 210730 + }, + { + "epoch": 0.8514162663574624, + "grad_norm": 284.7377014160156, + "learning_rate": 2.9304391542544854e-06, + "loss": 106.2629, + "step": 210740 + }, + { + "epoch": 0.8514566676228299, + "grad_norm": 936.4934692382812, + "learning_rate": 2.928984054493449e-06, + "loss": 84.6785, + "step": 210750 + }, + { + "epoch": 0.8514970688881975, + "grad_norm": 350.8067932128906, + "learning_rate": 2.927529287540667e-06, + "loss": 82.8383, + "step": 210760 + }, + { + "epoch": 0.8515374701535652, + "grad_norm": 768.6255493164062, + "learning_rate": 2.9260748534244876e-06, + "loss": 69.3574, + "step": 210770 + }, + { + "epoch": 0.8515778714189328, + "grad_norm": 576.8447265625, + "learning_rate": 2.9246207521732684e-06, + "loss": 74.7007, + "step": 210780 + }, + { + "epoch": 0.8516182726843005, + "grad_norm": 1460.17138671875, + "learning_rate": 2.923166983815362e-06, + "loss": 66.3684, + "step": 210790 + }, + { + "epoch": 0.8516586739496681, + "grad_norm": 731.5225830078125, + "learning_rate": 2.9217135483790993e-06, + "loss": 70.8186, + "step": 210800 + }, + { + "epoch": 0.8516990752150357, + "grad_norm": 487.74407958984375, + "learning_rate": 2.920260445892831e-06, + "loss": 70.5723, + "step": 210810 + }, + { + "epoch": 0.8517394764804034, + "grad_norm": 1085.71826171875, + "learning_rate": 2.918807676384876e-06, + "loss": 68.1555, + "step": 210820 + }, + { + "epoch": 0.851779877745771, + "grad_norm": 672.2816772460938, + "learning_rate": 2.9173552398835567e-06, + "loss": 61.062, + "step": 210830 + }, + { + "epoch": 0.8518202790111387, + "grad_norm": 504.22161865234375, + "learning_rate": 2.9159031364171953e-06, + "loss": 82.25, + "step": 210840 + }, + { + "epoch": 0.8518606802765063, + "grad_norm": 774.2008056640625, + "learning_rate": 2.9144513660140995e-06, + "loss": 61.7128, + "step": 210850 + }, + { + "epoch": 0.8519010815418739, + "grad_norm": 644.2448120117188, + "learning_rate": 2.912999928702567e-06, + "loss": 48.4258, + "step": 210860 + }, + { + "epoch": 0.8519414828072415, + "grad_norm": 961.9378662109375, + "learning_rate": 2.911548824510899e-06, + "loss": 85.4306, + "step": 210870 + }, + { + "epoch": 0.8519818840726091, + "grad_norm": 642.962646484375, + "learning_rate": 2.910098053467383e-06, + "loss": 87.7441, + "step": 210880 + }, + { + "epoch": 
0.8520222853379767, + "grad_norm": 717.9309692382812, + "learning_rate": 2.908647615600304e-06, + "loss": 52.9915, + "step": 210890 + }, + { + "epoch": 0.8520626866033444, + "grad_norm": 753.1732177734375, + "learning_rate": 2.9071975109379424e-06, + "loss": 79.4592, + "step": 210900 + }, + { + "epoch": 0.852103087868712, + "grad_norm": 755.7115478515625, + "learning_rate": 2.9057477395085578e-06, + "loss": 97.186, + "step": 210910 + }, + { + "epoch": 0.8521434891340797, + "grad_norm": 531.7388916015625, + "learning_rate": 2.9042983013404314e-06, + "loss": 68.9213, + "step": 210920 + }, + { + "epoch": 0.8521838903994473, + "grad_norm": 2065.753173828125, + "learning_rate": 2.9028491964618055e-06, + "loss": 84.1364, + "step": 210930 + }, + { + "epoch": 0.852224291664815, + "grad_norm": 521.0538330078125, + "learning_rate": 2.9014004249009353e-06, + "loss": 60.248, + "step": 210940 + }, + { + "epoch": 0.8522646929301826, + "grad_norm": 397.6254577636719, + "learning_rate": 2.8999519866860695e-06, + "loss": 65.9604, + "step": 210950 + }, + { + "epoch": 0.8523050941955502, + "grad_norm": 699.9982299804688, + "learning_rate": 2.8985038818454425e-06, + "loss": 50.0175, + "step": 210960 + }, + { + "epoch": 0.8523454954609179, + "grad_norm": 1121.232421875, + "learning_rate": 2.8970561104072903e-06, + "loss": 72.4288, + "step": 210970 + }, + { + "epoch": 0.8523858967262855, + "grad_norm": 498.86907958984375, + "learning_rate": 2.89560867239983e-06, + "loss": 61.1535, + "step": 210980 + }, + { + "epoch": 0.8524262979916531, + "grad_norm": 233.2322540283203, + "learning_rate": 2.8941615678512857e-06, + "loss": 44.859, + "step": 210990 + }, + { + "epoch": 0.8524666992570207, + "grad_norm": 671.632080078125, + "learning_rate": 2.892714796789868e-06, + "loss": 91.0819, + "step": 211000 + }, + { + "epoch": 0.8525071005223883, + "grad_norm": 384.2439270019531, + "learning_rate": 2.891268359243786e-06, + "loss": 74.8801, + "step": 211010 + }, + { + "epoch": 0.852547501787756, + "grad_norm": 535.1035766601562, + "learning_rate": 2.889822255241228e-06, + "loss": 63.9547, + "step": 211020 + }, + { + "epoch": 0.8525879030531236, + "grad_norm": 696.11328125, + "learning_rate": 2.888376484810402e-06, + "loss": 40.5266, + "step": 211030 + }, + { + "epoch": 0.8526283043184912, + "grad_norm": 655.9032592773438, + "learning_rate": 2.886931047979482e-06, + "loss": 49.6726, + "step": 211040 + }, + { + "epoch": 0.8526687055838589, + "grad_norm": 787.1405639648438, + "learning_rate": 2.8854859447766513e-06, + "loss": 61.2599, + "step": 211050 + }, + { + "epoch": 0.8527091068492265, + "grad_norm": 477.2098388671875, + "learning_rate": 2.88404117523009e-06, + "loss": 45.8865, + "step": 211060 + }, + { + "epoch": 0.8527495081145942, + "grad_norm": 1013.1619873046875, + "learning_rate": 2.8825967393679467e-06, + "loss": 82.0028, + "step": 211070 + }, + { + "epoch": 0.8527899093799618, + "grad_norm": 662.4928588867188, + "learning_rate": 2.8811526372184007e-06, + "loss": 74.8367, + "step": 211080 + }, + { + "epoch": 0.8528303106453294, + "grad_norm": 894.12158203125, + "learning_rate": 2.8797088688095942e-06, + "loss": 101.3774, + "step": 211090 + }, + { + "epoch": 0.8528707119106971, + "grad_norm": 683.4735107421875, + "learning_rate": 2.878265434169678e-06, + "loss": 63.3621, + "step": 211100 + }, + { + "epoch": 0.8529111131760647, + "grad_norm": 424.0350036621094, + "learning_rate": 2.876822333326792e-06, + "loss": 43.4446, + "step": 211110 + }, + { + "epoch": 0.8529515144414324, + "grad_norm": 591.7438354492188, + 
"learning_rate": 2.875379566309069e-06, + "loss": 49.1222, + "step": 211120 + }, + { + "epoch": 0.8529919157067999, + "grad_norm": 653.3576049804688, + "learning_rate": 2.873937133144644e-06, + "loss": 62.5558, + "step": 211130 + }, + { + "epoch": 0.8530323169721675, + "grad_norm": 881.9593505859375, + "learning_rate": 2.8724950338616266e-06, + "loss": 38.4239, + "step": 211140 + }, + { + "epoch": 0.8530727182375352, + "grad_norm": 680.30322265625, + "learning_rate": 2.8710532684881356e-06, + "loss": 65.7709, + "step": 211150 + }, + { + "epoch": 0.8531131195029028, + "grad_norm": 1005.051025390625, + "learning_rate": 2.86961183705228e-06, + "loss": 100.5303, + "step": 211160 + }, + { + "epoch": 0.8531535207682704, + "grad_norm": 515.0177612304688, + "learning_rate": 2.8681707395821634e-06, + "loss": 78.5797, + "step": 211170 + }, + { + "epoch": 0.8531939220336381, + "grad_norm": 1573.47802734375, + "learning_rate": 2.866729976105873e-06, + "loss": 76.1813, + "step": 211180 + }, + { + "epoch": 0.8532343232990057, + "grad_norm": 828.0487670898438, + "learning_rate": 2.8652895466515084e-06, + "loss": 47.1076, + "step": 211190 + }, + { + "epoch": 0.8532747245643734, + "grad_norm": 187.1006317138672, + "learning_rate": 2.8638494512471425e-06, + "loss": 62.8537, + "step": 211200 + }, + { + "epoch": 0.853315125829741, + "grad_norm": 552.9530639648438, + "learning_rate": 2.8624096899208555e-06, + "loss": 49.7504, + "step": 211210 + }, + { + "epoch": 0.8533555270951086, + "grad_norm": 534.5563354492188, + "learning_rate": 2.8609702627007175e-06, + "loss": 78.5661, + "step": 211220 + }, + { + "epoch": 0.8533959283604763, + "grad_norm": 324.0417175292969, + "learning_rate": 2.8595311696147776e-06, + "loss": 61.7479, + "step": 211230 + }, + { + "epoch": 0.8534363296258439, + "grad_norm": 943.4789428710938, + "learning_rate": 2.8580924106911132e-06, + "loss": 80.4259, + "step": 211240 + }, + { + "epoch": 0.8534767308912116, + "grad_norm": 735.6602783203125, + "learning_rate": 2.8566539859577558e-06, + "loss": 53.4011, + "step": 211250 + }, + { + "epoch": 0.8535171321565791, + "grad_norm": 847.1959228515625, + "learning_rate": 2.8552158954427576e-06, + "loss": 74.8833, + "step": 211260 + }, + { + "epoch": 0.8535575334219467, + "grad_norm": 889.412841796875, + "learning_rate": 2.8537781391741505e-06, + "loss": 81.7834, + "step": 211270 + }, + { + "epoch": 0.8535979346873144, + "grad_norm": 309.37872314453125, + "learning_rate": 2.8523407171799665e-06, + "loss": 44.2704, + "step": 211280 + }, + { + "epoch": 0.853638335952682, + "grad_norm": 242.2577362060547, + "learning_rate": 2.8509036294882285e-06, + "loss": 55.7702, + "step": 211290 + }, + { + "epoch": 0.8536787372180497, + "grad_norm": 445.3377380371094, + "learning_rate": 2.8494668761269585e-06, + "loss": 100.2872, + "step": 211300 + }, + { + "epoch": 0.8537191384834173, + "grad_norm": 586.1709594726562, + "learning_rate": 2.848030457124156e-06, + "loss": 79.2647, + "step": 211310 + }, + { + "epoch": 0.8537595397487849, + "grad_norm": 1257.5870361328125, + "learning_rate": 2.846594372507829e-06, + "loss": 59.5682, + "step": 211320 + }, + { + "epoch": 0.8537999410141526, + "grad_norm": 554.2776489257812, + "learning_rate": 2.845158622305977e-06, + "loss": 49.6526, + "step": 211330 + }, + { + "epoch": 0.8538403422795202, + "grad_norm": 637.1689453125, + "learning_rate": 2.843723206546589e-06, + "loss": 56.6917, + "step": 211340 + }, + { + "epoch": 0.8538807435448879, + "grad_norm": 429.1963195800781, + "learning_rate": 2.842288125257657e-06, + "loss": 
48.5594, + "step": 211350 + }, + { + "epoch": 0.8539211448102555, + "grad_norm": 904.2242431640625, + "learning_rate": 2.840853378467139e-06, + "loss": 89.3434, + "step": 211360 + }, + { + "epoch": 0.8539615460756231, + "grad_norm": 1699.865478515625, + "learning_rate": 2.839418966203029e-06, + "loss": 61.4251, + "step": 211370 + }, + { + "epoch": 0.8540019473409908, + "grad_norm": 390.3568115234375, + "learning_rate": 2.837984888493277e-06, + "loss": 63.106, + "step": 211380 + }, + { + "epoch": 0.8540423486063583, + "grad_norm": 553.29150390625, + "learning_rate": 2.8365511453658443e-06, + "loss": 64.5539, + "step": 211390 + }, + { + "epoch": 0.8540827498717259, + "grad_norm": 1096.0364990234375, + "learning_rate": 2.8351177368486895e-06, + "loss": 48.1635, + "step": 211400 + }, + { + "epoch": 0.8541231511370936, + "grad_norm": 1075.26220703125, + "learning_rate": 2.83368466296974e-06, + "loss": 68.7906, + "step": 211410 + }, + { + "epoch": 0.8541635524024612, + "grad_norm": 888.93994140625, + "learning_rate": 2.8322519237569567e-06, + "loss": 68.5268, + "step": 211420 + }, + { + "epoch": 0.8542039536678289, + "grad_norm": 835.8689575195312, + "learning_rate": 2.830819519238255e-06, + "loss": 89.9954, + "step": 211430 + }, + { + "epoch": 0.8542443549331965, + "grad_norm": 873.8251953125, + "learning_rate": 2.8293874494415672e-06, + "loss": 65.1059, + "step": 211440 + }, + { + "epoch": 0.8542847561985641, + "grad_norm": 323.7314758300781, + "learning_rate": 2.82795571439481e-06, + "loss": 64.2235, + "step": 211450 + }, + { + "epoch": 0.8543251574639318, + "grad_norm": 774.1565551757812, + "learning_rate": 2.8265243141259024e-06, + "loss": 50.9839, + "step": 211460 + }, + { + "epoch": 0.8543655587292994, + "grad_norm": 471.2906799316406, + "learning_rate": 2.825093248662738e-06, + "loss": 99.9965, + "step": 211470 + }, + { + "epoch": 0.8544059599946671, + "grad_norm": 328.929443359375, + "learning_rate": 2.8236625180332257e-06, + "loss": 63.5256, + "step": 211480 + }, + { + "epoch": 0.8544463612600347, + "grad_norm": 192.4799041748047, + "learning_rate": 2.822232122265254e-06, + "loss": 69.0915, + "step": 211490 + }, + { + "epoch": 0.8544867625254023, + "grad_norm": 927.644775390625, + "learning_rate": 2.8208020613867115e-06, + "loss": 60.2174, + "step": 211500 + }, + { + "epoch": 0.8545271637907699, + "grad_norm": 1796.424560546875, + "learning_rate": 2.819372335425483e-06, + "loss": 92.7467, + "step": 211510 + }, + { + "epoch": 0.8545675650561375, + "grad_norm": 1057.01611328125, + "learning_rate": 2.817942944409424e-06, + "loss": 83.9213, + "step": 211520 + }, + { + "epoch": 0.8546079663215052, + "grad_norm": 489.2111511230469, + "learning_rate": 2.8165138883664256e-06, + "loss": 45.9555, + "step": 211530 + }, + { + "epoch": 0.8546483675868728, + "grad_norm": 1425.2894287109375, + "learning_rate": 2.8150851673243273e-06, + "loss": 62.5798, + "step": 211540 + }, + { + "epoch": 0.8546887688522404, + "grad_norm": 582.3185424804688, + "learning_rate": 2.8136567813109937e-06, + "loss": 66.9228, + "step": 211550 + }, + { + "epoch": 0.8547291701176081, + "grad_norm": 882.193603515625, + "learning_rate": 2.812228730354274e-06, + "loss": 57.3357, + "step": 211560 + }, + { + "epoch": 0.8547695713829757, + "grad_norm": 689.9861450195312, + "learning_rate": 2.8108010144819943e-06, + "loss": 60.2499, + "step": 211570 + }, + { + "epoch": 0.8548099726483434, + "grad_norm": 673.0390014648438, + "learning_rate": 2.809373633722008e-06, + "loss": 48.6293, + "step": 211580 + }, + { + "epoch": 
0.854850373913711, + "grad_norm": 1322.0545654296875, + "learning_rate": 2.807946588102126e-06, + "loss": 62.0169, + "step": 211590 + }, + { + "epoch": 0.8548907751790786, + "grad_norm": 1210.6063232421875, + "learning_rate": 2.80651987765018e-06, + "loss": 53.3354, + "step": 211600 + }, + { + "epoch": 0.8549311764444463, + "grad_norm": 373.3837585449219, + "learning_rate": 2.8050935023939783e-06, + "loss": 38.8228, + "step": 211610 + }, + { + "epoch": 0.8549715777098139, + "grad_norm": 645.8424682617188, + "learning_rate": 2.803667462361337e-06, + "loss": 56.2453, + "step": 211620 + }, + { + "epoch": 0.8550119789751816, + "grad_norm": 1225.9747314453125, + "learning_rate": 2.8022417575800463e-06, + "loss": 58.4534, + "step": 211630 + }, + { + "epoch": 0.8550523802405491, + "grad_norm": 585.3136596679688, + "learning_rate": 2.8008163880779072e-06, + "loss": 44.4471, + "step": 211640 + }, + { + "epoch": 0.8550927815059167, + "grad_norm": 689.4835205078125, + "learning_rate": 2.799391353882708e-06, + "loss": 97.301, + "step": 211650 + }, + { + "epoch": 0.8551331827712844, + "grad_norm": 952.3156127929688, + "learning_rate": 2.7979666550222283e-06, + "loss": 51.0105, + "step": 211660 + }, + { + "epoch": 0.855173584036652, + "grad_norm": 753.5968627929688, + "learning_rate": 2.796542291524249e-06, + "loss": 69.4401, + "step": 211670 + }, + { + "epoch": 0.8552139853020196, + "grad_norm": 562.6250610351562, + "learning_rate": 2.7951182634165276e-06, + "loss": 59.202, + "step": 211680 + }, + { + "epoch": 0.8552543865673873, + "grad_norm": 942.421142578125, + "learning_rate": 2.7936945707268415e-06, + "loss": 91.546, + "step": 211690 + }, + { + "epoch": 0.8552947878327549, + "grad_norm": 879.1728515625, + "learning_rate": 2.7922712134829333e-06, + "loss": 88.004, + "step": 211700 + }, + { + "epoch": 0.8553351890981226, + "grad_norm": 256.9096984863281, + "learning_rate": 2.7908481917125585e-06, + "loss": 67.1174, + "step": 211710 + }, + { + "epoch": 0.8553755903634902, + "grad_norm": 786.8397827148438, + "learning_rate": 2.7894255054434573e-06, + "loss": 84.063, + "step": 211720 + }, + { + "epoch": 0.8554159916288578, + "grad_norm": 573.1483154296875, + "learning_rate": 2.7880031547033648e-06, + "loss": 57.9122, + "step": 211730 + }, + { + "epoch": 0.8554563928942255, + "grad_norm": 573.3084106445312, + "learning_rate": 2.7865811395200173e-06, + "loss": 64.6225, + "step": 211740 + }, + { + "epoch": 0.8554967941595931, + "grad_norm": 577.4299926757812, + "learning_rate": 2.7851594599211297e-06, + "loss": 70.1822, + "step": 211750 + }, + { + "epoch": 0.8555371954249608, + "grad_norm": 761.611572265625, + "learning_rate": 2.78373811593442e-06, + "loss": 54.4052, + "step": 211760 + }, + { + "epoch": 0.8555775966903283, + "grad_norm": 761.0303955078125, + "learning_rate": 2.7823171075876e-06, + "loss": 52.3486, + "step": 211770 + }, + { + "epoch": 0.8556179979556959, + "grad_norm": 623.1323852539062, + "learning_rate": 2.7808964349083754e-06, + "loss": 48.9281, + "step": 211780 + }, + { + "epoch": 0.8556583992210636, + "grad_norm": 1073.128173828125, + "learning_rate": 2.7794760979244317e-06, + "loss": 69.0876, + "step": 211790 + }, + { + "epoch": 0.8556988004864312, + "grad_norm": 548.0878295898438, + "learning_rate": 2.778056096663475e-06, + "loss": 53.2816, + "step": 211800 + }, + { + "epoch": 0.8557392017517989, + "grad_norm": 285.0952453613281, + "learning_rate": 2.776636431153179e-06, + "loss": 74.8867, + "step": 211810 + }, + { + "epoch": 0.8557796030171665, + "grad_norm": 1085.22998046875, + 
"learning_rate": 2.7752171014212217e-06, + "loss": 93.5222, + "step": 211820 + }, + { + "epoch": 0.8558200042825341, + "grad_norm": 1009.8582763671875, + "learning_rate": 2.773798107495278e-06, + "loss": 74.9784, + "step": 211830 + }, + { + "epoch": 0.8558604055479018, + "grad_norm": 1166.1810302734375, + "learning_rate": 2.772379449403002e-06, + "loss": 60.2426, + "step": 211840 + }, + { + "epoch": 0.8559008068132694, + "grad_norm": 925.8228149414062, + "learning_rate": 2.770961127172067e-06, + "loss": 63.4001, + "step": 211850 + }, + { + "epoch": 0.855941208078637, + "grad_norm": 703.3089599609375, + "learning_rate": 2.7695431408301043e-06, + "loss": 66.9852, + "step": 211860 + }, + { + "epoch": 0.8559816093440047, + "grad_norm": 447.46368408203125, + "learning_rate": 2.768125490404778e-06, + "loss": 58.6075, + "step": 211870 + }, + { + "epoch": 0.8560220106093723, + "grad_norm": 761.7327880859375, + "learning_rate": 2.7667081759237134e-06, + "loss": 74.1438, + "step": 211880 + }, + { + "epoch": 0.85606241187474, + "grad_norm": 993.9388427734375, + "learning_rate": 2.7652911974145457e-06, + "loss": 70.411, + "step": 211890 + }, + { + "epoch": 0.8561028131401075, + "grad_norm": 698.4833984375, + "learning_rate": 2.763874554904902e-06, + "loss": 42.6253, + "step": 211900 + }, + { + "epoch": 0.8561432144054751, + "grad_norm": 1039.7340087890625, + "learning_rate": 2.762458248422395e-06, + "loss": 64.5293, + "step": 211910 + }, + { + "epoch": 0.8561836156708428, + "grad_norm": 774.6480712890625, + "learning_rate": 2.7610422779946368e-06, + "loss": 69.4962, + "step": 211920 + }, + { + "epoch": 0.8562240169362104, + "grad_norm": 565.7041625976562, + "learning_rate": 2.759626643649236e-06, + "loss": 57.1356, + "step": 211930 + }, + { + "epoch": 0.8562644182015781, + "grad_norm": 330.170654296875, + "learning_rate": 2.7582113454137905e-06, + "loss": 51.9476, + "step": 211940 + }, + { + "epoch": 0.8563048194669457, + "grad_norm": 871.0545043945312, + "learning_rate": 2.7567963833158896e-06, + "loss": 47.1018, + "step": 211950 + }, + { + "epoch": 0.8563452207323133, + "grad_norm": 1129.9847412109375, + "learning_rate": 2.7553817573831267e-06, + "loss": 90.6789, + "step": 211960 + }, + { + "epoch": 0.856385621997681, + "grad_norm": 653.4608154296875, + "learning_rate": 2.7539674676430683e-06, + "loss": 61.4198, + "step": 211970 + }, + { + "epoch": 0.8564260232630486, + "grad_norm": 683.3692016601562, + "learning_rate": 2.7525535141232927e-06, + "loss": 53.3279, + "step": 211980 + }, + { + "epoch": 0.8564664245284163, + "grad_norm": 429.3038635253906, + "learning_rate": 2.751139896851369e-06, + "loss": 53.4961, + "step": 211990 + }, + { + "epoch": 0.8565068257937839, + "grad_norm": 1023.3428344726562, + "learning_rate": 2.749726615854851e-06, + "loss": 77.5327, + "step": 212000 + }, + { + "epoch": 0.8565472270591515, + "grad_norm": 1891.357666015625, + "learning_rate": 2.7483136711612977e-06, + "loss": 112.2706, + "step": 212010 + }, + { + "epoch": 0.8565876283245192, + "grad_norm": 669.115234375, + "learning_rate": 2.746901062798244e-06, + "loss": 61.6489, + "step": 212020 + }, + { + "epoch": 0.8566280295898867, + "grad_norm": 685.6423950195312, + "learning_rate": 2.7454887907932447e-06, + "loss": 82.2429, + "step": 212030 + }, + { + "epoch": 0.8566684308552543, + "grad_norm": 630.857666015625, + "learning_rate": 2.7440768551738206e-06, + "loss": 68.659, + "step": 212040 + }, + { + "epoch": 0.856708832120622, + "grad_norm": 811.7713623046875, + "learning_rate": 2.7426652559675004e-06, + "loss": 
47.7467, + "step": 212050 + }, + { + "epoch": 0.8567492333859896, + "grad_norm": 708.0504150390625, + "learning_rate": 2.7412539932018066e-06, + "loss": 59.0338, + "step": 212060 + }, + { + "epoch": 0.8567896346513573, + "grad_norm": 668.6560668945312, + "learning_rate": 2.7398430669042574e-06, + "loss": 66.2249, + "step": 212070 + }, + { + "epoch": 0.8568300359167249, + "grad_norm": 429.218505859375, + "learning_rate": 2.7384324771023486e-06, + "loss": 109.7741, + "step": 212080 + }, + { + "epoch": 0.8568704371820925, + "grad_norm": 535.3216552734375, + "learning_rate": 2.7370222238235846e-06, + "loss": 55.8309, + "step": 212090 + }, + { + "epoch": 0.8569108384474602, + "grad_norm": 1132.902099609375, + "learning_rate": 2.735612307095461e-06, + "loss": 67.7337, + "step": 212100 + }, + { + "epoch": 0.8569512397128278, + "grad_norm": 849.5535278320312, + "learning_rate": 2.7342027269454628e-06, + "loss": 77.8198, + "step": 212110 + }, + { + "epoch": 0.8569916409781955, + "grad_norm": 414.78240966796875, + "learning_rate": 2.7327934834010774e-06, + "loss": 63.9285, + "step": 212120 + }, + { + "epoch": 0.8570320422435631, + "grad_norm": 430.80291748046875, + "learning_rate": 2.731384576489762e-06, + "loss": 50.9511, + "step": 212130 + }, + { + "epoch": 0.8570724435089307, + "grad_norm": 918.1773681640625, + "learning_rate": 2.7299760062390057e-06, + "loss": 50.0876, + "step": 212140 + }, + { + "epoch": 0.8571128447742983, + "grad_norm": 550.6603393554688, + "learning_rate": 2.7285677726762516e-06, + "loss": 36.6575, + "step": 212150 + }, + { + "epoch": 0.8571532460396659, + "grad_norm": 886.1094360351562, + "learning_rate": 2.727159875828962e-06, + "loss": 52.8744, + "step": 212160 + }, + { + "epoch": 0.8571936473050336, + "grad_norm": 1043.1376953125, + "learning_rate": 2.725752315724588e-06, + "loss": 53.8178, + "step": 212170 + }, + { + "epoch": 0.8572340485704012, + "grad_norm": 1680.5909423828125, + "learning_rate": 2.7243450923905567e-06, + "loss": 39.2423, + "step": 212180 + }, + { + "epoch": 0.8572744498357688, + "grad_norm": 734.1829833984375, + "learning_rate": 2.722938205854322e-06, + "loss": 46.4089, + "step": 212190 + }, + { + "epoch": 0.8573148511011365, + "grad_norm": 420.1744384765625, + "learning_rate": 2.721531656143295e-06, + "loss": 56.8554, + "step": 212200 + }, + { + "epoch": 0.8573552523665041, + "grad_norm": 803.8202514648438, + "learning_rate": 2.7201254432849043e-06, + "loss": 66.1063, + "step": 212210 + }, + { + "epoch": 0.8573956536318718, + "grad_norm": 778.2275390625, + "learning_rate": 2.718719567306567e-06, + "loss": 57.1225, + "step": 212220 + }, + { + "epoch": 0.8574360548972394, + "grad_norm": 754.2857666015625, + "learning_rate": 2.7173140282356914e-06, + "loss": 66.9498, + "step": 212230 + }, + { + "epoch": 0.857476456162607, + "grad_norm": 562.5972900390625, + "learning_rate": 2.7159088260996714e-06, + "loss": 51.5129, + "step": 212240 + }, + { + "epoch": 0.8575168574279747, + "grad_norm": 297.0886535644531, + "learning_rate": 2.7145039609259074e-06, + "loss": 46.8059, + "step": 212250 + }, + { + "epoch": 0.8575572586933423, + "grad_norm": 623.0595703125, + "learning_rate": 2.7130994327417882e-06, + "loss": 70.4995, + "step": 212260 + }, + { + "epoch": 0.85759765995871, + "grad_norm": 540.5470581054688, + "learning_rate": 2.711695241574697e-06, + "loss": 45.5133, + "step": 212270 + }, + { + "epoch": 0.8576380612240775, + "grad_norm": 1013.1070556640625, + "learning_rate": 2.710291387452011e-06, + "loss": 57.4371, + "step": 212280 + }, + { + "epoch": 
0.8576784624894451, + "grad_norm": 807.140869140625, + "learning_rate": 2.708887870401087e-06, + "loss": 84.7763, + "step": 212290 + }, + { + "epoch": 0.8577188637548128, + "grad_norm": 541.2731323242188, + "learning_rate": 2.7074846904493045e-06, + "loss": 48.0361, + "step": 212300 + }, + { + "epoch": 0.8577592650201804, + "grad_norm": 659.0614624023438, + "learning_rate": 2.7060818476240046e-06, + "loss": 77.0007, + "step": 212310 + }, + { + "epoch": 0.857799666285548, + "grad_norm": 251.841552734375, + "learning_rate": 2.7046793419525453e-06, + "loss": 94.1148, + "step": 212320 + }, + { + "epoch": 0.8578400675509157, + "grad_norm": 459.16595458984375, + "learning_rate": 2.7032771734622667e-06, + "loss": 74.5386, + "step": 212330 + }, + { + "epoch": 0.8578804688162833, + "grad_norm": 838.6129760742188, + "learning_rate": 2.7018753421805023e-06, + "loss": 58.6387, + "step": 212340 + }, + { + "epoch": 0.857920870081651, + "grad_norm": 583.3873291015625, + "learning_rate": 2.7004738481345883e-06, + "loss": 67.8117, + "step": 212350 + }, + { + "epoch": 0.8579612713470186, + "grad_norm": 734.2080078125, + "learning_rate": 2.6990726913518404e-06, + "loss": 41.5027, + "step": 212360 + }, + { + "epoch": 0.8580016726123862, + "grad_norm": 901.0987548828125, + "learning_rate": 2.6976718718595772e-06, + "loss": 70.1043, + "step": 212370 + }, + { + "epoch": 0.8580420738777539, + "grad_norm": 650.5167236328125, + "learning_rate": 2.696271389685108e-06, + "loss": 86.085, + "step": 212380 + }, + { + "epoch": 0.8580824751431215, + "grad_norm": 868.0001220703125, + "learning_rate": 2.6948712448557367e-06, + "loss": 59.9206, + "step": 212390 + }, + { + "epoch": 0.8581228764084892, + "grad_norm": 518.8601684570312, + "learning_rate": 2.6934714373987604e-06, + "loss": 69.352, + "step": 212400 + }, + { + "epoch": 0.8581632776738567, + "grad_norm": 726.7902221679688, + "learning_rate": 2.692071967341472e-06, + "loss": 44.6865, + "step": 212410 + }, + { + "epoch": 0.8582036789392243, + "grad_norm": 851.2356567382812, + "learning_rate": 2.6906728347111475e-06, + "loss": 72.52, + "step": 212420 + }, + { + "epoch": 0.858244080204592, + "grad_norm": 379.449462890625, + "learning_rate": 2.689274039535066e-06, + "loss": 97.7273, + "step": 212430 + }, + { + "epoch": 0.8582844814699596, + "grad_norm": 766.5020751953125, + "learning_rate": 2.6878755818405065e-06, + "loss": 100.2595, + "step": 212440 + }, + { + "epoch": 0.8583248827353273, + "grad_norm": 747.8497924804688, + "learning_rate": 2.6864774616547153e-06, + "loss": 64.7545, + "step": 212450 + }, + { + "epoch": 0.8583652840006949, + "grad_norm": 774.6596069335938, + "learning_rate": 2.685079679004967e-06, + "loss": 38.0294, + "step": 212460 + }, + { + "epoch": 0.8584056852660625, + "grad_norm": 537.4815063476562, + "learning_rate": 2.6836822339184963e-06, + "loss": 78.1079, + "step": 212470 + }, + { + "epoch": 0.8584460865314302, + "grad_norm": 793.8912353515625, + "learning_rate": 2.682285126422566e-06, + "loss": 45.5652, + "step": 212480 + }, + { + "epoch": 0.8584864877967978, + "grad_norm": 809.8209838867188, + "learning_rate": 2.6808883565443975e-06, + "loss": 44.0672, + "step": 212490 + }, + { + "epoch": 0.8585268890621655, + "grad_norm": 827.0248413085938, + "learning_rate": 2.679491924311226e-06, + "loss": 70.6012, + "step": 212500 + }, + { + "epoch": 0.8585672903275331, + "grad_norm": 207.4359588623047, + "learning_rate": 2.6780958297502826e-06, + "loss": 41.5323, + "step": 212510 + }, + { + "epoch": 0.8586076915929007, + "grad_norm": 739.281005859375, 
+ "learning_rate": 2.676700072888774e-06, + "loss": 55.2231, + "step": 212520 + }, + { + "epoch": 0.8586480928582684, + "grad_norm": 735.2091674804688, + "learning_rate": 2.6753046537539164e-06, + "loss": 57.9511, + "step": 212530 + }, + { + "epoch": 0.8586884941236359, + "grad_norm": 586.7077026367188, + "learning_rate": 2.6739095723729125e-06, + "loss": 49.3772, + "step": 212540 + }, + { + "epoch": 0.8587288953890035, + "grad_norm": 715.5927124023438, + "learning_rate": 2.672514828772963e-06, + "loss": 92.1726, + "step": 212550 + }, + { + "epoch": 0.8587692966543712, + "grad_norm": 926.5059204101562, + "learning_rate": 2.671120422981257e-06, + "loss": 71.9004, + "step": 212560 + }, + { + "epoch": 0.8588096979197388, + "grad_norm": 933.6051635742188, + "learning_rate": 2.6697263550249862e-06, + "loss": 55.4729, + "step": 212570 + }, + { + "epoch": 0.8588500991851065, + "grad_norm": 512.9059448242188, + "learning_rate": 2.668332624931316e-06, + "loss": 78.0183, + "step": 212580 + }, + { + "epoch": 0.8588905004504741, + "grad_norm": 630.1448364257812, + "learning_rate": 2.6669392327274237e-06, + "loss": 90.3436, + "step": 212590 + }, + { + "epoch": 0.8589309017158417, + "grad_norm": 273.853759765625, + "learning_rate": 2.6655461784404768e-06, + "loss": 70.5312, + "step": 212600 + }, + { + "epoch": 0.8589713029812094, + "grad_norm": 391.1388854980469, + "learning_rate": 2.6641534620976297e-06, + "loss": 55.825, + "step": 212610 + }, + { + "epoch": 0.859011704246577, + "grad_norm": 744.1738891601562, + "learning_rate": 2.66276108372604e-06, + "loss": 76.723, + "step": 212620 + }, + { + "epoch": 0.8590521055119447, + "grad_norm": 713.3703002929688, + "learning_rate": 2.661369043352842e-06, + "loss": 95.7345, + "step": 212630 + }, + { + "epoch": 0.8590925067773123, + "grad_norm": 622.8103637695312, + "learning_rate": 2.6599773410051887e-06, + "loss": 52.8624, + "step": 212640 + }, + { + "epoch": 0.8591329080426799, + "grad_norm": 543.0191040039062, + "learning_rate": 2.6585859767101996e-06, + "loss": 54.4064, + "step": 212650 + }, + { + "epoch": 0.8591733093080475, + "grad_norm": 367.5849304199219, + "learning_rate": 2.6571949504950055e-06, + "loss": 82.8393, + "step": 212660 + }, + { + "epoch": 0.8592137105734151, + "grad_norm": 529.8073120117188, + "learning_rate": 2.6558042623867252e-06, + "loss": 56.1177, + "step": 212670 + }, + { + "epoch": 0.8592541118387828, + "grad_norm": 985.4187622070312, + "learning_rate": 2.6544139124124724e-06, + "loss": 54.5068, + "step": 212680 + }, + { + "epoch": 0.8592945131041504, + "grad_norm": 591.6640625, + "learning_rate": 2.6530239005993475e-06, + "loss": 69.7185, + "step": 212690 + }, + { + "epoch": 0.859334914369518, + "grad_norm": 616.4370727539062, + "learning_rate": 2.6516342269744534e-06, + "loss": 88.0003, + "step": 212700 + }, + { + "epoch": 0.8593753156348857, + "grad_norm": 247.0853729248047, + "learning_rate": 2.6502448915648794e-06, + "loss": 80.146, + "step": 212710 + }, + { + "epoch": 0.8594157169002533, + "grad_norm": 654.3287963867188, + "learning_rate": 2.648855894397715e-06, + "loss": 75.0042, + "step": 212720 + }, + { + "epoch": 0.859456118165621, + "grad_norm": 494.9658203125, + "learning_rate": 2.647467235500043e-06, + "loss": 51.8958, + "step": 212730 + }, + { + "epoch": 0.8594965194309886, + "grad_norm": 654.4570922851562, + "learning_rate": 2.6460789148989196e-06, + "loss": 56.1119, + "step": 212740 + }, + { + "epoch": 0.8595369206963562, + "grad_norm": 826.439208984375, + "learning_rate": 2.644690932621434e-06, + "loss": 54.1659, 
+ "step": 212750 + }, + { + "epoch": 0.8595773219617239, + "grad_norm": 684.5174560546875, + "learning_rate": 2.6433032886946274e-06, + "loss": 55.2695, + "step": 212760 + }, + { + "epoch": 0.8596177232270915, + "grad_norm": 451.08746337890625, + "learning_rate": 2.6419159831455598e-06, + "loss": 86.7442, + "step": 212770 + }, + { + "epoch": 0.8596581244924592, + "grad_norm": 1931.4599609375, + "learning_rate": 2.640529016001281e-06, + "loss": 75.8517, + "step": 212780 + }, + { + "epoch": 0.8596985257578267, + "grad_norm": 843.9285278320312, + "learning_rate": 2.6391423872888153e-06, + "loss": 62.5947, + "step": 212790 + }, + { + "epoch": 0.8597389270231943, + "grad_norm": 783.4859619140625, + "learning_rate": 2.6377560970352178e-06, + "loss": 60.9137, + "step": 212800 + }, + { + "epoch": 0.859779328288562, + "grad_norm": 1355.9293212890625, + "learning_rate": 2.6363701452674997e-06, + "loss": 67.8047, + "step": 212810 + }, + { + "epoch": 0.8598197295539296, + "grad_norm": 717.248046875, + "learning_rate": 2.6349845320126856e-06, + "loss": 80.3834, + "step": 212820 + }, + { + "epoch": 0.8598601308192972, + "grad_norm": 409.9790344238281, + "learning_rate": 2.6335992572977853e-06, + "loss": 48.8814, + "step": 212830 + }, + { + "epoch": 0.8599005320846649, + "grad_norm": 709.8870239257812, + "learning_rate": 2.6322143211498153e-06, + "loss": 39.8722, + "step": 212840 + }, + { + "epoch": 0.8599409333500325, + "grad_norm": 1002.1025390625, + "learning_rate": 2.6308297235957625e-06, + "loss": 71.0381, + "step": 212850 + }, + { + "epoch": 0.8599813346154002, + "grad_norm": 642.5049438476562, + "learning_rate": 2.629445464662628e-06, + "loss": 62.423, + "step": 212860 + }, + { + "epoch": 0.8600217358807678, + "grad_norm": 787.6592407226562, + "learning_rate": 2.628061544377396e-06, + "loss": 72.5479, + "step": 212870 + }, + { + "epoch": 0.8600621371461354, + "grad_norm": 402.4248352050781, + "learning_rate": 2.626677962767048e-06, + "loss": 53.8744, + "step": 212880 + }, + { + "epoch": 0.8601025384115031, + "grad_norm": 564.3341674804688, + "learning_rate": 2.625294719858562e-06, + "loss": 84.5758, + "step": 212890 + }, + { + "epoch": 0.8601429396768707, + "grad_norm": 502.6056213378906, + "learning_rate": 2.6239118156788924e-06, + "loss": 57.8304, + "step": 212900 + }, + { + "epoch": 0.8601833409422384, + "grad_norm": 474.8277587890625, + "learning_rate": 2.622529250255017e-06, + "loss": 67.0613, + "step": 212910 + }, + { + "epoch": 0.8602237422076059, + "grad_norm": 733.3290405273438, + "learning_rate": 2.6211470236138746e-06, + "loss": 89.7072, + "step": 212920 + }, + { + "epoch": 0.8602641434729735, + "grad_norm": 595.9534912109375, + "learning_rate": 2.6197651357824193e-06, + "loss": 50.0314, + "step": 212930 + }, + { + "epoch": 0.8603045447383412, + "grad_norm": 262.5852355957031, + "learning_rate": 2.6183835867875896e-06, + "loss": 51.4687, + "step": 212940 + }, + { + "epoch": 0.8603449460037088, + "grad_norm": 404.4411315917969, + "learning_rate": 2.617002376656321e-06, + "loss": 97.2918, + "step": 212950 + }, + { + "epoch": 0.8603853472690765, + "grad_norm": 1019.1380004882812, + "learning_rate": 2.615621505415544e-06, + "loss": 82.0345, + "step": 212960 + }, + { + "epoch": 0.8604257485344441, + "grad_norm": 442.458740234375, + "learning_rate": 2.6142409730921726e-06, + "loss": 60.0228, + "step": 212970 + }, + { + "epoch": 0.8604661497998117, + "grad_norm": 624.9494018554688, + "learning_rate": 2.6128607797131244e-06, + "loss": 51.0893, + "step": 212980 + }, + { + "epoch": 
0.8605065510651794, + "grad_norm": 599.1741333007812, + "learning_rate": 2.6114809253053055e-06, + "loss": 50.6376, + "step": 212990 + }, + { + "epoch": 0.860546952330547, + "grad_norm": 434.7032165527344, + "learning_rate": 2.61010140989562e-06, + "loss": 55.7742, + "step": 213000 + }, + { + "epoch": 0.8605873535959147, + "grad_norm": 906.6900634765625, + "learning_rate": 2.6087222335109584e-06, + "loss": 61.7247, + "step": 213010 + }, + { + "epoch": 0.8606277548612823, + "grad_norm": 845.65625, + "learning_rate": 2.607343396178217e-06, + "loss": 62.7022, + "step": 213020 + }, + { + "epoch": 0.8606681561266499, + "grad_norm": 477.52471923828125, + "learning_rate": 2.6059648979242647e-06, + "loss": 41.7173, + "step": 213030 + }, + { + "epoch": 0.8607085573920176, + "grad_norm": 960.2575073242188, + "learning_rate": 2.6045867387759825e-06, + "loss": 54.9021, + "step": 213040 + }, + { + "epoch": 0.8607489586573851, + "grad_norm": 899.6864624023438, + "learning_rate": 2.6032089187602403e-06, + "loss": 78.9325, + "step": 213050 + }, + { + "epoch": 0.8607893599227527, + "grad_norm": 332.3701477050781, + "learning_rate": 2.60183143790389e-06, + "loss": 35.3792, + "step": 213060 + }, + { + "epoch": 0.8608297611881204, + "grad_norm": 689.4830932617188, + "learning_rate": 2.6004542962338007e-06, + "loss": 75.8811, + "step": 213070 + }, + { + "epoch": 0.860870162453488, + "grad_norm": 690.4441528320312, + "learning_rate": 2.5990774937768027e-06, + "loss": 74.8655, + "step": 213080 + }, + { + "epoch": 0.8609105637188557, + "grad_norm": 224.739990234375, + "learning_rate": 2.597701030559758e-06, + "loss": 58.9457, + "step": 213090 + }, + { + "epoch": 0.8609509649842233, + "grad_norm": 564.0609130859375, + "learning_rate": 2.5963249066094863e-06, + "loss": 53.1778, + "step": 213100 + }, + { + "epoch": 0.8609913662495909, + "grad_norm": 1107.72412109375, + "learning_rate": 2.594949121952821e-06, + "loss": 56.2441, + "step": 213110 + }, + { + "epoch": 0.8610317675149586, + "grad_norm": 423.7530822753906, + "learning_rate": 2.5935736766165877e-06, + "loss": 69.3844, + "step": 213120 + }, + { + "epoch": 0.8610721687803262, + "grad_norm": 548.7913818359375, + "learning_rate": 2.592198570627593e-06, + "loss": 80.6637, + "step": 213130 + }, + { + "epoch": 0.8611125700456939, + "grad_norm": 312.2873840332031, + "learning_rate": 2.5908238040126477e-06, + "loss": 58.4978, + "step": 213140 + }, + { + "epoch": 0.8611529713110615, + "grad_norm": 604.404541015625, + "learning_rate": 2.589449376798556e-06, + "loss": 49.1175, + "step": 213150 + }, + { + "epoch": 0.8611933725764291, + "grad_norm": 836.6262817382812, + "learning_rate": 2.588075289012113e-06, + "loss": 53.9465, + "step": 213160 + }, + { + "epoch": 0.8612337738417968, + "grad_norm": 499.5501403808594, + "learning_rate": 2.5867015406801054e-06, + "loss": 66.014, + "step": 213170 + }, + { + "epoch": 0.8612741751071643, + "grad_norm": 465.3304138183594, + "learning_rate": 2.585328131829321e-06, + "loss": 59.7762, + "step": 213180 + }, + { + "epoch": 0.861314576372532, + "grad_norm": 449.5309143066406, + "learning_rate": 2.5839550624865272e-06, + "loss": 65.7894, + "step": 213190 + }, + { + "epoch": 0.8613549776378996, + "grad_norm": 438.50341796875, + "learning_rate": 2.5825823326784936e-06, + "loss": 62.8599, + "step": 213200 + }, + { + "epoch": 0.8613953789032672, + "grad_norm": 409.2837829589844, + "learning_rate": 2.5812099424319904e-06, + "loss": 79.47, + "step": 213210 + }, + { + "epoch": 0.8614357801686349, + "grad_norm": 264.65386962890625, + 
"learning_rate": 2.5798378917737576e-06, + "loss": 72.0528, + "step": 213220 + }, + { + "epoch": 0.8614761814340025, + "grad_norm": 364.5224304199219, + "learning_rate": 2.5784661807305635e-06, + "loss": 57.8086, + "step": 213230 + }, + { + "epoch": 0.8615165826993701, + "grad_norm": 739.784912109375, + "learning_rate": 2.5770948093291303e-06, + "loss": 70.6686, + "step": 213240 + }, + { + "epoch": 0.8615569839647378, + "grad_norm": 589.4148559570312, + "learning_rate": 2.575723777596213e-06, + "loss": 72.7636, + "step": 213250 + }, + { + "epoch": 0.8615973852301054, + "grad_norm": 736.6611938476562, + "learning_rate": 2.5743530855585274e-06, + "loss": 71.3113, + "step": 213260 + }, + { + "epoch": 0.8616377864954731, + "grad_norm": 729.9423217773438, + "learning_rate": 2.572982733242799e-06, + "loss": 82.8393, + "step": 213270 + }, + { + "epoch": 0.8616781877608407, + "grad_norm": 656.0527954101562, + "learning_rate": 2.5716127206757447e-06, + "loss": 98.2497, + "step": 213280 + }, + { + "epoch": 0.8617185890262083, + "grad_norm": 476.73004150390625, + "learning_rate": 2.570243047884078e-06, + "loss": 53.5177, + "step": 213290 + }, + { + "epoch": 0.8617589902915759, + "grad_norm": 1091.01123046875, + "learning_rate": 2.5688737148944907e-06, + "loss": 64.3082, + "step": 213300 + }, + { + "epoch": 0.8617993915569435, + "grad_norm": 691.090087890625, + "learning_rate": 2.5675047217336848e-06, + "loss": 75.6401, + "step": 213310 + }, + { + "epoch": 0.8618397928223112, + "grad_norm": 594.0259399414062, + "learning_rate": 2.5661360684283508e-06, + "loss": 63.9046, + "step": 213320 + }, + { + "epoch": 0.8618801940876788, + "grad_norm": 886.27734375, + "learning_rate": 2.5647677550051686e-06, + "loss": 70.3753, + "step": 213330 + }, + { + "epoch": 0.8619205953530464, + "grad_norm": 525.9151000976562, + "learning_rate": 2.5633997814908186e-06, + "loss": 109.4198, + "step": 213340 + }, + { + "epoch": 0.8619609966184141, + "grad_norm": 703.6453857421875, + "learning_rate": 2.5620321479119593e-06, + "loss": 56.3496, + "step": 213350 + }, + { + "epoch": 0.8620013978837817, + "grad_norm": 490.38079833984375, + "learning_rate": 2.5606648542952694e-06, + "loss": 60.3769, + "step": 213360 + }, + { + "epoch": 0.8620417991491494, + "grad_norm": 493.4593811035156, + "learning_rate": 2.5592979006673923e-06, + "loss": 68.6555, + "step": 213370 + }, + { + "epoch": 0.862082200414517, + "grad_norm": 609.3057250976562, + "learning_rate": 2.557931287054982e-06, + "loss": 75.8783, + "step": 213380 + }, + { + "epoch": 0.8621226016798846, + "grad_norm": 1173.009033203125, + "learning_rate": 2.5565650134846844e-06, + "loss": 96.7418, + "step": 213390 + }, + { + "epoch": 0.8621630029452523, + "grad_norm": 132.4680938720703, + "learning_rate": 2.555199079983126e-06, + "loss": 53.8957, + "step": 213400 + }, + { + "epoch": 0.8622034042106199, + "grad_norm": 661.011474609375, + "learning_rate": 2.5538334865769486e-06, + "loss": 59.3874, + "step": 213410 + }, + { + "epoch": 0.8622438054759876, + "grad_norm": 1677.81298828125, + "learning_rate": 2.5524682332927663e-06, + "loss": 59.5529, + "step": 213420 + }, + { + "epoch": 0.8622842067413551, + "grad_norm": 1806.615234375, + "learning_rate": 2.5511033201571977e-06, + "loss": 110.4898, + "step": 213430 + }, + { + "epoch": 0.8623246080067227, + "grad_norm": 603.495361328125, + "learning_rate": 2.549738747196855e-06, + "loss": 44.0681, + "step": 213440 + }, + { + "epoch": 0.8623650092720904, + "grad_norm": 606.192626953125, + "learning_rate": 2.5483745144383367e-06, + "loss": 
66.8289, + "step": 213450 + }, + { + "epoch": 0.862405410537458, + "grad_norm": 426.07354736328125, + "learning_rate": 2.547010621908248e-06, + "loss": 78.5562, + "step": 213460 + }, + { + "epoch": 0.8624458118028256, + "grad_norm": 857.9183349609375, + "learning_rate": 2.545647069633168e-06, + "loss": 91.447, + "step": 213470 + }, + { + "epoch": 0.8624862130681933, + "grad_norm": 821.9861450195312, + "learning_rate": 2.5442838576396845e-06, + "loss": 80.2705, + "step": 213480 + }, + { + "epoch": 0.8625266143335609, + "grad_norm": 503.3575439453125, + "learning_rate": 2.5429209859543737e-06, + "loss": 60.6429, + "step": 213490 + }, + { + "epoch": 0.8625670155989286, + "grad_norm": 1050.619384765625, + "learning_rate": 2.5415584546038098e-06, + "loss": 87.1681, + "step": 213500 + }, + { + "epoch": 0.8626074168642962, + "grad_norm": 963.6275634765625, + "learning_rate": 2.540196263614545e-06, + "loss": 68.8504, + "step": 213510 + }, + { + "epoch": 0.8626478181296638, + "grad_norm": 515.0169677734375, + "learning_rate": 2.5388344130131493e-06, + "loss": 49.0266, + "step": 213520 + }, + { + "epoch": 0.8626882193950315, + "grad_norm": 480.4032287597656, + "learning_rate": 2.5374729028261633e-06, + "loss": 47.2351, + "step": 213530 + }, + { + "epoch": 0.8627286206603991, + "grad_norm": 773.4510498046875, + "learning_rate": 2.5361117330801334e-06, + "loss": 97.2514, + "step": 213540 + }, + { + "epoch": 0.8627690219257668, + "grad_norm": 653.6296997070312, + "learning_rate": 2.5347509038016016e-06, + "loss": 42.9806, + "step": 213550 + }, + { + "epoch": 0.8628094231911343, + "grad_norm": 984.06396484375, + "learning_rate": 2.5333904150170828e-06, + "loss": 75.7174, + "step": 213560 + }, + { + "epoch": 0.8628498244565019, + "grad_norm": 758.1597900390625, + "learning_rate": 2.532030266753118e-06, + "loss": 77.8595, + "step": 213570 + }, + { + "epoch": 0.8628902257218696, + "grad_norm": 962.9981079101562, + "learning_rate": 2.530670459036213e-06, + "loss": 57.3791, + "step": 213580 + }, + { + "epoch": 0.8629306269872372, + "grad_norm": 1291.114013671875, + "learning_rate": 2.5293109918928814e-06, + "loss": 56.4976, + "step": 213590 + }, + { + "epoch": 0.8629710282526049, + "grad_norm": 720.0390625, + "learning_rate": 2.5279518653496272e-06, + "loss": 74.563, + "step": 213600 + }, + { + "epoch": 0.8630114295179725, + "grad_norm": 713.3284912109375, + "learning_rate": 2.526593079432946e-06, + "loss": 58.0822, + "step": 213610 + }, + { + "epoch": 0.8630518307833401, + "grad_norm": 1111.64794921875, + "learning_rate": 2.5252346341693266e-06, + "loss": 77.1627, + "step": 213620 + }, + { + "epoch": 0.8630922320487078, + "grad_norm": 493.80810546875, + "learning_rate": 2.5238765295852608e-06, + "loss": 73.8149, + "step": 213630 + }, + { + "epoch": 0.8631326333140754, + "grad_norm": 644.5321655273438, + "learning_rate": 2.522518765707216e-06, + "loss": 121.5767, + "step": 213640 + }, + { + "epoch": 0.863173034579443, + "grad_norm": 1311.71630859375, + "learning_rate": 2.5211613425616644e-06, + "loss": 64.7876, + "step": 213650 + }, + { + "epoch": 0.8632134358448107, + "grad_norm": 681.6768798828125, + "learning_rate": 2.519804260175076e-06, + "loss": 54.5151, + "step": 213660 + }, + { + "epoch": 0.8632538371101783, + "grad_norm": 1093.8486328125, + "learning_rate": 2.518447518573894e-06, + "loss": 75.2177, + "step": 213670 + }, + { + "epoch": 0.863294238375546, + "grad_norm": 537.4435424804688, + "learning_rate": 2.517091117784587e-06, + "loss": 51.1548, + "step": 213680 + }, + { + "epoch": 
0.8633346396409135, + "grad_norm": 509.9681091308594, + "learning_rate": 2.5157350578335794e-06, + "loss": 69.6127, + "step": 213690 + }, + { + "epoch": 0.8633750409062811, + "grad_norm": 672.5347900390625, + "learning_rate": 2.514379338747328e-06, + "loss": 63.4772, + "step": 213700 + }, + { + "epoch": 0.8634154421716488, + "grad_norm": 1193.46826171875, + "learning_rate": 2.51302396055225e-06, + "loss": 51.9098, + "step": 213710 + }, + { + "epoch": 0.8634558434370164, + "grad_norm": 805.4098510742188, + "learning_rate": 2.5116689232747725e-06, + "loss": 59.694, + "step": 213720 + }, + { + "epoch": 0.8634962447023841, + "grad_norm": 677.0099487304688, + "learning_rate": 2.510314226941317e-06, + "loss": 70.3065, + "step": 213730 + }, + { + "epoch": 0.8635366459677517, + "grad_norm": 677.3787231445312, + "learning_rate": 2.508959871578285e-06, + "loss": 38.5686, + "step": 213740 + }, + { + "epoch": 0.8635770472331193, + "grad_norm": 1131.1002197265625, + "learning_rate": 2.5076058572120855e-06, + "loss": 76.2743, + "step": 213750 + }, + { + "epoch": 0.863617448498487, + "grad_norm": 628.3687133789062, + "learning_rate": 2.5062521838691154e-06, + "loss": 61.3164, + "step": 213760 + }, + { + "epoch": 0.8636578497638546, + "grad_norm": 474.9469299316406, + "learning_rate": 2.5048988515757657e-06, + "loss": 52.5757, + "step": 213770 + }, + { + "epoch": 0.8636982510292223, + "grad_norm": 642.8936767578125, + "learning_rate": 2.5035458603584205e-06, + "loss": 62.1137, + "step": 213780 + }, + { + "epoch": 0.8637386522945899, + "grad_norm": 733.5830078125, + "learning_rate": 2.50219321024346e-06, + "loss": 73.2107, + "step": 213790 + }, + { + "epoch": 0.8637790535599575, + "grad_norm": 776.955322265625, + "learning_rate": 2.500840901257249e-06, + "loss": 63.6862, + "step": 213800 + }, + { + "epoch": 0.8638194548253252, + "grad_norm": 1337.1160888671875, + "learning_rate": 2.4994889334261507e-06, + "loss": 67.9603, + "step": 213810 + }, + { + "epoch": 0.8638598560906927, + "grad_norm": 519.6663818359375, + "learning_rate": 2.4981373067765313e-06, + "loss": 81.8386, + "step": 213820 + }, + { + "epoch": 0.8639002573560604, + "grad_norm": 1115.418701171875, + "learning_rate": 2.496786021334727e-06, + "loss": 52.8369, + "step": 213830 + }, + { + "epoch": 0.863940658621428, + "grad_norm": 786.0202026367188, + "learning_rate": 2.4954350771270998e-06, + "loss": 54.5212, + "step": 213840 + }, + { + "epoch": 0.8639810598867956, + "grad_norm": 842.4290771484375, + "learning_rate": 2.4940844741799676e-06, + "loss": 46.683, + "step": 213850 + }, + { + "epoch": 0.8640214611521633, + "grad_norm": 1898.479736328125, + "learning_rate": 2.492734212519681e-06, + "loss": 82.6025, + "step": 213860 + }, + { + "epoch": 0.8640618624175309, + "grad_norm": 569.0027465820312, + "learning_rate": 2.4913842921725497e-06, + "loss": 58.1221, + "step": 213870 + }, + { + "epoch": 0.8641022636828986, + "grad_norm": 442.1947326660156, + "learning_rate": 2.4900347131648948e-06, + "loss": 56.5011, + "step": 213880 + }, + { + "epoch": 0.8641426649482662, + "grad_norm": 709.284423828125, + "learning_rate": 2.488685475523034e-06, + "loss": 62.3482, + "step": 213890 + }, + { + "epoch": 0.8641830662136338, + "grad_norm": 552.6509399414062, + "learning_rate": 2.4873365792732563e-06, + "loss": 59.2152, + "step": 213900 + }, + { + "epoch": 0.8642234674790015, + "grad_norm": 552.5208129882812, + "learning_rate": 2.4859880244418744e-06, + "loss": 67.953, + "step": 213910 + }, + { + "epoch": 0.8642638687443691, + "grad_norm": 589.4375, + 
"learning_rate": 2.484639811055172e-06, + "loss": 73.6501, + "step": 213920 + }, + { + "epoch": 0.8643042700097368, + "grad_norm": 612.3671264648438, + "learning_rate": 2.483291939139432e-06, + "loss": 85.3815, + "step": 213930 + }, + { + "epoch": 0.8643446712751043, + "grad_norm": 258.6935729980469, + "learning_rate": 2.481944408720933e-06, + "loss": 54.9246, + "step": 213940 + }, + { + "epoch": 0.8643850725404719, + "grad_norm": 1224.2034912109375, + "learning_rate": 2.480597219825953e-06, + "loss": 62.7683, + "step": 213950 + }, + { + "epoch": 0.8644254738058396, + "grad_norm": 642.9288940429688, + "learning_rate": 2.479250372480739e-06, + "loss": 58.4726, + "step": 213960 + }, + { + "epoch": 0.8644658750712072, + "grad_norm": 1034.157470703125, + "learning_rate": 2.4779038667115706e-06, + "loss": 75.9781, + "step": 213970 + }, + { + "epoch": 0.8645062763365748, + "grad_norm": 929.4231567382812, + "learning_rate": 2.4765577025446797e-06, + "loss": 66.518, + "step": 213980 + }, + { + "epoch": 0.8645466776019425, + "grad_norm": 617.2093505859375, + "learning_rate": 2.475211880006321e-06, + "loss": 45.5596, + "step": 213990 + }, + { + "epoch": 0.8645870788673101, + "grad_norm": 1632.9136962890625, + "learning_rate": 2.473866399122733e-06, + "loss": 87.0063, + "step": 214000 + }, + { + "epoch": 0.8646274801326778, + "grad_norm": 697.252685546875, + "learning_rate": 2.4725212599201333e-06, + "loss": 82.2878, + "step": 214010 + }, + { + "epoch": 0.8646678813980454, + "grad_norm": 535.873046875, + "learning_rate": 2.4711764624247626e-06, + "loss": 47.263, + "step": 214020 + }, + { + "epoch": 0.864708282663413, + "grad_norm": 631.8673095703125, + "learning_rate": 2.469832006662829e-06, + "loss": 60.8686, + "step": 214030 + }, + { + "epoch": 0.8647486839287807, + "grad_norm": 766.1358642578125, + "learning_rate": 2.468487892660545e-06, + "loss": 76.1073, + "step": 214040 + }, + { + "epoch": 0.8647890851941483, + "grad_norm": 531.3762817382812, + "learning_rate": 2.467144120444114e-06, + "loss": 60.6989, + "step": 214050 + }, + { + "epoch": 0.864829486459516, + "grad_norm": 941.3184204101562, + "learning_rate": 2.4658006900397368e-06, + "loss": 76.8706, + "step": 214060 + }, + { + "epoch": 0.8648698877248835, + "grad_norm": 1035.9136962890625, + "learning_rate": 2.4644576014736067e-06, + "loss": 106.0434, + "step": 214070 + }, + { + "epoch": 0.8649102889902511, + "grad_norm": 360.080322265625, + "learning_rate": 2.4631148547718974e-06, + "loss": 49.5587, + "step": 214080 + }, + { + "epoch": 0.8649506902556188, + "grad_norm": 613.396240234375, + "learning_rate": 2.461772449960793e-06, + "loss": 60.519, + "step": 214090 + }, + { + "epoch": 0.8649910915209864, + "grad_norm": 556.5308227539062, + "learning_rate": 2.460430387066466e-06, + "loss": 61.9364, + "step": 214100 + }, + { + "epoch": 0.865031492786354, + "grad_norm": 735.5319213867188, + "learning_rate": 2.4590886661150813e-06, + "loss": 43.9904, + "step": 214110 + }, + { + "epoch": 0.8650718940517217, + "grad_norm": 375.3471374511719, + "learning_rate": 2.457747287132786e-06, + "loss": 47.197, + "step": 214120 + }, + { + "epoch": 0.8651122953170893, + "grad_norm": 599.934326171875, + "learning_rate": 2.4564062501457463e-06, + "loss": 55.3678, + "step": 214130 + }, + { + "epoch": 0.865152696582457, + "grad_norm": 407.7931823730469, + "learning_rate": 2.455065555180094e-06, + "loss": 51.9443, + "step": 214140 + }, + { + "epoch": 0.8651930978478246, + "grad_norm": 867.297119140625, + "learning_rate": 2.4537252022619717e-06, + "loss": 63.2817, + 
"step": 214150 + }, + { + "epoch": 0.8652334991131923, + "grad_norm": 585.3037719726562, + "learning_rate": 2.4523851914175167e-06, + "loss": 78.4905, + "step": 214160 + }, + { + "epoch": 0.8652739003785599, + "grad_norm": 667.13525390625, + "learning_rate": 2.4510455226728348e-06, + "loss": 56.8015, + "step": 214170 + }, + { + "epoch": 0.8653143016439275, + "grad_norm": 370.436767578125, + "learning_rate": 2.4497061960540645e-06, + "loss": 48.5273, + "step": 214180 + }, + { + "epoch": 0.8653547029092952, + "grad_norm": 1437.2149658203125, + "learning_rate": 2.448367211587306e-06, + "loss": 53.9669, + "step": 214190 + }, + { + "epoch": 0.8653951041746627, + "grad_norm": 651.3876342773438, + "learning_rate": 2.447028569298662e-06, + "loss": 64.343, + "step": 214200 + }, + { + "epoch": 0.8654355054400303, + "grad_norm": 1453.9736328125, + "learning_rate": 2.4456902692142336e-06, + "loss": 61.1977, + "step": 214210 + }, + { + "epoch": 0.865475906705398, + "grad_norm": 581.97216796875, + "learning_rate": 2.44435231136011e-06, + "loss": 67.8365, + "step": 214220 + }, + { + "epoch": 0.8655163079707656, + "grad_norm": 709.2313232421875, + "learning_rate": 2.4430146957623824e-06, + "loss": 61.9854, + "step": 214230 + }, + { + "epoch": 0.8655567092361333, + "grad_norm": 657.964599609375, + "learning_rate": 2.4416774224471172e-06, + "loss": 86.6565, + "step": 214240 + }, + { + "epoch": 0.8655971105015009, + "grad_norm": 441.1000061035156, + "learning_rate": 2.4403404914403896e-06, + "loss": 87.3933, + "step": 214250 + }, + { + "epoch": 0.8656375117668685, + "grad_norm": 722.6664428710938, + "learning_rate": 2.4390039027682668e-06, + "loss": 76.8646, + "step": 214260 + }, + { + "epoch": 0.8656779130322362, + "grad_norm": 826.8425903320312, + "learning_rate": 2.4376676564568057e-06, + "loss": 69.5923, + "step": 214270 + }, + { + "epoch": 0.8657183142976038, + "grad_norm": 847.0662841796875, + "learning_rate": 2.4363317525320462e-06, + "loss": 62.8131, + "step": 214280 + }, + { + "epoch": 0.8657587155629715, + "grad_norm": 610.6043090820312, + "learning_rate": 2.4349961910200516e-06, + "loss": 62.1955, + "step": 214290 + }, + { + "epoch": 0.8657991168283391, + "grad_norm": 464.18951416015625, + "learning_rate": 2.4336609719468453e-06, + "loss": 58.419, + "step": 214300 + }, + { + "epoch": 0.8658395180937067, + "grad_norm": 603.0712280273438, + "learning_rate": 2.432326095338462e-06, + "loss": 61.4302, + "step": 214310 + }, + { + "epoch": 0.8658799193590744, + "grad_norm": 630.1835327148438, + "learning_rate": 2.4309915612209256e-06, + "loss": 49.5413, + "step": 214320 + }, + { + "epoch": 0.8659203206244419, + "grad_norm": 385.4344177246094, + "learning_rate": 2.429657369620255e-06, + "loss": 62.492, + "step": 214330 + }, + { + "epoch": 0.8659607218898095, + "grad_norm": 760.1458129882812, + "learning_rate": 2.4283235205624632e-06, + "loss": 65.518, + "step": 214340 + }, + { + "epoch": 0.8660011231551772, + "grad_norm": 652.802734375, + "learning_rate": 2.4269900140735447e-06, + "loss": 110.9676, + "step": 214350 + }, + { + "epoch": 0.8660415244205448, + "grad_norm": 687.947021484375, + "learning_rate": 2.4256568501795052e-06, + "loss": 79.6452, + "step": 214360 + }, + { + "epoch": 0.8660819256859125, + "grad_norm": 723.8616333007812, + "learning_rate": 2.424324028906333e-06, + "loss": 48.3588, + "step": 214370 + }, + { + "epoch": 0.8661223269512801, + "grad_norm": 857.3963623046875, + "learning_rate": 2.422991550280014e-06, + "loss": 94.5976, + "step": 214380 + }, + { + "epoch": 0.8661627282166477, + 
"grad_norm": 905.8336181640625, + "learning_rate": 2.4216594143265206e-06, + "loss": 89.9502, + "step": 214390 + }, + { + "epoch": 0.8662031294820154, + "grad_norm": 1069.02294921875, + "learning_rate": 2.420327621071834e-06, + "loss": 81.5599, + "step": 214400 + }, + { + "epoch": 0.866243530747383, + "grad_norm": 1015.7953491210938, + "learning_rate": 2.4189961705419053e-06, + "loss": 109.0808, + "step": 214410 + }, + { + "epoch": 0.8662839320127507, + "grad_norm": 643.4905395507812, + "learning_rate": 2.4176650627626975e-06, + "loss": 65.0978, + "step": 214420 + }, + { + "epoch": 0.8663243332781183, + "grad_norm": 498.3371887207031, + "learning_rate": 2.416334297760161e-06, + "loss": 46.9656, + "step": 214430 + }, + { + "epoch": 0.866364734543486, + "grad_norm": 710.1370239257812, + "learning_rate": 2.4150038755602423e-06, + "loss": 68.6118, + "step": 214440 + }, + { + "epoch": 0.8664051358088536, + "grad_norm": 946.150146484375, + "learning_rate": 2.413673796188878e-06, + "loss": 60.6937, + "step": 214450 + }, + { + "epoch": 0.8664455370742211, + "grad_norm": 712.5261840820312, + "learning_rate": 2.4123440596719894e-06, + "loss": 85.866, + "step": 214460 + }, + { + "epoch": 0.8664859383395888, + "grad_norm": 393.5907897949219, + "learning_rate": 2.411014666035518e-06, + "loss": 62.7832, + "step": 214470 + }, + { + "epoch": 0.8665263396049564, + "grad_norm": 634.47509765625, + "learning_rate": 2.409685615305366e-06, + "loss": 74.7885, + "step": 214480 + }, + { + "epoch": 0.866566740870324, + "grad_norm": 543.7593383789062, + "learning_rate": 2.4083569075074474e-06, + "loss": 61.1061, + "step": 214490 + }, + { + "epoch": 0.8666071421356917, + "grad_norm": 863.0595703125, + "learning_rate": 2.4070285426676733e-06, + "loss": 76.9623, + "step": 214500 + }, + { + "epoch": 0.8666475434010593, + "grad_norm": 1022.35546875, + "learning_rate": 2.4057005208119267e-06, + "loss": 73.3154, + "step": 214510 + }, + { + "epoch": 0.866687944666427, + "grad_norm": 569.7796020507812, + "learning_rate": 2.404372841966116e-06, + "loss": 63.7508, + "step": 214520 + }, + { + "epoch": 0.8667283459317946, + "grad_norm": 689.01123046875, + "learning_rate": 2.4030455061561097e-06, + "loss": 85.6918, + "step": 214530 + }, + { + "epoch": 0.8667687471971622, + "grad_norm": 670.8472290039062, + "learning_rate": 2.4017185134077912e-06, + "loss": 43.7613, + "step": 214540 + }, + { + "epoch": 0.8668091484625299, + "grad_norm": 1268.99462890625, + "learning_rate": 2.4003918637470315e-06, + "loss": 86.4378, + "step": 214550 + }, + { + "epoch": 0.8668495497278975, + "grad_norm": 582.9794311523438, + "learning_rate": 2.3990655571996956e-06, + "loss": 78.5276, + "step": 214560 + }, + { + "epoch": 0.8668899509932652, + "grad_norm": 389.926025390625, + "learning_rate": 2.397739593791637e-06, + "loss": 54.5638, + "step": 214570 + }, + { + "epoch": 0.8669303522586327, + "grad_norm": 1057.823486328125, + "learning_rate": 2.3964139735487056e-06, + "loss": 72.4601, + "step": 214580 + }, + { + "epoch": 0.8669707535240003, + "grad_norm": 744.4671630859375, + "learning_rate": 2.3950886964967457e-06, + "loss": 76.1384, + "step": 214590 + }, + { + "epoch": 0.867011154789368, + "grad_norm": 657.7601928710938, + "learning_rate": 2.393763762661596e-06, + "loss": 41.6329, + "step": 214600 + }, + { + "epoch": 0.8670515560547356, + "grad_norm": 584.9659423828125, + "learning_rate": 2.39243917206909e-06, + "loss": 54.0219, + "step": 214610 + }, + { + "epoch": 0.8670919573201032, + "grad_norm": 1131.398193359375, + "learning_rate": 
2.3911149247450394e-06, + "loss": 86.2661, + "step": 214620 + }, + { + "epoch": 0.8671323585854709, + "grad_norm": 922.1690063476562, + "learning_rate": 2.3897910207152774e-06, + "loss": 49.4731, + "step": 214630 + }, + { + "epoch": 0.8671727598508385, + "grad_norm": 561.60986328125, + "learning_rate": 2.3884674600056033e-06, + "loss": 67.3732, + "step": 214640 + }, + { + "epoch": 0.8672131611162062, + "grad_norm": 466.9993896484375, + "learning_rate": 2.387144242641821e-06, + "loss": 46.2435, + "step": 214650 + }, + { + "epoch": 0.8672535623815738, + "grad_norm": 683.8718872070312, + "learning_rate": 2.38582136864973e-06, + "loss": 67.3647, + "step": 214660 + }, + { + "epoch": 0.8672939636469414, + "grad_norm": 616.3511352539062, + "learning_rate": 2.384498838055118e-06, + "loss": 50.5326, + "step": 214670 + }, + { + "epoch": 0.8673343649123091, + "grad_norm": 845.4418334960938, + "learning_rate": 2.383176650883776e-06, + "loss": 80.5816, + "step": 214680 + }, + { + "epoch": 0.8673747661776767, + "grad_norm": 329.435302734375, + "learning_rate": 2.381854807161472e-06, + "loss": 38.9608, + "step": 214690 + }, + { + "epoch": 0.8674151674430444, + "grad_norm": 590.8565063476562, + "learning_rate": 2.3805333069139767e-06, + "loss": 38.4877, + "step": 214700 + }, + { + "epoch": 0.8674555687084119, + "grad_norm": 890.7872924804688, + "learning_rate": 2.3792121501670585e-06, + "loss": 61.0311, + "step": 214710 + }, + { + "epoch": 0.8674959699737795, + "grad_norm": 587.3173828125, + "learning_rate": 2.3778913369464717e-06, + "loss": 54.6149, + "step": 214720 + }, + { + "epoch": 0.8675363712391472, + "grad_norm": 779.9354248046875, + "learning_rate": 2.3765708672779608e-06, + "loss": 56.6354, + "step": 214730 + }, + { + "epoch": 0.8675767725045148, + "grad_norm": 688.2056274414062, + "learning_rate": 2.3752507411872806e-06, + "loss": 61.7261, + "step": 214740 + }, + { + "epoch": 0.8676171737698825, + "grad_norm": 619.2838134765625, + "learning_rate": 2.3739309587001567e-06, + "loss": 71.9245, + "step": 214750 + }, + { + "epoch": 0.8676575750352501, + "grad_norm": 484.7203369140625, + "learning_rate": 2.372611519842325e-06, + "loss": 65.934, + "step": 214760 + }, + { + "epoch": 0.8676979763006177, + "grad_norm": 1405.2235107421875, + "learning_rate": 2.3712924246395087e-06, + "loss": 80.7589, + "step": 214770 + }, + { + "epoch": 0.8677383775659854, + "grad_norm": 857.8512573242188, + "learning_rate": 2.369973673117416e-06, + "loss": 58.6313, + "step": 214780 + }, + { + "epoch": 0.867778778831353, + "grad_norm": 787.8434448242188, + "learning_rate": 2.368655265301769e-06, + "loss": 61.3619, + "step": 214790 + }, + { + "epoch": 0.8678191800967207, + "grad_norm": 576.6287841796875, + "learning_rate": 2.3673372012182625e-06, + "loss": 48.6967, + "step": 214800 + }, + { + "epoch": 0.8678595813620883, + "grad_norm": 817.066162109375, + "learning_rate": 2.3660194808925964e-06, + "loss": 83.574, + "step": 214810 + }, + { + "epoch": 0.8678999826274559, + "grad_norm": 743.0673828125, + "learning_rate": 2.364702104350458e-06, + "loss": 41.4377, + "step": 214820 + }, + { + "epoch": 0.8679403838928236, + "grad_norm": 700.9609985351562, + "learning_rate": 2.36338507161753e-06, + "loss": 68.763, + "step": 214830 + }, + { + "epoch": 0.8679807851581911, + "grad_norm": 567.9506225585938, + "learning_rate": 2.3620683827194957e-06, + "loss": 58.6302, + "step": 214840 + }, + { + "epoch": 0.8680211864235587, + "grad_norm": 1132.594970703125, + "learning_rate": 2.3607520376820147e-06, + "loss": 81.9053, + "step": 214850 
+ }, + { + "epoch": 0.8680615876889264, + "grad_norm": 584.0321655273438, + "learning_rate": 2.3594360365307535e-06, + "loss": 59.9398, + "step": 214860 + }, + { + "epoch": 0.868101988954294, + "grad_norm": 586.8623657226562, + "learning_rate": 2.3581203792913686e-06, + "loss": 65.7276, + "step": 214870 + }, + { + "epoch": 0.8681423902196617, + "grad_norm": 470.0870056152344, + "learning_rate": 2.3568050659895137e-06, + "loss": 51.5795, + "step": 214880 + }, + { + "epoch": 0.8681827914850293, + "grad_norm": 907.8560180664062, + "learning_rate": 2.355490096650819e-06, + "loss": 80.0981, + "step": 214890 + }, + { + "epoch": 0.8682231927503969, + "grad_norm": 358.7883605957031, + "learning_rate": 2.3541754713009367e-06, + "loss": 47.0762, + "step": 214900 + }, + { + "epoch": 0.8682635940157646, + "grad_norm": 400.4853820800781, + "learning_rate": 2.352861189965485e-06, + "loss": 39.0077, + "step": 214910 + }, + { + "epoch": 0.8683039952811322, + "grad_norm": 868.1592407226562, + "learning_rate": 2.35154725267009e-06, + "loss": 74.379, + "step": 214920 + }, + { + "epoch": 0.8683443965464999, + "grad_norm": 694.79248046875, + "learning_rate": 2.3502336594403663e-06, + "loss": 41.588, + "step": 214930 + }, + { + "epoch": 0.8683847978118675, + "grad_norm": 615.990478515625, + "learning_rate": 2.3489204103019247e-06, + "loss": 45.5693, + "step": 214940 + }, + { + "epoch": 0.8684251990772351, + "grad_norm": 982.6193237304688, + "learning_rate": 2.3476075052803715e-06, + "loss": 75.6657, + "step": 214950 + }, + { + "epoch": 0.8684656003426028, + "grad_norm": 818.004150390625, + "learning_rate": 2.3462949444012905e-06, + "loss": 73.0347, + "step": 214960 + }, + { + "epoch": 0.8685060016079703, + "grad_norm": 1536.380615234375, + "learning_rate": 2.344982727690286e-06, + "loss": 71.7693, + "step": 214970 + }, + { + "epoch": 0.868546402873338, + "grad_norm": 604.5709228515625, + "learning_rate": 2.343670855172926e-06, + "loss": 69.3901, + "step": 214980 + }, + { + "epoch": 0.8685868041387056, + "grad_norm": 488.80194091796875, + "learning_rate": 2.342359326874797e-06, + "loss": 41.767, + "step": 214990 + }, + { + "epoch": 0.8686272054040732, + "grad_norm": 754.4036865234375, + "learning_rate": 2.3410481428214602e-06, + "loss": 74.5994, + "step": 215000 + }, + { + "epoch": 0.8686676066694409, + "grad_norm": 630.849365234375, + "learning_rate": 2.339737303038487e-06, + "loss": 84.3933, + "step": 215010 + }, + { + "epoch": 0.8687080079348085, + "grad_norm": 599.41748046875, + "learning_rate": 2.338426807551424e-06, + "loss": 59.7481, + "step": 215020 + }, + { + "epoch": 0.8687484092001762, + "grad_norm": 689.2133178710938, + "learning_rate": 2.3371166563858204e-06, + "loss": 57.2279, + "step": 215030 + }, + { + "epoch": 0.8687888104655438, + "grad_norm": 833.7686157226562, + "learning_rate": 2.3358068495672236e-06, + "loss": 53.0576, + "step": 215040 + }, + { + "epoch": 0.8688292117309114, + "grad_norm": 826.0081787109375, + "learning_rate": 2.334497387121166e-06, + "loss": 46.5808, + "step": 215050 + }, + { + "epoch": 0.8688696129962791, + "grad_norm": 848.0030517578125, + "learning_rate": 2.333188269073179e-06, + "loss": 74.1687, + "step": 215060 + }, + { + "epoch": 0.8689100142616467, + "grad_norm": 548.231689453125, + "learning_rate": 2.3318794954487766e-06, + "loss": 87.1363, + "step": 215070 + }, + { + "epoch": 0.8689504155270144, + "grad_norm": 443.6682434082031, + "learning_rate": 2.330571066273486e-06, + "loss": 54.9263, + "step": 215080 + }, + { + "epoch": 0.868990816792382, + "grad_norm": 
614.8939208984375, + "learning_rate": 2.329262981572806e-06, + "loss": 69.2552, + "step": 215090 + }, + { + "epoch": 0.8690312180577495, + "grad_norm": 1367.61865234375, + "learning_rate": 2.3279552413722394e-06, + "loss": 85.6544, + "step": 215100 + }, + { + "epoch": 0.8690716193231172, + "grad_norm": 1130.0067138671875, + "learning_rate": 2.3266478456972895e-06, + "loss": 73.4847, + "step": 215110 + }, + { + "epoch": 0.8691120205884848, + "grad_norm": 808.6988525390625, + "learning_rate": 2.325340794573432e-06, + "loss": 71.4893, + "step": 215120 + }, + { + "epoch": 0.8691524218538524, + "grad_norm": 1047.3289794921875, + "learning_rate": 2.3240340880261616e-06, + "loss": 116.8341, + "step": 215130 + }, + { + "epoch": 0.8691928231192201, + "grad_norm": 580.5315551757812, + "learning_rate": 2.3227277260809444e-06, + "loss": 52.9355, + "step": 215140 + }, + { + "epoch": 0.8692332243845877, + "grad_norm": 709.7540893554688, + "learning_rate": 2.3214217087632494e-06, + "loss": 44.9691, + "step": 215150 + }, + { + "epoch": 0.8692736256499554, + "grad_norm": 739.0968017578125, + "learning_rate": 2.3201160360985385e-06, + "loss": 47.3105, + "step": 215160 + }, + { + "epoch": 0.869314026915323, + "grad_norm": 805.6949462890625, + "learning_rate": 2.3188107081122734e-06, + "loss": 47.6578, + "step": 215170 + }, + { + "epoch": 0.8693544281806906, + "grad_norm": 425.768310546875, + "learning_rate": 2.317505724829894e-06, + "loss": 52.6963, + "step": 215180 + }, + { + "epoch": 0.8693948294460583, + "grad_norm": 803.8025512695312, + "learning_rate": 2.3162010862768416e-06, + "loss": 70.4285, + "step": 215190 + }, + { + "epoch": 0.8694352307114259, + "grad_norm": 815.6970825195312, + "learning_rate": 2.3148967924785536e-06, + "loss": 61.3465, + "step": 215200 + }, + { + "epoch": 0.8694756319767936, + "grad_norm": 653.969482421875, + "learning_rate": 2.313592843460459e-06, + "loss": 43.0242, + "step": 215210 + }, + { + "epoch": 0.8695160332421611, + "grad_norm": 839.3866577148438, + "learning_rate": 2.3122892392479802e-06, + "loss": 77.974, + "step": 215220 + }, + { + "epoch": 0.8695564345075287, + "grad_norm": 1517.8165283203125, + "learning_rate": 2.3109859798665224e-06, + "loss": 85.3589, + "step": 215230 + }, + { + "epoch": 0.8695968357728964, + "grad_norm": 293.1151428222656, + "learning_rate": 2.309683065341506e-06, + "loss": 95.6914, + "step": 215240 + }, + { + "epoch": 0.869637237038264, + "grad_norm": 612.3598022460938, + "learning_rate": 2.3083804956983214e-06, + "loss": 81.7249, + "step": 215250 + }, + { + "epoch": 0.8696776383036317, + "grad_norm": 622.033203125, + "learning_rate": 2.3070782709623684e-06, + "loss": 86.3531, + "step": 215260 + }, + { + "epoch": 0.8697180395689993, + "grad_norm": 525.5162963867188, + "learning_rate": 2.3057763911590334e-06, + "loss": 84.0628, + "step": 215270 + }, + { + "epoch": 0.8697584408343669, + "grad_norm": 525.29150390625, + "learning_rate": 2.3044748563136986e-06, + "loss": 45.896, + "step": 215280 + }, + { + "epoch": 0.8697988420997346, + "grad_norm": 544.2476806640625, + "learning_rate": 2.303173666451739e-06, + "loss": 71.7941, + "step": 215290 + }, + { + "epoch": 0.8698392433651022, + "grad_norm": 465.78607177734375, + "learning_rate": 2.3018728215985164e-06, + "loss": 75.8639, + "step": 215300 + }, + { + "epoch": 0.8698796446304699, + "grad_norm": 936.0291748046875, + "learning_rate": 2.300572321779395e-06, + "loss": 59.1492, + "step": 215310 + }, + { + "epoch": 0.8699200458958375, + "grad_norm": 328.4707946777344, + "learning_rate": 
2.2992721670197283e-06, + "loss": 41.8098, + "step": 215320 + }, + { + "epoch": 0.8699604471612051, + "grad_norm": 931.3463745117188, + "learning_rate": 2.2979723573448663e-06, + "loss": 83.3553, + "step": 215330 + }, + { + "epoch": 0.8700008484265728, + "grad_norm": 608.3905029296875, + "learning_rate": 2.2966728927801405e-06, + "loss": 65.4767, + "step": 215340 + }, + { + "epoch": 0.8700412496919403, + "grad_norm": 93.29224395751953, + "learning_rate": 2.2953737733508975e-06, + "loss": 87.0009, + "step": 215350 + }, + { + "epoch": 0.8700816509573079, + "grad_norm": 1174.744140625, + "learning_rate": 2.294074999082454e-06, + "loss": 99.9246, + "step": 215360 + }, + { + "epoch": 0.8701220522226756, + "grad_norm": 1206.567138671875, + "learning_rate": 2.292776570000135e-06, + "loss": 77.5266, + "step": 215370 + }, + { + "epoch": 0.8701624534880432, + "grad_norm": 841.7874145507812, + "learning_rate": 2.2914784861292573e-06, + "loss": 39.2411, + "step": 215380 + }, + { + "epoch": 0.8702028547534109, + "grad_norm": 742.867919921875, + "learning_rate": 2.2901807474951143e-06, + "loss": 53.2319, + "step": 215390 + }, + { + "epoch": 0.8702432560187785, + "grad_norm": 629.37890625, + "learning_rate": 2.2888833541230237e-06, + "loss": 45.7605, + "step": 215400 + }, + { + "epoch": 0.8702836572841461, + "grad_norm": 1465.152587890625, + "learning_rate": 2.2875863060382677e-06, + "loss": 52.3812, + "step": 215410 + }, + { + "epoch": 0.8703240585495138, + "grad_norm": 443.52423095703125, + "learning_rate": 2.286289603266134e-06, + "loss": 66.659, + "step": 215420 + }, + { + "epoch": 0.8703644598148814, + "grad_norm": 654.7299194335938, + "learning_rate": 2.2849932458319033e-06, + "loss": 63.583, + "step": 215430 + }, + { + "epoch": 0.8704048610802491, + "grad_norm": 432.7762756347656, + "learning_rate": 2.283697233760851e-06, + "loss": 52.5453, + "step": 215440 + }, + { + "epoch": 0.8704452623456167, + "grad_norm": 937.6286010742188, + "learning_rate": 2.2824015670782474e-06, + "loss": 72.3073, + "step": 215450 + }, + { + "epoch": 0.8704856636109843, + "grad_norm": 1081.889892578125, + "learning_rate": 2.2811062458093414e-06, + "loss": 91.0127, + "step": 215460 + }, + { + "epoch": 0.870526064876352, + "grad_norm": 1074.7474365234375, + "learning_rate": 2.2798112699793906e-06, + "loss": 93.0141, + "step": 215470 + }, + { + "epoch": 0.8705664661417195, + "grad_norm": 1067.006103515625, + "learning_rate": 2.2785166396136417e-06, + "loss": 109.6581, + "step": 215480 + }, + { + "epoch": 0.8706068674070871, + "grad_norm": 553.7022705078125, + "learning_rate": 2.2772223547373362e-06, + "loss": 58.9318, + "step": 215490 + }, + { + "epoch": 0.8706472686724548, + "grad_norm": 780.5904541015625, + "learning_rate": 2.2759284153757056e-06, + "loss": 89.2569, + "step": 215500 + }, + { + "epoch": 0.8706876699378224, + "grad_norm": 391.3018798828125, + "learning_rate": 2.2746348215539784e-06, + "loss": 54.671, + "step": 215510 + }, + { + "epoch": 0.8707280712031901, + "grad_norm": 762.379638671875, + "learning_rate": 2.2733415732973673e-06, + "loss": 73.6983, + "step": 215520 + }, + { + "epoch": 0.8707684724685577, + "grad_norm": 459.10955810546875, + "learning_rate": 2.2720486706310884e-06, + "loss": 44.891, + "step": 215530 + }, + { + "epoch": 0.8708088737339253, + "grad_norm": 488.31463623046875, + "learning_rate": 2.2707561135803525e-06, + "loss": 74.238, + "step": 215540 + }, + { + "epoch": 0.870849274999293, + "grad_norm": 865.5275268554688, + "learning_rate": 2.2694639021703436e-06, + "loss": 62.8644, + 
"step": 215550 + }, + { + "epoch": 0.8708896762646606, + "grad_norm": 522.427734375, + "learning_rate": 2.2681720364262727e-06, + "loss": 57.7095, + "step": 215560 + }, + { + "epoch": 0.8709300775300283, + "grad_norm": 823.7078247070312, + "learning_rate": 2.266880516373311e-06, + "loss": 54.5453, + "step": 215570 + }, + { + "epoch": 0.8709704787953959, + "grad_norm": 372.4303894042969, + "learning_rate": 2.2655893420366495e-06, + "loss": 72.7621, + "step": 215580 + }, + { + "epoch": 0.8710108800607635, + "grad_norm": 688.9720458984375, + "learning_rate": 2.264298513441452e-06, + "loss": 70.9504, + "step": 215590 + }, + { + "epoch": 0.8710512813261312, + "grad_norm": 1062.2611083984375, + "learning_rate": 2.2630080306128833e-06, + "loss": 58.9978, + "step": 215600 + }, + { + "epoch": 0.8710916825914987, + "grad_norm": 884.4033203125, + "learning_rate": 2.261717893576105e-06, + "loss": 75.5359, + "step": 215610 + }, + { + "epoch": 0.8711320838568664, + "grad_norm": 681.3526611328125, + "learning_rate": 2.2604281023562757e-06, + "loss": 47.704, + "step": 215620 + }, + { + "epoch": 0.871172485122234, + "grad_norm": 445.34588623046875, + "learning_rate": 2.2591386569785256e-06, + "loss": 67.4436, + "step": 215630 + }, + { + "epoch": 0.8712128863876016, + "grad_norm": 697.7828369140625, + "learning_rate": 2.257849557468004e-06, + "loss": 47.4825, + "step": 215640 + }, + { + "epoch": 0.8712532876529693, + "grad_norm": 636.9715576171875, + "learning_rate": 2.2565608038498366e-06, + "loss": 112.4566, + "step": 215650 + }, + { + "epoch": 0.8712936889183369, + "grad_norm": 412.90716552734375, + "learning_rate": 2.255272396149153e-06, + "loss": 83.4115, + "step": 215660 + }, + { + "epoch": 0.8713340901837046, + "grad_norm": 945.768310546875, + "learning_rate": 2.253984334391075e-06, + "loss": 48.6789, + "step": 215670 + }, + { + "epoch": 0.8713744914490722, + "grad_norm": 682.9679565429688, + "learning_rate": 2.2526966186006983e-06, + "loss": 48.7557, + "step": 215680 + }, + { + "epoch": 0.8714148927144398, + "grad_norm": 387.5283203125, + "learning_rate": 2.2514092488031467e-06, + "loss": 74.7945, + "step": 215690 + }, + { + "epoch": 0.8714552939798075, + "grad_norm": 670.1620483398438, + "learning_rate": 2.2501222250235055e-06, + "loss": 63.5238, + "step": 215700 + }, + { + "epoch": 0.8714956952451751, + "grad_norm": 480.83538818359375, + "learning_rate": 2.2488355472868717e-06, + "loss": 42.3651, + "step": 215710 + }, + { + "epoch": 0.8715360965105428, + "grad_norm": 360.1119079589844, + "learning_rate": 2.24754921561833e-06, + "loss": 59.0382, + "step": 215720 + }, + { + "epoch": 0.8715764977759103, + "grad_norm": 488.12225341796875, + "learning_rate": 2.2462632300429465e-06, + "loss": 88.0592, + "step": 215730 + }, + { + "epoch": 0.8716168990412779, + "grad_norm": 447.1756286621094, + "learning_rate": 2.244977590585811e-06, + "loss": 57.0003, + "step": 215740 + }, + { + "epoch": 0.8716573003066456, + "grad_norm": 744.7769165039062, + "learning_rate": 2.2436922972719756e-06, + "loss": 59.6473, + "step": 215750 + }, + { + "epoch": 0.8716977015720132, + "grad_norm": 596.3184204101562, + "learning_rate": 2.2424073501264985e-06, + "loss": 42.1662, + "step": 215760 + }, + { + "epoch": 0.8717381028373808, + "grad_norm": 801.6654663085938, + "learning_rate": 2.2411227491744315e-06, + "loss": 56.8878, + "step": 215770 + }, + { + "epoch": 0.8717785041027485, + "grad_norm": 823.9082641601562, + "learning_rate": 2.239838494440827e-06, + "loss": 64.0584, + "step": 215780 + }, + { + "epoch": 
0.8718189053681161, + "grad_norm": 795.6690063476562, + "learning_rate": 2.2385545859507076e-06, + "loss": 60.2843, + "step": 215790 + }, + { + "epoch": 0.8718593066334838, + "grad_norm": 905.3572998046875, + "learning_rate": 2.2372710237291105e-06, + "loss": 86.4683, + "step": 215800 + }, + { + "epoch": 0.8718997078988514, + "grad_norm": 1066.0255126953125, + "learning_rate": 2.235987807801061e-06, + "loss": 75.0372, + "step": 215810 + }, + { + "epoch": 0.871940109164219, + "grad_norm": 695.7855224609375, + "learning_rate": 2.234704938191574e-06, + "loss": 49.7491, + "step": 215820 + }, + { + "epoch": 0.8719805104295867, + "grad_norm": 599.32470703125, + "learning_rate": 2.2334224149256654e-06, + "loss": 48.5998, + "step": 215830 + }, + { + "epoch": 0.8720209116949543, + "grad_norm": 460.1656188964844, + "learning_rate": 2.232140238028324e-06, + "loss": 40.7648, + "step": 215840 + }, + { + "epoch": 0.872061312960322, + "grad_norm": 998.3419799804688, + "learning_rate": 2.2308584075245676e-06, + "loss": 65.8199, + "step": 215850 + }, + { + "epoch": 0.8721017142256895, + "grad_norm": 717.9755249023438, + "learning_rate": 2.229576923439367e-06, + "loss": 73.6022, + "step": 215860 + }, + { + "epoch": 0.8721421154910571, + "grad_norm": 185.8402557373047, + "learning_rate": 2.228295785797714e-06, + "loss": 104.2553, + "step": 215870 + }, + { + "epoch": 0.8721825167564248, + "grad_norm": 475.386962890625, + "learning_rate": 2.2270149946245902e-06, + "loss": 35.0379, + "step": 215880 + }, + { + "epoch": 0.8722229180217924, + "grad_norm": 292.3280029296875, + "learning_rate": 2.225734549944949e-06, + "loss": 50.3958, + "step": 215890 + }, + { + "epoch": 0.87226331928716, + "grad_norm": 421.2606506347656, + "learning_rate": 2.224454451783773e-06, + "loss": 43.0944, + "step": 215900 + }, + { + "epoch": 0.8723037205525277, + "grad_norm": 529.28271484375, + "learning_rate": 2.2231747001660043e-06, + "loss": 45.4796, + "step": 215910 + }, + { + "epoch": 0.8723441218178953, + "grad_norm": 786.8468627929688, + "learning_rate": 2.221895295116596e-06, + "loss": 51.7943, + "step": 215920 + }, + { + "epoch": 0.872384523083263, + "grad_norm": 1080.238037109375, + "learning_rate": 2.2206162366604934e-06, + "loss": 62.308, + "step": 215930 + }, + { + "epoch": 0.8724249243486306, + "grad_norm": 1614.94677734375, + "learning_rate": 2.2193375248226336e-06, + "loss": 87.0608, + "step": 215940 + }, + { + "epoch": 0.8724653256139983, + "grad_norm": 444.54302978515625, + "learning_rate": 2.2180591596279345e-06, + "loss": 39.1841, + "step": 215950 + }, + { + "epoch": 0.8725057268793659, + "grad_norm": 442.08966064453125, + "learning_rate": 2.2167811411013363e-06, + "loss": 59.4074, + "step": 215960 + }, + { + "epoch": 0.8725461281447335, + "grad_norm": 428.5460205078125, + "learning_rate": 2.215503469267739e-06, + "loss": 66.9463, + "step": 215970 + }, + { + "epoch": 0.8725865294101012, + "grad_norm": 1725.53857421875, + "learning_rate": 2.2142261441520584e-06, + "loss": 104.7104, + "step": 215980 + }, + { + "epoch": 0.8726269306754687, + "grad_norm": 923.6148681640625, + "learning_rate": 2.2129491657792012e-06, + "loss": 65.4332, + "step": 215990 + }, + { + "epoch": 0.8726673319408363, + "grad_norm": 756.5881958007812, + "learning_rate": 2.2116725341740496e-06, + "loss": 72.7663, + "step": 216000 + }, + { + "epoch": 0.872707733206204, + "grad_norm": 102.41825866699219, + "learning_rate": 2.210396249361506e-06, + "loss": 84.9721, + "step": 216010 + }, + { + "epoch": 0.8727481344715716, + "grad_norm": 580.0397338867188, 
+ "learning_rate": 2.2091203113664394e-06, + "loss": 90.953, + "step": 216020 + }, + { + "epoch": 0.8727885357369393, + "grad_norm": 384.55010986328125, + "learning_rate": 2.207844720213739e-06, + "loss": 79.2664, + "step": 216030 + }, + { + "epoch": 0.8728289370023069, + "grad_norm": 452.2008972167969, + "learning_rate": 2.206569475928262e-06, + "loss": 72.5443, + "step": 216040 + }, + { + "epoch": 0.8728693382676745, + "grad_norm": 510.2738037109375, + "learning_rate": 2.205294578534876e-06, + "loss": 37.4415, + "step": 216050 + }, + { + "epoch": 0.8729097395330422, + "grad_norm": 523.017822265625, + "learning_rate": 2.204020028058433e-06, + "loss": 68.5967, + "step": 216060 + }, + { + "epoch": 0.8729501407984098, + "grad_norm": 379.525146484375, + "learning_rate": 2.202745824523782e-06, + "loss": 62.2823, + "step": 216070 + }, + { + "epoch": 0.8729905420637775, + "grad_norm": 490.6965637207031, + "learning_rate": 2.2014719679557616e-06, + "loss": 58.0313, + "step": 216080 + }, + { + "epoch": 0.8730309433291451, + "grad_norm": 530.0121459960938, + "learning_rate": 2.200198458379208e-06, + "loss": 70.363, + "step": 216090 + }, + { + "epoch": 0.8730713445945127, + "grad_norm": 958.5704956054688, + "learning_rate": 2.1989252958189498e-06, + "loss": 62.5752, + "step": 216100 + }, + { + "epoch": 0.8731117458598804, + "grad_norm": 591.5958251953125, + "learning_rate": 2.197652480299808e-06, + "loss": 68.8508, + "step": 216110 + }, + { + "epoch": 0.8731521471252479, + "grad_norm": 723.391845703125, + "learning_rate": 2.1963800118466017e-06, + "loss": 61.8803, + "step": 216120 + }, + { + "epoch": 0.8731925483906156, + "grad_norm": 1139.695556640625, + "learning_rate": 2.195107890484127e-06, + "loss": 72.2975, + "step": 216130 + }, + { + "epoch": 0.8732329496559832, + "grad_norm": 565.4959106445312, + "learning_rate": 2.1938361162371915e-06, + "loss": 57.0819, + "step": 216140 + }, + { + "epoch": 0.8732733509213508, + "grad_norm": 775.2278442382812, + "learning_rate": 2.192564689130592e-06, + "loss": 101.1908, + "step": 216150 + }, + { + "epoch": 0.8733137521867185, + "grad_norm": 747.7669677734375, + "learning_rate": 2.1912936091891023e-06, + "loss": 75.4092, + "step": 216160 + }, + { + "epoch": 0.8733541534520861, + "grad_norm": 753.2837524414062, + "learning_rate": 2.190022876437523e-06, + "loss": 55.1563, + "step": 216170 + }, + { + "epoch": 0.8733945547174538, + "grad_norm": 602.8887329101562, + "learning_rate": 2.1887524909006073e-06, + "loss": 62.2574, + "step": 216180 + }, + { + "epoch": 0.8734349559828214, + "grad_norm": 446.68798828125, + "learning_rate": 2.18748245260314e-06, + "loss": 57.6857, + "step": 216190 + }, + { + "epoch": 0.873475357248189, + "grad_norm": 680.189697265625, + "learning_rate": 2.186212761569868e-06, + "loss": 72.4822, + "step": 216200 + }, + { + "epoch": 0.8735157585135567, + "grad_norm": 429.24871826171875, + "learning_rate": 2.184943417825549e-06, + "loss": 38.2351, + "step": 216210 + }, + { + "epoch": 0.8735561597789243, + "grad_norm": 690.9927978515625, + "learning_rate": 2.1836744213949344e-06, + "loss": 53.5066, + "step": 216220 + }, + { + "epoch": 0.873596561044292, + "grad_norm": 379.00982666015625, + "learning_rate": 2.182405772302758e-06, + "loss": 60.7689, + "step": 216230 + }, + { + "epoch": 0.8736369623096596, + "grad_norm": 632.3671875, + "learning_rate": 2.181137470573751e-06, + "loss": 80.3872, + "step": 216240 + }, + { + "epoch": 0.8736773635750271, + "grad_norm": 1172.7337646484375, + "learning_rate": 2.1798695162326444e-06, + "loss": 82.9495, 
+ "step": 216250 + }, + { + "epoch": 0.8737177648403948, + "grad_norm": 622.9329833984375, + "learning_rate": 2.1786019093041545e-06, + "loss": 70.1951, + "step": 216260 + }, + { + "epoch": 0.8737581661057624, + "grad_norm": 999.8385620117188, + "learning_rate": 2.1773346498129967e-06, + "loss": 98.6795, + "step": 216270 + }, + { + "epoch": 0.87379856737113, + "grad_norm": 710.2994384765625, + "learning_rate": 2.1760677377838803e-06, + "loss": 105.6964, + "step": 216280 + }, + { + "epoch": 0.8738389686364977, + "grad_norm": 903.8643188476562, + "learning_rate": 2.1748011732414898e-06, + "loss": 56.9792, + "step": 216290 + }, + { + "epoch": 0.8738793699018653, + "grad_norm": 435.7991943359375, + "learning_rate": 2.1735349562105366e-06, + "loss": 67.7464, + "step": 216300 + }, + { + "epoch": 0.873919771167233, + "grad_norm": 491.9228820800781, + "learning_rate": 2.1722690867156927e-06, + "loss": 33.9622, + "step": 216310 + }, + { + "epoch": 0.8739601724326006, + "grad_norm": 989.9915161132812, + "learning_rate": 2.171003564781642e-06, + "loss": 44.2588, + "step": 216320 + }, + { + "epoch": 0.8740005736979682, + "grad_norm": 689.0279541015625, + "learning_rate": 2.169738390433058e-06, + "loss": 58.9831, + "step": 216330 + }, + { + "epoch": 0.8740409749633359, + "grad_norm": 786.5889892578125, + "learning_rate": 2.1684735636945975e-06, + "loss": 80.0603, + "step": 216340 + }, + { + "epoch": 0.8740813762287035, + "grad_norm": 765.1282958984375, + "learning_rate": 2.167209084590933e-06, + "loss": 81.0178, + "step": 216350 + }, + { + "epoch": 0.8741217774940712, + "grad_norm": 667.5206909179688, + "learning_rate": 2.1659449531467038e-06, + "loss": 88.9576, + "step": 216360 + }, + { + "epoch": 0.8741621787594387, + "grad_norm": 737.6148071289062, + "learning_rate": 2.16468116938656e-06, + "loss": 44.0084, + "step": 216370 + }, + { + "epoch": 0.8742025800248063, + "grad_norm": 606.1256103515625, + "learning_rate": 2.163417733335138e-06, + "loss": 46.7804, + "step": 216380 + }, + { + "epoch": 0.874242981290174, + "grad_norm": 785.3816528320312, + "learning_rate": 2.162154645017074e-06, + "loss": 42.8543, + "step": 216390 + }, + { + "epoch": 0.8742833825555416, + "grad_norm": 506.9963073730469, + "learning_rate": 2.1608919044569855e-06, + "loss": 41.9613, + "step": 216400 + }, + { + "epoch": 0.8743237838209093, + "grad_norm": 564.9407348632812, + "learning_rate": 2.159629511679493e-06, + "loss": 52.4348, + "step": 216410 + }, + { + "epoch": 0.8743641850862769, + "grad_norm": 452.93536376953125, + "learning_rate": 2.1583674667092104e-06, + "loss": 69.4393, + "step": 216420 + }, + { + "epoch": 0.8744045863516445, + "grad_norm": 1259.5477294921875, + "learning_rate": 2.1571057695707377e-06, + "loss": 60.0339, + "step": 216430 + }, + { + "epoch": 0.8744449876170122, + "grad_norm": 585.0986328125, + "learning_rate": 2.1558444202886777e-06, + "loss": 60.5952, + "step": 216440 + }, + { + "epoch": 0.8744853888823798, + "grad_norm": 341.6156921386719, + "learning_rate": 2.1545834188876104e-06, + "loss": 55.8892, + "step": 216450 + }, + { + "epoch": 0.8745257901477475, + "grad_norm": 1878.2969970703125, + "learning_rate": 2.153322765392134e-06, + "loss": 67.312, + "step": 216460 + }, + { + "epoch": 0.8745661914131151, + "grad_norm": 869.1055908203125, + "learning_rate": 2.1520624598268134e-06, + "loss": 76.2425, + "step": 216470 + }, + { + "epoch": 0.8746065926784827, + "grad_norm": 557.3641357421875, + "learning_rate": 2.150802502216225e-06, + "loss": 67.3011, + "step": 216480 + }, + { + "epoch": 
0.8746469939438504, + "grad_norm": 829.6607055664062, + "learning_rate": 2.1495428925849348e-06, + "loss": 82.6796, + "step": 216490 + }, + { + "epoch": 0.8746873952092179, + "grad_norm": 261.81719970703125, + "learning_rate": 2.1482836309574885e-06, + "loss": 86.6276, + "step": 216500 + }, + { + "epoch": 0.8747277964745855, + "grad_norm": 497.7942199707031, + "learning_rate": 2.1470247173584526e-06, + "loss": 71.8646, + "step": 216510 + }, + { + "epoch": 0.8747681977399532, + "grad_norm": 548.4462890625, + "learning_rate": 2.1457661518123563e-06, + "loss": 77.3226, + "step": 216520 + }, + { + "epoch": 0.8748085990053208, + "grad_norm": 525.7841796875, + "learning_rate": 2.14450793434374e-06, + "loss": 46.4875, + "step": 216530 + }, + { + "epoch": 0.8748490002706885, + "grad_norm": 768.0385131835938, + "learning_rate": 2.143250064977136e-06, + "loss": 72.6042, + "step": 216540 + }, + { + "epoch": 0.8748894015360561, + "grad_norm": 438.46539306640625, + "learning_rate": 2.1419925437370637e-06, + "loss": 76.0305, + "step": 216550 + }, + { + "epoch": 0.8749298028014237, + "grad_norm": 530.2626342773438, + "learning_rate": 2.140735370648044e-06, + "loss": 43.7704, + "step": 216560 + }, + { + "epoch": 0.8749702040667914, + "grad_norm": 1163.406005859375, + "learning_rate": 2.139478545734579e-06, + "loss": 45.6786, + "step": 216570 + }, + { + "epoch": 0.875010605332159, + "grad_norm": 449.6617126464844, + "learning_rate": 2.1382220690211763e-06, + "loss": 95.2701, + "step": 216580 + }, + { + "epoch": 0.8750510065975267, + "grad_norm": 1128.8885498046875, + "learning_rate": 2.1369659405323294e-06, + "loss": 69.3665, + "step": 216590 + }, + { + "epoch": 0.8750914078628943, + "grad_norm": 480.495849609375, + "learning_rate": 2.1357101602925323e-06, + "loss": 76.7282, + "step": 216600 + }, + { + "epoch": 0.8751318091282619, + "grad_norm": 742.1851196289062, + "learning_rate": 2.134454728326256e-06, + "loss": 54.7408, + "step": 216610 + }, + { + "epoch": 0.8751722103936296, + "grad_norm": 867.841064453125, + "learning_rate": 2.1331996446579885e-06, + "loss": 77.7765, + "step": 216620 + }, + { + "epoch": 0.8752126116589971, + "grad_norm": 933.4296875, + "learning_rate": 2.1319449093121846e-06, + "loss": 76.9202, + "step": 216630 + }, + { + "epoch": 0.8752530129243647, + "grad_norm": 703.680908203125, + "learning_rate": 2.1306905223133233e-06, + "loss": 67.9471, + "step": 216640 + }, + { + "epoch": 0.8752934141897324, + "grad_norm": 420.23455810546875, + "learning_rate": 2.129436483685845e-06, + "loss": 64.3969, + "step": 216650 + }, + { + "epoch": 0.8753338154551, + "grad_norm": 692.1060180664062, + "learning_rate": 2.128182793454203e-06, + "loss": 50.0137, + "step": 216660 + }, + { + "epoch": 0.8753742167204677, + "grad_norm": 374.3869934082031, + "learning_rate": 2.126929451642843e-06, + "loss": 54.2513, + "step": 216670 + }, + { + "epoch": 0.8754146179858353, + "grad_norm": 512.1610107421875, + "learning_rate": 2.1256764582761893e-06, + "loss": 78.0153, + "step": 216680 + }, + { + "epoch": 0.875455019251203, + "grad_norm": 915.1660766601562, + "learning_rate": 2.1244238133786778e-06, + "loss": 59.5488, + "step": 216690 + }, + { + "epoch": 0.8754954205165706, + "grad_norm": 1099.463623046875, + "learning_rate": 2.1231715169747247e-06, + "loss": 60.2033, + "step": 216700 + }, + { + "epoch": 0.8755358217819382, + "grad_norm": 1023.3216552734375, + "learning_rate": 2.1219195690887484e-06, + "loss": 81.2005, + "step": 216710 + }, + { + "epoch": 0.8755762230473059, + "grad_norm": 479.7607727050781, + 
"learning_rate": 2.1206679697451536e-06, + "loss": 59.7432, + "step": 216720 + }, + { + "epoch": 0.8756166243126735, + "grad_norm": 582.180419921875, + "learning_rate": 2.1194167189683457e-06, + "loss": 67.4479, + "step": 216730 + }, + { + "epoch": 0.8756570255780411, + "grad_norm": 1005.8095703125, + "learning_rate": 2.1181658167827092e-06, + "loss": 62.5646, + "step": 216740 + }, + { + "epoch": 0.8756974268434088, + "grad_norm": 408.1492004394531, + "learning_rate": 2.116915263212638e-06, + "loss": 45.6877, + "step": 216750 + }, + { + "epoch": 0.8757378281087763, + "grad_norm": 466.759521484375, + "learning_rate": 2.115665058282512e-06, + "loss": 62.3927, + "step": 216760 + }, + { + "epoch": 0.875778229374144, + "grad_norm": 831.8069458007812, + "learning_rate": 2.1144152020166975e-06, + "loss": 77.1025, + "step": 216770 + }, + { + "epoch": 0.8758186306395116, + "grad_norm": 791.7064819335938, + "learning_rate": 2.113165694439576e-06, + "loss": 85.8304, + "step": 216780 + }, + { + "epoch": 0.8758590319048792, + "grad_norm": 235.5809783935547, + "learning_rate": 2.111916535575487e-06, + "loss": 60.9459, + "step": 216790 + }, + { + "epoch": 0.8758994331702469, + "grad_norm": 510.8736572265625, + "learning_rate": 2.110667725448805e-06, + "loss": 69.2578, + "step": 216800 + }, + { + "epoch": 0.8759398344356145, + "grad_norm": 820.9615478515625, + "learning_rate": 2.1094192640838606e-06, + "loss": 75.0417, + "step": 216810 + }, + { + "epoch": 0.8759802357009822, + "grad_norm": 621.6105346679688, + "learning_rate": 2.1081711515049986e-06, + "loss": 68.1804, + "step": 216820 + }, + { + "epoch": 0.8760206369663498, + "grad_norm": 505.3234558105469, + "learning_rate": 2.1069233877365548e-06, + "loss": 61.3681, + "step": 216830 + }, + { + "epoch": 0.8760610382317174, + "grad_norm": 540.8740844726562, + "learning_rate": 2.1056759728028476e-06, + "loss": 66.174, + "step": 216840 + }, + { + "epoch": 0.8761014394970851, + "grad_norm": 814.3193969726562, + "learning_rate": 2.1044289067282e-06, + "loss": 68.174, + "step": 216850 + }, + { + "epoch": 0.8761418407624527, + "grad_norm": 775.93896484375, + "learning_rate": 2.1031821895369254e-06, + "loss": 61.5703, + "step": 216860 + }, + { + "epoch": 0.8761822420278204, + "grad_norm": 942.7702026367188, + "learning_rate": 2.1019358212533247e-06, + "loss": 68.8192, + "step": 216870 + }, + { + "epoch": 0.876222643293188, + "grad_norm": 723.8602905273438, + "learning_rate": 2.1006898019017032e-06, + "loss": 69.122, + "step": 216880 + }, + { + "epoch": 0.8762630445585555, + "grad_norm": 703.6035766601562, + "learning_rate": 2.099444131506352e-06, + "loss": 76.4005, + "step": 216890 + }, + { + "epoch": 0.8763034458239232, + "grad_norm": 529.3009643554688, + "learning_rate": 2.0981988100915497e-06, + "loss": 95.725, + "step": 216900 + }, + { + "epoch": 0.8763438470892908, + "grad_norm": 805.760009765625, + "learning_rate": 2.0969538376815766e-06, + "loss": 67.9229, + "step": 216910 + }, + { + "epoch": 0.8763842483546584, + "grad_norm": 346.9089050292969, + "learning_rate": 2.095709214300705e-06, + "loss": 62.3822, + "step": 216920 + }, + { + "epoch": 0.8764246496200261, + "grad_norm": 560.2156372070312, + "learning_rate": 2.094464939973202e-06, + "loss": 53.8619, + "step": 216930 + }, + { + "epoch": 0.8764650508853937, + "grad_norm": 1221.45947265625, + "learning_rate": 2.093221014723328e-06, + "loss": 59.0382, + "step": 216940 + }, + { + "epoch": 0.8765054521507614, + "grad_norm": 344.9273376464844, + "learning_rate": 2.091977438575319e-06, + "loss": 53.1646, + 
"step": 216950 + }, + { + "epoch": 0.876545853416129, + "grad_norm": 332.82354736328125, + "learning_rate": 2.09073421155344e-06, + "loss": 42.2456, + "step": 216960 + }, + { + "epoch": 0.8765862546814966, + "grad_norm": 1399.7581787109375, + "learning_rate": 2.089491333681912e-06, + "loss": 104.0067, + "step": 216970 + }, + { + "epoch": 0.8766266559468643, + "grad_norm": 1072.566162109375, + "learning_rate": 2.0882488049849716e-06, + "loss": 61.9397, + "step": 216980 + }, + { + "epoch": 0.8766670572122319, + "grad_norm": 784.4332275390625, + "learning_rate": 2.0870066254868427e-06, + "loss": 78.1606, + "step": 216990 + }, + { + "epoch": 0.8767074584775996, + "grad_norm": 677.109130859375, + "learning_rate": 2.085764795211742e-06, + "loss": 55.987, + "step": 217000 + }, + { + "epoch": 0.8767478597429671, + "grad_norm": 867.0872802734375, + "learning_rate": 2.084523314183884e-06, + "loss": 47.395, + "step": 217010 + }, + { + "epoch": 0.8767882610083347, + "grad_norm": 228.57984924316406, + "learning_rate": 2.083282182427462e-06, + "loss": 68.019, + "step": 217020 + }, + { + "epoch": 0.8768286622737024, + "grad_norm": 424.5111999511719, + "learning_rate": 2.082041399966679e-06, + "loss": 80.3946, + "step": 217030 + }, + { + "epoch": 0.87686906353907, + "grad_norm": 621.4895629882812, + "learning_rate": 2.0808009668257224e-06, + "loss": 73.8771, + "step": 217040 + }, + { + "epoch": 0.8769094648044377, + "grad_norm": 494.4961853027344, + "learning_rate": 2.079560883028782e-06, + "loss": 66.5328, + "step": 217050 + }, + { + "epoch": 0.8769498660698053, + "grad_norm": 994.2697143554688, + "learning_rate": 2.07832114860002e-06, + "loss": 85.2991, + "step": 217060 + }, + { + "epoch": 0.8769902673351729, + "grad_norm": 1576.3046875, + "learning_rate": 2.077081763563622e-06, + "loss": 63.1353, + "step": 217070 + }, + { + "epoch": 0.8770306686005406, + "grad_norm": 1169.0662841796875, + "learning_rate": 2.0758427279437376e-06, + "loss": 54.292, + "step": 217080 + }, + { + "epoch": 0.8770710698659082, + "grad_norm": 1018.1236572265625, + "learning_rate": 2.0746040417645274e-06, + "loss": 76.0867, + "step": 217090 + }, + { + "epoch": 0.8771114711312759, + "grad_norm": 626.8131103515625, + "learning_rate": 2.0733657050501455e-06, + "loss": 50.8619, + "step": 217100 + }, + { + "epoch": 0.8771518723966435, + "grad_norm": 690.0748291015625, + "learning_rate": 2.072127717824719e-06, + "loss": 53.3438, + "step": 217110 + }, + { + "epoch": 0.8771922736620111, + "grad_norm": 979.889892578125, + "learning_rate": 2.0708900801124e-06, + "loss": 52.8676, + "step": 217120 + }, + { + "epoch": 0.8772326749273788, + "grad_norm": 718.6712646484375, + "learning_rate": 2.0696527919373066e-06, + "loss": 67.5304, + "step": 217130 + }, + { + "epoch": 0.8772730761927463, + "grad_norm": 716.427978515625, + "learning_rate": 2.068415853323562e-06, + "loss": 64.5781, + "step": 217140 + }, + { + "epoch": 0.8773134774581139, + "grad_norm": 547.9963989257812, + "learning_rate": 2.0671792642952825e-06, + "loss": 41.8436, + "step": 217150 + }, + { + "epoch": 0.8773538787234816, + "grad_norm": 646.4237060546875, + "learning_rate": 2.0659430248765755e-06, + "loss": 55.3651, + "step": 217160 + }, + { + "epoch": 0.8773942799888492, + "grad_norm": 736.8685302734375, + "learning_rate": 2.0647071350915482e-06, + "loss": 95.7178, + "step": 217170 + }, + { + "epoch": 0.8774346812542169, + "grad_norm": 1046.317138671875, + "learning_rate": 2.063471594964284e-06, + "loss": 54.3431, + "step": 217180 + }, + { + "epoch": 0.8774750825195845, + 
"grad_norm": 644.4703979492188, + "learning_rate": 2.0622364045188736e-06, + "loss": 82.7326, + "step": 217190 + }, + { + "epoch": 0.8775154837849521, + "grad_norm": 636.0122680664062, + "learning_rate": 2.0610015637794013e-06, + "loss": 50.9649, + "step": 217200 + }, + { + "epoch": 0.8775558850503198, + "grad_norm": 607.09619140625, + "learning_rate": 2.0597670727699425e-06, + "loss": 68.6057, + "step": 217210 + }, + { + "epoch": 0.8775962863156874, + "grad_norm": 867.0662841796875, + "learning_rate": 2.0585329315145543e-06, + "loss": 69.1695, + "step": 217220 + }, + { + "epoch": 0.8776366875810551, + "grad_norm": 896.5477294921875, + "learning_rate": 2.0572991400373097e-06, + "loss": 84.3616, + "step": 217230 + }, + { + "epoch": 0.8776770888464227, + "grad_norm": 757.0889892578125, + "learning_rate": 2.056065698362253e-06, + "loss": 85.0974, + "step": 217240 + }, + { + "epoch": 0.8777174901117903, + "grad_norm": 741.3732299804688, + "learning_rate": 2.054832606513433e-06, + "loss": 67.9323, + "step": 217250 + }, + { + "epoch": 0.877757891377158, + "grad_norm": 465.0661315917969, + "learning_rate": 2.05359986451489e-06, + "loss": 66.8212, + "step": 217260 + }, + { + "epoch": 0.8777982926425255, + "grad_norm": 253.58169555664062, + "learning_rate": 2.0523674723906573e-06, + "loss": 69.4101, + "step": 217270 + }, + { + "epoch": 0.8778386939078932, + "grad_norm": 463.1896667480469, + "learning_rate": 2.0511354301647633e-06, + "loss": 57.4876, + "step": 217280 + }, + { + "epoch": 0.8778790951732608, + "grad_norm": 546.6627197265625, + "learning_rate": 2.049903737861223e-06, + "loss": 51.6959, + "step": 217290 + }, + { + "epoch": 0.8779194964386284, + "grad_norm": 848.9699096679688, + "learning_rate": 2.04867239550405e-06, + "loss": 72.3666, + "step": 217300 + }, + { + "epoch": 0.8779598977039961, + "grad_norm": 1228.735107421875, + "learning_rate": 2.047441403117252e-06, + "loss": 95.3457, + "step": 217310 + }, + { + "epoch": 0.8780002989693637, + "grad_norm": 1454.2503662109375, + "learning_rate": 2.046210760724825e-06, + "loss": 74.4062, + "step": 217320 + }, + { + "epoch": 0.8780407002347314, + "grad_norm": 778.6484985351562, + "learning_rate": 2.044980468350761e-06, + "loss": 57.5572, + "step": 217330 + }, + { + "epoch": 0.878081101500099, + "grad_norm": 947.4103393554688, + "learning_rate": 2.043750526019053e-06, + "loss": 75.5401, + "step": 217340 + }, + { + "epoch": 0.8781215027654666, + "grad_norm": 593.4321899414062, + "learning_rate": 2.0425209337536687e-06, + "loss": 82.8141, + "step": 217350 + }, + { + "epoch": 0.8781619040308343, + "grad_norm": 599.8587646484375, + "learning_rate": 2.041291691578582e-06, + "loss": 66.5024, + "step": 217360 + }, + { + "epoch": 0.8782023052962019, + "grad_norm": 866.545166015625, + "learning_rate": 2.040062799517766e-06, + "loss": 71.9015, + "step": 217370 + }, + { + "epoch": 0.8782427065615696, + "grad_norm": 518.690185546875, + "learning_rate": 2.038834257595164e-06, + "loss": 68.441, + "step": 217380 + }, + { + "epoch": 0.8782831078269372, + "grad_norm": 956.8067626953125, + "learning_rate": 2.037606065834741e-06, + "loss": 62.6595, + "step": 217390 + }, + { + "epoch": 0.8783235090923047, + "grad_norm": 409.8570556640625, + "learning_rate": 2.036378224260429e-06, + "loss": 63.8256, + "step": 217400 + }, + { + "epoch": 0.8783639103576724, + "grad_norm": 807.6354370117188, + "learning_rate": 2.0351507328961782e-06, + "loss": 49.1786, + "step": 217410 + }, + { + "epoch": 0.87840431162304, + "grad_norm": 175.0247039794922, + "learning_rate": 
2.0339235917659116e-06, + "loss": 33.7403, + "step": 217420 + }, + { + "epoch": 0.8784447128884076, + "grad_norm": 547.5797119140625, + "learning_rate": 2.0326968008935523e-06, + "loss": 79.2417, + "step": 217430 + }, + { + "epoch": 0.8784851141537753, + "grad_norm": 470.294189453125, + "learning_rate": 2.0314703603030226e-06, + "loss": 73.9628, + "step": 217440 + }, + { + "epoch": 0.8785255154191429, + "grad_norm": 1224.11083984375, + "learning_rate": 2.030244270018227e-06, + "loss": 57.0461, + "step": 217450 + }, + { + "epoch": 0.8785659166845106, + "grad_norm": 688.5800170898438, + "learning_rate": 2.0290185300630693e-06, + "loss": 73.5322, + "step": 217460 + }, + { + "epoch": 0.8786063179498782, + "grad_norm": 560.5717163085938, + "learning_rate": 2.027793140461447e-06, + "loss": 68.4851, + "step": 217470 + }, + { + "epoch": 0.8786467192152458, + "grad_norm": 610.2653198242188, + "learning_rate": 2.026568101237252e-06, + "loss": 61.3501, + "step": 217480 + }, + { + "epoch": 0.8786871204806135, + "grad_norm": 545.2369384765625, + "learning_rate": 2.0253434124143624e-06, + "loss": 81.1678, + "step": 217490 + }, + { + "epoch": 0.8787275217459811, + "grad_norm": 1262.38671875, + "learning_rate": 2.024119074016664e-06, + "loss": 70.0438, + "step": 217500 + }, + { + "epoch": 0.8787679230113488, + "grad_norm": 522.9012451171875, + "learning_rate": 2.022895086068013e-06, + "loss": 66.7408, + "step": 217510 + }, + { + "epoch": 0.8788083242767164, + "grad_norm": 631.0394897460938, + "learning_rate": 2.0216714485922774e-06, + "loss": 64.6847, + "step": 217520 + }, + { + "epoch": 0.8788487255420839, + "grad_norm": 468.2138366699219, + "learning_rate": 2.020448161613313e-06, + "loss": 70.7034, + "step": 217530 + }, + { + "epoch": 0.8788891268074516, + "grad_norm": 861.0613403320312, + "learning_rate": 2.019225225154968e-06, + "loss": 80.0578, + "step": 217540 + }, + { + "epoch": 0.8789295280728192, + "grad_norm": 668.44873046875, + "learning_rate": 2.01800263924109e-06, + "loss": 64.1123, + "step": 217550 + }, + { + "epoch": 0.8789699293381869, + "grad_norm": 509.4530944824219, + "learning_rate": 2.0167804038955e-06, + "loss": 58.2724, + "step": 217560 + }, + { + "epoch": 0.8790103306035545, + "grad_norm": 677.3477783203125, + "learning_rate": 2.01555851914204e-06, + "loss": 58.0485, + "step": 217570 + }, + { + "epoch": 0.8790507318689221, + "grad_norm": 584.3658447265625, + "learning_rate": 2.0143369850045237e-06, + "loss": 93.1805, + "step": 217580 + }, + { + "epoch": 0.8790911331342898, + "grad_norm": 827.1117553710938, + "learning_rate": 2.0131158015067687e-06, + "loss": 70.3215, + "step": 217590 + }, + { + "epoch": 0.8791315343996574, + "grad_norm": 569.8656005859375, + "learning_rate": 2.0118949686725786e-06, + "loss": 81.2688, + "step": 217600 + }, + { + "epoch": 0.879171935665025, + "grad_norm": 694.5744018554688, + "learning_rate": 2.010674486525759e-06, + "loss": 71.894, + "step": 217610 + }, + { + "epoch": 0.8792123369303927, + "grad_norm": 890.7739868164062, + "learning_rate": 2.0094543550901037e-06, + "loss": 59.1602, + "step": 217620 + }, + { + "epoch": 0.8792527381957603, + "grad_norm": 531.1614379882812, + "learning_rate": 2.0082345743893962e-06, + "loss": 78.7147, + "step": 217630 + }, + { + "epoch": 0.879293139461128, + "grad_norm": 1550.711669921875, + "learning_rate": 2.007015144447417e-06, + "loss": 77.1547, + "step": 217640 + }, + { + "epoch": 0.8793335407264955, + "grad_norm": 638.3569946289062, + "learning_rate": 2.0057960652879416e-06, + "loss": 56.7052, + "step": 217650 + 
}, + { + "epoch": 0.8793739419918631, + "grad_norm": 563.8785400390625, + "learning_rate": 2.004577336934741e-06, + "loss": 66.86, + "step": 217660 + }, + { + "epoch": 0.8794143432572308, + "grad_norm": 889.8778076171875, + "learning_rate": 2.003358959411561e-06, + "loss": 47.3373, + "step": 217670 + }, + { + "epoch": 0.8794547445225984, + "grad_norm": 2797.9990234375, + "learning_rate": 2.002140932742169e-06, + "loss": 106.2031, + "step": 217680 + }, + { + "epoch": 0.8794951457879661, + "grad_norm": 531.5037841796875, + "learning_rate": 2.0009232569503047e-06, + "loss": 62.5402, + "step": 217690 + }, + { + "epoch": 0.8795355470533337, + "grad_norm": 581.2844848632812, + "learning_rate": 1.999705932059708e-06, + "loss": 43.4103, + "step": 217700 + }, + { + "epoch": 0.8795759483187013, + "grad_norm": 433.6938781738281, + "learning_rate": 1.9984889580941134e-06, + "loss": 69.3275, + "step": 217710 + }, + { + "epoch": 0.879616349584069, + "grad_norm": 718.2384033203125, + "learning_rate": 1.997272335077236e-06, + "loss": 56.6923, + "step": 217720 + }, + { + "epoch": 0.8796567508494366, + "grad_norm": 429.95416259765625, + "learning_rate": 1.9960560630328143e-06, + "loss": 52.5624, + "step": 217730 + }, + { + "epoch": 0.8796971521148043, + "grad_norm": 342.8020935058594, + "learning_rate": 1.994840141984542e-06, + "loss": 54.8996, + "step": 217740 + }, + { + "epoch": 0.8797375533801719, + "grad_norm": 501.9993896484375, + "learning_rate": 1.9936245719561297e-06, + "loss": 54.5643, + "step": 217750 + }, + { + "epoch": 0.8797779546455395, + "grad_norm": 505.25823974609375, + "learning_rate": 1.9924093529712785e-06, + "loss": 76.5253, + "step": 217760 + }, + { + "epoch": 0.8798183559109072, + "grad_norm": 885.3126831054688, + "learning_rate": 1.991194485053678e-06, + "loss": 74.8109, + "step": 217770 + }, + { + "epoch": 0.8798587571762747, + "grad_norm": 697.8914794921875, + "learning_rate": 1.989979968227016e-06, + "loss": 59.5019, + "step": 217780 + }, + { + "epoch": 0.8798991584416423, + "grad_norm": 478.22296142578125, + "learning_rate": 1.9887658025149627e-06, + "loss": 92.977, + "step": 217790 + }, + { + "epoch": 0.87993955970701, + "grad_norm": 902.0533447265625, + "learning_rate": 1.987551987941194e-06, + "loss": 93.9721, + "step": 217800 + }, + { + "epoch": 0.8799799609723776, + "grad_norm": 689.9537963867188, + "learning_rate": 1.9863385245293722e-06, + "loss": 77.4895, + "step": 217810 + }, + { + "epoch": 0.8800203622377453, + "grad_norm": 383.7301025390625, + "learning_rate": 1.985125412303157e-06, + "loss": 92.8261, + "step": 217820 + }, + { + "epoch": 0.8800607635031129, + "grad_norm": 1118.137939453125, + "learning_rate": 1.9839126512861904e-06, + "loss": 62.0713, + "step": 217830 + }, + { + "epoch": 0.8801011647684805, + "grad_norm": 311.56488037109375, + "learning_rate": 1.9827002415021313e-06, + "loss": 68.7875, + "step": 217840 + }, + { + "epoch": 0.8801415660338482, + "grad_norm": 1174.8587646484375, + "learning_rate": 1.9814881829746e-06, + "loss": 73.2301, + "step": 217850 + }, + { + "epoch": 0.8801819672992158, + "grad_norm": 509.7713317871094, + "learning_rate": 1.9802764757272343e-06, + "loss": 64.224, + "step": 217860 + }, + { + "epoch": 0.8802223685645835, + "grad_norm": 459.357421875, + "learning_rate": 1.979065119783661e-06, + "loss": 87.821, + "step": 217870 + }, + { + "epoch": 0.8802627698299511, + "grad_norm": 522.51318359375, + "learning_rate": 1.977854115167481e-06, + "loss": 59.1691, + "step": 217880 + }, + { + "epoch": 0.8803031710953187, + "grad_norm": 
459.1293029785156, + "learning_rate": 1.9766434619023233e-06, + "loss": 59.3548, + "step": 217890 + }, + { + "epoch": 0.8803435723606864, + "grad_norm": 501.29998779296875, + "learning_rate": 1.975433160011775e-06, + "loss": 73.981, + "step": 217900 + }, + { + "epoch": 0.8803839736260539, + "grad_norm": 827.984375, + "learning_rate": 1.974223209519437e-06, + "loss": 67.9468, + "step": 217910 + }, + { + "epoch": 0.8804243748914216, + "grad_norm": 750.7166748046875, + "learning_rate": 1.9730136104488995e-06, + "loss": 43.3158, + "step": 217920 + }, + { + "epoch": 0.8804647761567892, + "grad_norm": 636.977783203125, + "learning_rate": 1.9718043628237415e-06, + "loss": 81.4934, + "step": 217930 + }, + { + "epoch": 0.8805051774221568, + "grad_norm": 168.40960693359375, + "learning_rate": 1.9705954666675374e-06, + "loss": 41.9432, + "step": 217940 + }, + { + "epoch": 0.8805455786875245, + "grad_norm": 582.017822265625, + "learning_rate": 1.969386922003864e-06, + "loss": 49.2542, + "step": 217950 + }, + { + "epoch": 0.8805859799528921, + "grad_norm": 268.8949890136719, + "learning_rate": 1.9681787288562694e-06, + "loss": 39.0946, + "step": 217960 + }, + { + "epoch": 0.8806263812182598, + "grad_norm": 576.0370483398438, + "learning_rate": 1.966970887248314e-06, + "loss": 60.4711, + "step": 217970 + }, + { + "epoch": 0.8806667824836274, + "grad_norm": 490.67974853515625, + "learning_rate": 1.9657633972035484e-06, + "loss": 72.9215, + "step": 217980 + }, + { + "epoch": 0.880707183748995, + "grad_norm": 1156.4620361328125, + "learning_rate": 1.9645562587455026e-06, + "loss": 57.7047, + "step": 217990 + }, + { + "epoch": 0.8807475850143627, + "grad_norm": 548.9749755859375, + "learning_rate": 1.9633494718977265e-06, + "loss": 63.5047, + "step": 218000 + }, + { + "epoch": 0.8807879862797303, + "grad_norm": 678.4771118164062, + "learning_rate": 1.96214303668373e-06, + "loss": 92.2336, + "step": 218010 + }, + { + "epoch": 0.880828387545098, + "grad_norm": 1124.528076171875, + "learning_rate": 1.9609369531270505e-06, + "loss": 88.8433, + "step": 218020 + }, + { + "epoch": 0.8808687888104656, + "grad_norm": 833.3672485351562, + "learning_rate": 1.9597312212511865e-06, + "loss": 63.4495, + "step": 218030 + }, + { + "epoch": 0.8809091900758331, + "grad_norm": 1000.0946655273438, + "learning_rate": 1.9585258410796503e-06, + "loss": 54.3596, + "step": 218040 + }, + { + "epoch": 0.8809495913412008, + "grad_norm": 440.4555358886719, + "learning_rate": 1.9573208126359455e-06, + "loss": 63.4668, + "step": 218050 + }, + { + "epoch": 0.8809899926065684, + "grad_norm": 764.7147216796875, + "learning_rate": 1.956116135943553e-06, + "loss": 94.4948, + "step": 218060 + }, + { + "epoch": 0.881030393871936, + "grad_norm": 563.7904663085938, + "learning_rate": 1.9549118110259746e-06, + "loss": 41.6732, + "step": 218070 + }, + { + "epoch": 0.8810707951373037, + "grad_norm": 715.0960083007812, + "learning_rate": 1.9537078379066755e-06, + "loss": 59.1646, + "step": 218080 + }, + { + "epoch": 0.8811111964026713, + "grad_norm": 740.7771606445312, + "learning_rate": 1.9525042166091323e-06, + "loss": 74.9337, + "step": 218090 + }, + { + "epoch": 0.881151597668039, + "grad_norm": 744.4171142578125, + "learning_rate": 1.9513009471568133e-06, + "loss": 62.9204, + "step": 218100 + }, + { + "epoch": 0.8811919989334066, + "grad_norm": 720.346435546875, + "learning_rate": 1.9500980295731776e-06, + "loss": 62.2227, + "step": 218110 + }, + { + "epoch": 0.8812324001987742, + "grad_norm": 303.45928955078125, + "learning_rate": 
1.948895463881668e-06, + "loss": 52.7696, + "step": 218120 + }, + { + "epoch": 0.8812728014641419, + "grad_norm": 325.7408142089844, + "learning_rate": 1.9476932501057354e-06, + "loss": 45.1895, + "step": 218130 + }, + { + "epoch": 0.8813132027295095, + "grad_norm": 620.3606567382812, + "learning_rate": 1.9464913882688165e-06, + "loss": 52.274, + "step": 218140 + }, + { + "epoch": 0.8813536039948772, + "grad_norm": 592.4925537109375, + "learning_rate": 1.945289878394343e-06, + "loss": 80.3454, + "step": 218150 + }, + { + "epoch": 0.8813940052602448, + "grad_norm": 777.3637084960938, + "learning_rate": 1.9440887205057434e-06, + "loss": 52.8748, + "step": 218160 + }, + { + "epoch": 0.8814344065256123, + "grad_norm": 629.170654296875, + "learning_rate": 1.942887914626421e-06, + "loss": 89.1182, + "step": 218170 + }, + { + "epoch": 0.88147480779098, + "grad_norm": 726.7734375, + "learning_rate": 1.941687460779804e-06, + "loss": 74.9238, + "step": 218180 + }, + { + "epoch": 0.8815152090563476, + "grad_norm": 2195.578857421875, + "learning_rate": 1.9404873589892824e-06, + "loss": 106.1247, + "step": 218190 + }, + { + "epoch": 0.8815556103217153, + "grad_norm": 320.05242919921875, + "learning_rate": 1.9392876092782576e-06, + "loss": 52.2515, + "step": 218200 + }, + { + "epoch": 0.8815960115870829, + "grad_norm": 644.116455078125, + "learning_rate": 1.9380882116701215e-06, + "loss": 54.1043, + "step": 218210 + }, + { + "epoch": 0.8816364128524505, + "grad_norm": 267.45196533203125, + "learning_rate": 1.936889166188247e-06, + "loss": 58.0607, + "step": 218220 + }, + { + "epoch": 0.8816768141178182, + "grad_norm": 584.6798095703125, + "learning_rate": 1.935690472856029e-06, + "loss": 67.0135, + "step": 218230 + }, + { + "epoch": 0.8817172153831858, + "grad_norm": 553.1689453125, + "learning_rate": 1.934492131696817e-06, + "loss": 74.71, + "step": 218240 + }, + { + "epoch": 0.8817576166485535, + "grad_norm": 473.9559326171875, + "learning_rate": 1.9332941427339836e-06, + "loss": 55.5203, + "step": 218250 + }, + { + "epoch": 0.8817980179139211, + "grad_norm": 675.8111572265625, + "learning_rate": 1.9320965059908837e-06, + "loss": 66.4716, + "step": 218260 + }, + { + "epoch": 0.8818384191792887, + "grad_norm": 836.8706665039062, + "learning_rate": 1.930899221490867e-06, + "loss": 68.7837, + "step": 218270 + }, + { + "epoch": 0.8818788204446564, + "grad_norm": 648.0088500976562, + "learning_rate": 1.9297022892572647e-06, + "loss": 56.9843, + "step": 218280 + }, + { + "epoch": 0.8819192217100239, + "grad_norm": 530.7066040039062, + "learning_rate": 1.9285057093134264e-06, + "loss": 40.9868, + "step": 218290 + }, + { + "epoch": 0.8819596229753915, + "grad_norm": 312.19140625, + "learning_rate": 1.927309481682671e-06, + "loss": 66.661, + "step": 218300 + }, + { + "epoch": 0.8820000242407592, + "grad_norm": 381.5133972167969, + "learning_rate": 1.9261136063883202e-06, + "loss": 51.5127, + "step": 218310 + }, + { + "epoch": 0.8820404255061268, + "grad_norm": 466.1578063964844, + "learning_rate": 1.924918083453695e-06, + "loss": 55.8605, + "step": 218320 + }, + { + "epoch": 0.8820808267714945, + "grad_norm": 543.40576171875, + "learning_rate": 1.9237229129020884e-06, + "loss": 71.7712, + "step": 218330 + }, + { + "epoch": 0.8821212280368621, + "grad_norm": 855.2295532226562, + "learning_rate": 1.922528094756819e-06, + "loss": 75.7386, + "step": 218340 + }, + { + "epoch": 0.8821616293022297, + "grad_norm": 722.1248779296875, + "learning_rate": 1.9213336290411667e-06, + "loss": 57.5406, + "step": 218350 + }, + 
{ + "epoch": 0.8822020305675974, + "grad_norm": 1131.4835205078125, + "learning_rate": 1.920139515778423e-06, + "loss": 65.9918, + "step": 218360 + }, + { + "epoch": 0.882242431832965, + "grad_norm": 660.2716674804688, + "learning_rate": 1.9189457549918655e-06, + "loss": 93.0559, + "step": 218370 + }, + { + "epoch": 0.8822828330983327, + "grad_norm": 969.179443359375, + "learning_rate": 1.917752346704771e-06, + "loss": 47.8845, + "step": 218380 + }, + { + "epoch": 0.8823232343637003, + "grad_norm": 773.4944458007812, + "learning_rate": 1.9165592909404098e-06, + "loss": 42.7063, + "step": 218390 + }, + { + "epoch": 0.8823636356290679, + "grad_norm": 1025.668212890625, + "learning_rate": 1.9153665877220273e-06, + "loss": 90.6888, + "step": 218400 + }, + { + "epoch": 0.8824040368944356, + "grad_norm": 2115.72119140625, + "learning_rate": 1.914174237072888e-06, + "loss": 67.0828, + "step": 218410 + }, + { + "epoch": 0.8824444381598031, + "grad_norm": 944.6837158203125, + "learning_rate": 1.91298223901623e-06, + "loss": 96.6182, + "step": 218420 + }, + { + "epoch": 0.8824848394251708, + "grad_norm": 1069.904052734375, + "learning_rate": 1.9117905935753003e-06, + "loss": 53.6346, + "step": 218430 + }, + { + "epoch": 0.8825252406905384, + "grad_norm": 767.0722045898438, + "learning_rate": 1.910599300773317e-06, + "loss": 69.4682, + "step": 218440 + }, + { + "epoch": 0.882565641955906, + "grad_norm": 332.8385314941406, + "learning_rate": 1.9094083606335202e-06, + "loss": 85.4108, + "step": 218450 + }, + { + "epoch": 0.8826060432212737, + "grad_norm": 1157.61083984375, + "learning_rate": 1.9082177731791197e-06, + "loss": 67.5022, + "step": 218460 + }, + { + "epoch": 0.8826464444866413, + "grad_norm": 772.9024658203125, + "learning_rate": 1.9070275384333258e-06, + "loss": 69.1009, + "step": 218470 + }, + { + "epoch": 0.882686845752009, + "grad_norm": 691.2853393554688, + "learning_rate": 1.9058376564193493e-06, + "loss": 72.21, + "step": 218480 + }, + { + "epoch": 0.8827272470173766, + "grad_norm": 469.6791076660156, + "learning_rate": 1.9046481271603778e-06, + "loss": 75.0289, + "step": 218490 + }, + { + "epoch": 0.8827676482827442, + "grad_norm": 518.1749877929688, + "learning_rate": 1.903458950679613e-06, + "loss": 37.6692, + "step": 218500 + }, + { + "epoch": 0.8828080495481119, + "grad_norm": 1436.873291015625, + "learning_rate": 1.9022701270002276e-06, + "loss": 75.8061, + "step": 218510 + }, + { + "epoch": 0.8828484508134795, + "grad_norm": 616.2140502929688, + "learning_rate": 1.901081656145405e-06, + "loss": 83.9909, + "step": 218520 + }, + { + "epoch": 0.8828888520788472, + "grad_norm": 704.9470825195312, + "learning_rate": 1.8998935381383131e-06, + "loss": 88.5046, + "step": 218530 + }, + { + "epoch": 0.8829292533442148, + "grad_norm": 578.09619140625, + "learning_rate": 1.8987057730021142e-06, + "loss": 75.7494, + "step": 218540 + }, + { + "epoch": 0.8829696546095823, + "grad_norm": 907.3269653320312, + "learning_rate": 1.8975183607599712e-06, + "loss": 62.5192, + "step": 218550 + }, + { + "epoch": 0.88301005587495, + "grad_norm": 623.1795654296875, + "learning_rate": 1.8963313014350237e-06, + "loss": 86.6187, + "step": 218560 + }, + { + "epoch": 0.8830504571403176, + "grad_norm": 369.73272705078125, + "learning_rate": 1.8951445950504156e-06, + "loss": 55.7623, + "step": 218570 + }, + { + "epoch": 0.8830908584056852, + "grad_norm": 775.5799560546875, + "learning_rate": 1.8939582416292856e-06, + "loss": 72.4958, + "step": 218580 + }, + { + "epoch": 0.8831312596710529, + "grad_norm": 
324.7620849609375, + "learning_rate": 1.8927722411947624e-06, + "loss": 77.4224, + "step": 218590 + }, + { + "epoch": 0.8831716609364205, + "grad_norm": 833.0493774414062, + "learning_rate": 1.8915865937699652e-06, + "loss": 74.2546, + "step": 218600 + }, + { + "epoch": 0.8832120622017882, + "grad_norm": 811.6068115234375, + "learning_rate": 1.8904012993780152e-06, + "loss": 55.327, + "step": 218610 + }, + { + "epoch": 0.8832524634671558, + "grad_norm": 714.2890014648438, + "learning_rate": 1.8892163580420076e-06, + "loss": 68.2799, + "step": 218620 + }, + { + "epoch": 0.8832928647325234, + "grad_norm": 947.650634765625, + "learning_rate": 1.8880317697850593e-06, + "loss": 63.1588, + "step": 218630 + }, + { + "epoch": 0.8833332659978911, + "grad_norm": 513.7490234375, + "learning_rate": 1.8868475346302494e-06, + "loss": 63.9742, + "step": 218640 + }, + { + "epoch": 0.8833736672632587, + "grad_norm": 413.03466796875, + "learning_rate": 1.8856636526006756e-06, + "loss": 46.6912, + "step": 218650 + }, + { + "epoch": 0.8834140685286264, + "grad_norm": 519.8155517578125, + "learning_rate": 1.8844801237194165e-06, + "loss": 59.888, + "step": 218660 + }, + { + "epoch": 0.883454469793994, + "grad_norm": 931.6119384765625, + "learning_rate": 1.8832969480095365e-06, + "loss": 53.3875, + "step": 218670 + }, + { + "epoch": 0.8834948710593615, + "grad_norm": 499.6033630371094, + "learning_rate": 1.8821141254941188e-06, + "loss": 41.1392, + "step": 218680 + }, + { + "epoch": 0.8835352723247292, + "grad_norm": 590.7174682617188, + "learning_rate": 1.8809316561962098e-06, + "loss": 57.9793, + "step": 218690 + }, + { + "epoch": 0.8835756735900968, + "grad_norm": 492.726318359375, + "learning_rate": 1.8797495401388644e-06, + "loss": 88.9588, + "step": 218700 + }, + { + "epoch": 0.8836160748554645, + "grad_norm": 632.5693969726562, + "learning_rate": 1.8785677773451327e-06, + "loss": 56.0121, + "step": 218710 + }, + { + "epoch": 0.8836564761208321, + "grad_norm": 481.33087158203125, + "learning_rate": 1.8773863678380544e-06, + "loss": 41.7848, + "step": 218720 + }, + { + "epoch": 0.8836968773861997, + "grad_norm": 662.3269653320312, + "learning_rate": 1.8762053116406553e-06, + "loss": 57.3313, + "step": 218730 + }, + { + "epoch": 0.8837372786515674, + "grad_norm": 703.5881958007812, + "learning_rate": 1.875024608775966e-06, + "loss": 66.2646, + "step": 218740 + }, + { + "epoch": 0.883777679916935, + "grad_norm": 1138.2371826171875, + "learning_rate": 1.8738442592670014e-06, + "loss": 107.459, + "step": 218750 + }, + { + "epoch": 0.8838180811823027, + "grad_norm": 800.0794067382812, + "learning_rate": 1.8726642631367765e-06, + "loss": 52.4641, + "step": 218760 + }, + { + "epoch": 0.8838584824476703, + "grad_norm": 612.7518920898438, + "learning_rate": 1.871484620408297e-06, + "loss": 62.9186, + "step": 218770 + }, + { + "epoch": 0.8838988837130379, + "grad_norm": 873.3214111328125, + "learning_rate": 1.8703053311045494e-06, + "loss": 63.3053, + "step": 218780 + }, + { + "epoch": 0.8839392849784056, + "grad_norm": 845.8798217773438, + "learning_rate": 1.8691263952485417e-06, + "loss": 59.0463, + "step": 218790 + }, + { + "epoch": 0.8839796862437731, + "grad_norm": 2248.568359375, + "learning_rate": 1.8679478128632466e-06, + "loss": 84.9312, + "step": 218800 + }, + { + "epoch": 0.8840200875091407, + "grad_norm": 591.7395629882812, + "learning_rate": 1.8667695839716437e-06, + "loss": 54.4059, + "step": 218810 + }, + { + "epoch": 0.8840604887745084, + "grad_norm": 882.0786743164062, + "learning_rate": 
1.8655917085967057e-06, + "loss": 68.5469, + "step": 218820 + }, + { + "epoch": 0.884100890039876, + "grad_norm": 1056.4039306640625, + "learning_rate": 1.8644141867613874e-06, + "loss": 57.9366, + "step": 218830 + }, + { + "epoch": 0.8841412913052437, + "grad_norm": 934.5703735351562, + "learning_rate": 1.8632370184886595e-06, + "loss": 69.9249, + "step": 218840 + }, + { + "epoch": 0.8841816925706113, + "grad_norm": 288.17901611328125, + "learning_rate": 1.8620602038014567e-06, + "loss": 58.9598, + "step": 218850 + }, + { + "epoch": 0.8842220938359789, + "grad_norm": 503.69775390625, + "learning_rate": 1.8608837427227323e-06, + "loss": 51.4405, + "step": 218860 + }, + { + "epoch": 0.8842624951013466, + "grad_norm": 1145.10693359375, + "learning_rate": 1.859707635275414e-06, + "loss": 53.2904, + "step": 218870 + }, + { + "epoch": 0.8843028963667142, + "grad_norm": 1161.69091796875, + "learning_rate": 1.8585318814824416e-06, + "loss": 102.824, + "step": 218880 + }, + { + "epoch": 0.8843432976320819, + "grad_norm": 356.7070617675781, + "learning_rate": 1.8573564813667233e-06, + "loss": 39.6626, + "step": 218890 + }, + { + "epoch": 0.8843836988974495, + "grad_norm": 791.9981689453125, + "learning_rate": 1.8561814349511832e-06, + "loss": 55.8657, + "step": 218900 + }, + { + "epoch": 0.8844241001628171, + "grad_norm": 804.9954833984375, + "learning_rate": 1.855006742258727e-06, + "loss": 71.9756, + "step": 218910 + }, + { + "epoch": 0.8844645014281848, + "grad_norm": 970.7328491210938, + "learning_rate": 1.8538324033122545e-06, + "loss": 63.8711, + "step": 218920 + }, + { + "epoch": 0.8845049026935523, + "grad_norm": 552.07373046875, + "learning_rate": 1.8526584181346651e-06, + "loss": 63.0552, + "step": 218930 + }, + { + "epoch": 0.88454530395892, + "grad_norm": 1009.2742309570312, + "learning_rate": 1.851484786748836e-06, + "loss": 74.4164, + "step": 218940 + }, + { + "epoch": 0.8845857052242876, + "grad_norm": 364.85150146484375, + "learning_rate": 1.8503115091776624e-06, + "loss": 59.6309, + "step": 218950 + }, + { + "epoch": 0.8846261064896552, + "grad_norm": 810.5731811523438, + "learning_rate": 1.849138585444008e-06, + "loss": 42.3496, + "step": 218960 + }, + { + "epoch": 0.8846665077550229, + "grad_norm": 672.5568237304688, + "learning_rate": 1.847966015570739e-06, + "loss": 57.2029, + "step": 218970 + }, + { + "epoch": 0.8847069090203905, + "grad_norm": 284.84027099609375, + "learning_rate": 1.8467937995807172e-06, + "loss": 79.9977, + "step": 218980 + }, + { + "epoch": 0.8847473102857581, + "grad_norm": 643.2084350585938, + "learning_rate": 1.8456219374967975e-06, + "loss": 48.8362, + "step": 218990 + }, + { + "epoch": 0.8847877115511258, + "grad_norm": 913.6743774414062, + "learning_rate": 1.8444504293418286e-06, + "loss": 80.7242, + "step": 219000 + }, + { + "epoch": 0.8848281128164934, + "grad_norm": 691.2134399414062, + "learning_rate": 1.843279275138643e-06, + "loss": 82.7857, + "step": 219010 + }, + { + "epoch": 0.8848685140818611, + "grad_norm": 677.5723266601562, + "learning_rate": 1.8421084749100737e-06, + "loss": 49.5957, + "step": 219020 + }, + { + "epoch": 0.8849089153472287, + "grad_norm": 386.1488342285156, + "learning_rate": 1.8409380286789491e-06, + "loss": 68.6602, + "step": 219030 + }, + { + "epoch": 0.8849493166125963, + "grad_norm": 788.1815795898438, + "learning_rate": 1.8397679364680864e-06, + "loss": 64.6757, + "step": 219040 + }, + { + "epoch": 0.884989717877964, + "grad_norm": 671.1532592773438, + "learning_rate": 1.8385981983002966e-06, + "loss": 83.013, + 
"step": 219050 + }, + { + "epoch": 0.8850301191433315, + "grad_norm": 1143.5823974609375, + "learning_rate": 1.8374288141983875e-06, + "loss": 45.7933, + "step": 219060 + }, + { + "epoch": 0.8850705204086992, + "grad_norm": 700.924560546875, + "learning_rate": 1.8362597841851526e-06, + "loss": 47.8671, + "step": 219070 + }, + { + "epoch": 0.8851109216740668, + "grad_norm": 792.8190307617188, + "learning_rate": 1.8350911082833843e-06, + "loss": 44.2541, + "step": 219080 + }, + { + "epoch": 0.8851513229394344, + "grad_norm": 731.8705444335938, + "learning_rate": 1.8339227865158714e-06, + "loss": 69.4105, + "step": 219090 + }, + { + "epoch": 0.8851917242048021, + "grad_norm": 1169.003662109375, + "learning_rate": 1.832754818905378e-06, + "loss": 87.8177, + "step": 219100 + }, + { + "epoch": 0.8852321254701697, + "grad_norm": 1363.623046875, + "learning_rate": 1.8315872054746898e-06, + "loss": 44.619, + "step": 219110 + }, + { + "epoch": 0.8852725267355374, + "grad_norm": 1066.1195068359375, + "learning_rate": 1.8304199462465554e-06, + "loss": 63.5499, + "step": 219120 + }, + { + "epoch": 0.885312928000905, + "grad_norm": 1076.030029296875, + "learning_rate": 1.8292530412437458e-06, + "loss": 78.8125, + "step": 219130 + }, + { + "epoch": 0.8853533292662726, + "grad_norm": 505.265625, + "learning_rate": 1.8280864904889983e-06, + "loss": 60.3862, + "step": 219140 + }, + { + "epoch": 0.8853937305316403, + "grad_norm": 1192.3934326171875, + "learning_rate": 1.8269202940050612e-06, + "loss": 74.0261, + "step": 219150 + }, + { + "epoch": 0.8854341317970079, + "grad_norm": 1098.207763671875, + "learning_rate": 1.8257544518146742e-06, + "loss": 59.914, + "step": 219160 + }, + { + "epoch": 0.8854745330623756, + "grad_norm": 760.2313232421875, + "learning_rate": 1.8245889639405544e-06, + "loss": 78.6848, + "step": 219170 + }, + { + "epoch": 0.8855149343277432, + "grad_norm": 345.79302978515625, + "learning_rate": 1.823423830405433e-06, + "loss": 60.0513, + "step": 219180 + }, + { + "epoch": 0.8855553355931107, + "grad_norm": 656.2962646484375, + "learning_rate": 1.8222590512320204e-06, + "loss": 62.6351, + "step": 219190 + }, + { + "epoch": 0.8855957368584784, + "grad_norm": 429.9870910644531, + "learning_rate": 1.8210946264430251e-06, + "loss": 51.0363, + "step": 219200 + }, + { + "epoch": 0.885636138123846, + "grad_norm": 421.1434631347656, + "learning_rate": 1.8199305560611514e-06, + "loss": 33.9487, + "step": 219210 + }, + { + "epoch": 0.8856765393892136, + "grad_norm": 525.7288208007812, + "learning_rate": 1.818766840109094e-06, + "loss": 54.3183, + "step": 219220 + }, + { + "epoch": 0.8857169406545813, + "grad_norm": 765.826416015625, + "learning_rate": 1.817603478609533e-06, + "loss": 52.2679, + "step": 219230 + }, + { + "epoch": 0.8857573419199489, + "grad_norm": 863.7898559570312, + "learning_rate": 1.8164404715851548e-06, + "loss": 59.5173, + "step": 219240 + }, + { + "epoch": 0.8857977431853166, + "grad_norm": 589.6265869140625, + "learning_rate": 1.8152778190586296e-06, + "loss": 74.7055, + "step": 219250 + }, + { + "epoch": 0.8858381444506842, + "grad_norm": 1343.1090087890625, + "learning_rate": 1.8141155210526262e-06, + "loss": 83.7314, + "step": 219260 + }, + { + "epoch": 0.8858785457160518, + "grad_norm": 672.1332397460938, + "learning_rate": 1.8129535775898065e-06, + "loss": 85.2594, + "step": 219270 + }, + { + "epoch": 0.8859189469814195, + "grad_norm": 241.79412841796875, + "learning_rate": 1.8117919886928125e-06, + "loss": 63.0523, + "step": 219280 + }, + { + "epoch": 
0.8859593482467871, + "grad_norm": 582.9332885742188, + "learning_rate": 1.810630754384306e-06, + "loss": 53.7375, + "step": 219290 + }, + { + "epoch": 0.8859997495121548, + "grad_norm": 876.77197265625, + "learning_rate": 1.8094698746869132e-06, + "loss": 60.111, + "step": 219300 + }, + { + "epoch": 0.8860401507775224, + "grad_norm": 480.275146484375, + "learning_rate": 1.8083093496232717e-06, + "loss": 42.9149, + "step": 219310 + }, + { + "epoch": 0.8860805520428899, + "grad_norm": 1551.801025390625, + "learning_rate": 1.8071491792160034e-06, + "loss": 83.0445, + "step": 219320 + }, + { + "epoch": 0.8861209533082576, + "grad_norm": 419.2470703125, + "learning_rate": 1.8059893634877324e-06, + "loss": 70.819, + "step": 219330 + }, + { + "epoch": 0.8861613545736252, + "grad_norm": 912.100341796875, + "learning_rate": 1.8048299024610605e-06, + "loss": 135.4397, + "step": 219340 + }, + { + "epoch": 0.8862017558389929, + "grad_norm": 760.0975952148438, + "learning_rate": 1.8036707961585986e-06, + "loss": 64.5301, + "step": 219350 + }, + { + "epoch": 0.8862421571043605, + "grad_norm": 766.6199951171875, + "learning_rate": 1.8025120446029443e-06, + "loss": 90.1097, + "step": 219360 + }, + { + "epoch": 0.8862825583697281, + "grad_norm": 509.39202880859375, + "learning_rate": 1.8013536478166837e-06, + "loss": 49.3822, + "step": 219370 + }, + { + "epoch": 0.8863229596350958, + "grad_norm": 567.0195922851562, + "learning_rate": 1.800195605822408e-06, + "loss": 76.9777, + "step": 219380 + }, + { + "epoch": 0.8863633609004634, + "grad_norm": 1401.0185546875, + "learning_rate": 1.799037918642681e-06, + "loss": 57.5967, + "step": 219390 + }, + { + "epoch": 0.886403762165831, + "grad_norm": 1083.4351806640625, + "learning_rate": 1.797880586300087e-06, + "loss": 99.6541, + "step": 219400 + }, + { + "epoch": 0.8864441634311987, + "grad_norm": 552.1019287109375, + "learning_rate": 1.796723608817179e-06, + "loss": 47.0819, + "step": 219410 + }, + { + "epoch": 0.8864845646965663, + "grad_norm": 929.6210327148438, + "learning_rate": 1.795566986216517e-06, + "loss": 55.9206, + "step": 219420 + }, + { + "epoch": 0.886524965961934, + "grad_norm": 838.7513427734375, + "learning_rate": 1.7944107185206517e-06, + "loss": 67.4716, + "step": 219430 + }, + { + "epoch": 0.8865653672273015, + "grad_norm": 1256.2666015625, + "learning_rate": 1.793254805752116e-06, + "loss": 62.4839, + "step": 219440 + }, + { + "epoch": 0.8866057684926691, + "grad_norm": 601.4110717773438, + "learning_rate": 1.7920992479334564e-06, + "loss": 52.2131, + "step": 219450 + }, + { + "epoch": 0.8866461697580368, + "grad_norm": 748.6807861328125, + "learning_rate": 1.7909440450871928e-06, + "loss": 71.5313, + "step": 219460 + }, + { + "epoch": 0.8866865710234044, + "grad_norm": 826.4779663085938, + "learning_rate": 1.7897891972358515e-06, + "loss": 75.2757, + "step": 219470 + }, + { + "epoch": 0.8867269722887721, + "grad_norm": 468.3273620605469, + "learning_rate": 1.7886347044019436e-06, + "loss": 69.9665, + "step": 219480 + }, + { + "epoch": 0.8867673735541397, + "grad_norm": 406.4679870605469, + "learning_rate": 1.7874805666079819e-06, + "loss": 58.2305, + "step": 219490 + }, + { + "epoch": 0.8868077748195073, + "grad_norm": 752.6871948242188, + "learning_rate": 1.7863267838764597e-06, + "loss": 91.9557, + "step": 219500 + }, + { + "epoch": 0.886848176084875, + "grad_norm": 1669.9566650390625, + "learning_rate": 1.7851733562298746e-06, + "loss": 77.4623, + "step": 219510 + }, + { + "epoch": 0.8868885773502426, + "grad_norm": 404.0237731933594, 
+ "learning_rate": 1.7840202836907107e-06, + "loss": 65.586, + "step": 219520 + }, + { + "epoch": 0.8869289786156103, + "grad_norm": 415.7674255371094, + "learning_rate": 1.7828675662814498e-06, + "loss": 44.0167, + "step": 219530 + }, + { + "epoch": 0.8869693798809779, + "grad_norm": 1645.96728515625, + "learning_rate": 1.78171520402457e-06, + "loss": 92.8938, + "step": 219540 + }, + { + "epoch": 0.8870097811463455, + "grad_norm": 573.544921875, + "learning_rate": 1.7805631969425241e-06, + "loss": 51.3447, + "step": 219550 + }, + { + "epoch": 0.8870501824117132, + "grad_norm": 694.6473999023438, + "learning_rate": 1.7794115450577832e-06, + "loss": 45.6466, + "step": 219560 + }, + { + "epoch": 0.8870905836770807, + "grad_norm": 929.429443359375, + "learning_rate": 1.7782602483927935e-06, + "loss": 67.6749, + "step": 219570 + }, + { + "epoch": 0.8871309849424484, + "grad_norm": 1290.4210205078125, + "learning_rate": 1.7771093069700018e-06, + "loss": 71.0574, + "step": 219580 + }, + { + "epoch": 0.887171386207816, + "grad_norm": 833.4881591796875, + "learning_rate": 1.7759587208118457e-06, + "loss": 52.6833, + "step": 219590 + }, + { + "epoch": 0.8872117874731836, + "grad_norm": 505.1868591308594, + "learning_rate": 1.7748084899407558e-06, + "loss": 69.5924, + "step": 219600 + }, + { + "epoch": 0.8872521887385513, + "grad_norm": 593.65380859375, + "learning_rate": 1.7736586143791612e-06, + "loss": 80.7429, + "step": 219610 + }, + { + "epoch": 0.8872925900039189, + "grad_norm": 487.65570068359375, + "learning_rate": 1.7725090941494727e-06, + "loss": 94.376, + "step": 219620 + }, + { + "epoch": 0.8873329912692866, + "grad_norm": 618.5480346679688, + "learning_rate": 1.7713599292741012e-06, + "loss": 63.45, + "step": 219630 + }, + { + "epoch": 0.8873733925346542, + "grad_norm": 726.8854370117188, + "learning_rate": 1.7702111197754557e-06, + "loss": 55.9806, + "step": 219640 + }, + { + "epoch": 0.8874137938000218, + "grad_norm": 858.5195922851562, + "learning_rate": 1.769062665675927e-06, + "loss": 65.7797, + "step": 219650 + }, + { + "epoch": 0.8874541950653895, + "grad_norm": 675.180908203125, + "learning_rate": 1.7679145669979102e-06, + "loss": 57.9891, + "step": 219660 + }, + { + "epoch": 0.8874945963307571, + "grad_norm": 964.15234375, + "learning_rate": 1.766766823763786e-06, + "loss": 70.8095, + "step": 219670 + }, + { + "epoch": 0.8875349975961248, + "grad_norm": 480.9530029296875, + "learning_rate": 1.7656194359959288e-06, + "loss": 52.1846, + "step": 219680 + }, + { + "epoch": 0.8875753988614924, + "grad_norm": 657.0711059570312, + "learning_rate": 1.764472403716706e-06, + "loss": 95.3098, + "step": 219690 + }, + { + "epoch": 0.8876158001268599, + "grad_norm": 1459.75634765625, + "learning_rate": 1.7633257269484883e-06, + "loss": 58.5825, + "step": 219700 + }, + { + "epoch": 0.8876562013922276, + "grad_norm": 525.1651000976562, + "learning_rate": 1.7621794057136155e-06, + "loss": 68.3107, + "step": 219710 + }, + { + "epoch": 0.8876966026575952, + "grad_norm": 1897.60400390625, + "learning_rate": 1.761033440034452e-06, + "loss": 85.7658, + "step": 219720 + }, + { + "epoch": 0.8877370039229628, + "grad_norm": 794.953125, + "learning_rate": 1.7598878299333245e-06, + "loss": 87.622, + "step": 219730 + }, + { + "epoch": 0.8877774051883305, + "grad_norm": 242.95852661132812, + "learning_rate": 1.7587425754325816e-06, + "loss": 80.1074, + "step": 219740 + }, + { + "epoch": 0.8878178064536981, + "grad_norm": 469.82281494140625, + "learning_rate": 1.757597676554541e-06, + "loss": 64.8517, + 
"step": 219750 + }, + { + "epoch": 0.8878582077190658, + "grad_norm": 429.98785400390625, + "learning_rate": 1.7564531333215229e-06, + "loss": 81.6408, + "step": 219760 + }, + { + "epoch": 0.8878986089844334, + "grad_norm": 446.5859680175781, + "learning_rate": 1.7553089457558491e-06, + "loss": 46.3842, + "step": 219770 + }, + { + "epoch": 0.887939010249801, + "grad_norm": 1444.1187744140625, + "learning_rate": 1.7541651138798176e-06, + "loss": 132.1356, + "step": 219780 + }, + { + "epoch": 0.8879794115151687, + "grad_norm": 608.3953857421875, + "learning_rate": 1.7530216377157282e-06, + "loss": 80.3679, + "step": 219790 + }, + { + "epoch": 0.8880198127805363, + "grad_norm": 977.6787719726562, + "learning_rate": 1.7518785172858787e-06, + "loss": 72.8887, + "step": 219800 + }, + { + "epoch": 0.888060214045904, + "grad_norm": 967.2461547851562, + "learning_rate": 1.750735752612549e-06, + "loss": 65.3866, + "step": 219810 + }, + { + "epoch": 0.8881006153112716, + "grad_norm": 413.30096435546875, + "learning_rate": 1.7495933437180235e-06, + "loss": 57.0584, + "step": 219820 + }, + { + "epoch": 0.8881410165766391, + "grad_norm": 730.2781372070312, + "learning_rate": 1.7484512906245754e-06, + "loss": 65.884, + "step": 219830 + }, + { + "epoch": 0.8881814178420068, + "grad_norm": 481.10980224609375, + "learning_rate": 1.7473095933544627e-06, + "loss": 63.5367, + "step": 219840 + }, + { + "epoch": 0.8882218191073744, + "grad_norm": 844.7787475585938, + "learning_rate": 1.746168251929945e-06, + "loss": 73.7228, + "step": 219850 + }, + { + "epoch": 0.888262220372742, + "grad_norm": 827.5298461914062, + "learning_rate": 1.7450272663732758e-06, + "loss": 69.1718, + "step": 219860 + }, + { + "epoch": 0.8883026216381097, + "grad_norm": 369.1411437988281, + "learning_rate": 1.7438866367066975e-06, + "loss": 52.7565, + "step": 219870 + }, + { + "epoch": 0.8883430229034773, + "grad_norm": 1028.748291015625, + "learning_rate": 1.7427463629524522e-06, + "loss": 70.243, + "step": 219880 + }, + { + "epoch": 0.888383424168845, + "grad_norm": 439.8973083496094, + "learning_rate": 1.7416064451327597e-06, + "loss": 51.4507, + "step": 219890 + }, + { + "epoch": 0.8884238254342126, + "grad_norm": 662.4431762695312, + "learning_rate": 1.7404668832698557e-06, + "loss": 49.3627, + "step": 219900 + }, + { + "epoch": 0.8884642266995803, + "grad_norm": 532.4296264648438, + "learning_rate": 1.739327677385947e-06, + "loss": 82.9827, + "step": 219910 + }, + { + "epoch": 0.8885046279649479, + "grad_norm": 701.12744140625, + "learning_rate": 1.7381888275032466e-06, + "loss": 67.568, + "step": 219920 + }, + { + "epoch": 0.8885450292303155, + "grad_norm": 446.3945007324219, + "learning_rate": 1.737050333643957e-06, + "loss": 34.9816, + "step": 219930 + }, + { + "epoch": 0.8885854304956832, + "grad_norm": 937.2544555664062, + "learning_rate": 1.7359121958302783e-06, + "loss": 84.8295, + "step": 219940 + }, + { + "epoch": 0.8886258317610508, + "grad_norm": 461.5389404296875, + "learning_rate": 1.734774414084388e-06, + "loss": 29.6401, + "step": 219950 + }, + { + "epoch": 0.8886662330264183, + "grad_norm": 601.3448486328125, + "learning_rate": 1.7336369884284777e-06, + "loss": 60.4686, + "step": 219960 + }, + { + "epoch": 0.888706634291786, + "grad_norm": 1199.8951416015625, + "learning_rate": 1.7324999188847158e-06, + "loss": 69.8319, + "step": 219970 + }, + { + "epoch": 0.8887470355571536, + "grad_norm": 488.5948486328125, + "learning_rate": 1.7313632054752739e-06, + "loss": 81.3995, + "step": 219980 + }, + { + "epoch": 
0.8887874368225213, + "grad_norm": 579.460693359375, + "learning_rate": 1.730226848222314e-06, + "loss": 70.7519, + "step": 219990 + }, + { + "epoch": 0.8888278380878889, + "grad_norm": 366.9617004394531, + "learning_rate": 1.7290908471479805e-06, + "loss": 44.3186, + "step": 220000 + }, + { + "epoch": 0.8888682393532565, + "grad_norm": 752.8256225585938, + "learning_rate": 1.727955202274436e-06, + "loss": 72.8217, + "step": 220010 + }, + { + "epoch": 0.8889086406186242, + "grad_norm": 370.5260314941406, + "learning_rate": 1.7268199136238072e-06, + "loss": 61.5301, + "step": 220020 + }, + { + "epoch": 0.8889490418839918, + "grad_norm": 612.1559448242188, + "learning_rate": 1.725684981218232e-06, + "loss": 52.8512, + "step": 220030 + }, + { + "epoch": 0.8889894431493595, + "grad_norm": 1079.423095703125, + "learning_rate": 1.724550405079839e-06, + "loss": 72.9126, + "step": 220040 + }, + { + "epoch": 0.8890298444147271, + "grad_norm": 545.60693359375, + "learning_rate": 1.7234161852307397e-06, + "loss": 53.2746, + "step": 220050 + }, + { + "epoch": 0.8890702456800947, + "grad_norm": 272.39776611328125, + "learning_rate": 1.7222823216930563e-06, + "loss": 59.7147, + "step": 220060 + }, + { + "epoch": 0.8891106469454624, + "grad_norm": 545.389892578125, + "learning_rate": 1.7211488144888866e-06, + "loss": 52.8069, + "step": 220070 + }, + { + "epoch": 0.8891510482108299, + "grad_norm": 903.7786254882812, + "learning_rate": 1.7200156636403309e-06, + "loss": 78.1543, + "step": 220080 + }, + { + "epoch": 0.8891914494761975, + "grad_norm": 402.2043151855469, + "learning_rate": 1.7188828691694826e-06, + "loss": 43.8754, + "step": 220090 + }, + { + "epoch": 0.8892318507415652, + "grad_norm": 652.1101684570312, + "learning_rate": 1.717750431098424e-06, + "loss": 64.9136, + "step": 220100 + }, + { + "epoch": 0.8892722520069328, + "grad_norm": 882.0092163085938, + "learning_rate": 1.716618349449235e-06, + "loss": 70.6394, + "step": 220110 + }, + { + "epoch": 0.8893126532723005, + "grad_norm": 1252.4437255859375, + "learning_rate": 1.7154866242439805e-06, + "loss": 59.9626, + "step": 220120 + }, + { + "epoch": 0.8893530545376681, + "grad_norm": 505.98101806640625, + "learning_rate": 1.7143552555047294e-06, + "loss": 62.0798, + "step": 220130 + }, + { + "epoch": 0.8893934558030357, + "grad_norm": 863.2915649414062, + "learning_rate": 1.7132242432535372e-06, + "loss": 68.6994, + "step": 220140 + }, + { + "epoch": 0.8894338570684034, + "grad_norm": 650.9073486328125, + "learning_rate": 1.7120935875124556e-06, + "loss": 55.4039, + "step": 220150 + }, + { + "epoch": 0.889474258333771, + "grad_norm": 762.1167602539062, + "learning_rate": 1.7109632883035176e-06, + "loss": 60.6699, + "step": 220160 + }, + { + "epoch": 0.8895146595991387, + "grad_norm": 781.33642578125, + "learning_rate": 1.7098333456487727e-06, + "loss": 89.3206, + "step": 220170 + }, + { + "epoch": 0.8895550608645063, + "grad_norm": 769.5177001953125, + "learning_rate": 1.7087037595702404e-06, + "loss": 44.8922, + "step": 220180 + }, + { + "epoch": 0.889595462129874, + "grad_norm": 664.865966796875, + "learning_rate": 1.7075745300899438e-06, + "loss": 69.4555, + "step": 220190 + }, + { + "epoch": 0.8896358633952416, + "grad_norm": 1737.349365234375, + "learning_rate": 1.7064456572299004e-06, + "loss": 66.8827, + "step": 220200 + }, + { + "epoch": 0.8896762646606091, + "grad_norm": 800.5678100585938, + "learning_rate": 1.7053171410121173e-06, + "loss": 51.0341, + "step": 220210 + }, + { + "epoch": 0.8897166659259768, + "grad_norm": 
1518.8480224609375, + "learning_rate": 1.7041889814585988e-06, + "loss": 59.9677, + "step": 220220 + }, + { + "epoch": 0.8897570671913444, + "grad_norm": 360.88140869140625, + "learning_rate": 1.703061178591332e-06, + "loss": 63.2974, + "step": 220230 + }, + { + "epoch": 0.889797468456712, + "grad_norm": 653.5325927734375, + "learning_rate": 1.7019337324323082e-06, + "loss": 59.6571, + "step": 220240 + }, + { + "epoch": 0.8898378697220797, + "grad_norm": 509.6645812988281, + "learning_rate": 1.7008066430035054e-06, + "loss": 57.7304, + "step": 220250 + }, + { + "epoch": 0.8898782709874473, + "grad_norm": 709.4850463867188, + "learning_rate": 1.6996799103268969e-06, + "loss": 58.0749, + "step": 220260 + }, + { + "epoch": 0.889918672252815, + "grad_norm": 801.1353759765625, + "learning_rate": 1.698553534424452e-06, + "loss": 80.9108, + "step": 220270 + }, + { + "epoch": 0.8899590735181826, + "grad_norm": 981.542236328125, + "learning_rate": 1.697427515318133e-06, + "loss": 85.4999, + "step": 220280 + }, + { + "epoch": 0.8899994747835502, + "grad_norm": 425.20294189453125, + "learning_rate": 1.6963018530298825e-06, + "loss": 78.7412, + "step": 220290 + }, + { + "epoch": 0.8900398760489179, + "grad_norm": 1491.7506103515625, + "learning_rate": 1.6951765475816495e-06, + "loss": 68.653, + "step": 220300 + }, + { + "epoch": 0.8900802773142855, + "grad_norm": 605.2929077148438, + "learning_rate": 1.6940515989953787e-06, + "loss": 64.5379, + "step": 220310 + }, + { + "epoch": 0.8901206785796532, + "grad_norm": 832.0281372070312, + "learning_rate": 1.6929270072929882e-06, + "loss": 79.5253, + "step": 220320 + }, + { + "epoch": 0.8901610798450208, + "grad_norm": 910.6145629882812, + "learning_rate": 1.6918027724964182e-06, + "loss": 49.291, + "step": 220330 + }, + { + "epoch": 0.8902014811103883, + "grad_norm": 367.8435974121094, + "learning_rate": 1.6906788946275731e-06, + "loss": 41.6823, + "step": 220340 + }, + { + "epoch": 0.890241882375756, + "grad_norm": 1388.43798828125, + "learning_rate": 1.6895553737083759e-06, + "loss": 88.4075, + "step": 220350 + }, + { + "epoch": 0.8902822836411236, + "grad_norm": 977.73681640625, + "learning_rate": 1.6884322097607197e-06, + "loss": 100.0292, + "step": 220360 + }, + { + "epoch": 0.8903226849064912, + "grad_norm": 486.7503662109375, + "learning_rate": 1.6873094028065051e-06, + "loss": 94.0028, + "step": 220370 + }, + { + "epoch": 0.8903630861718589, + "grad_norm": 586.5834350585938, + "learning_rate": 1.6861869528676234e-06, + "loss": 75.1862, + "step": 220380 + }, + { + "epoch": 0.8904034874372265, + "grad_norm": 740.9622192382812, + "learning_rate": 1.6850648599659548e-06, + "loss": 61.4748, + "step": 220390 + }, + { + "epoch": 0.8904438887025942, + "grad_norm": 821.1700439453125, + "learning_rate": 1.683943124123375e-06, + "loss": 87.3424, + "step": 220400 + }, + { + "epoch": 0.8904842899679618, + "grad_norm": 193.85939025878906, + "learning_rate": 1.6828217453617534e-06, + "loss": 57.0162, + "step": 220410 + }, + { + "epoch": 0.8905246912333294, + "grad_norm": 2058.58837890625, + "learning_rate": 1.6817007237029525e-06, + "loss": 99.6407, + "step": 220420 + }, + { + "epoch": 0.8905650924986971, + "grad_norm": 955.1715087890625, + "learning_rate": 1.680580059168826e-06, + "loss": 80.5179, + "step": 220430 + }, + { + "epoch": 0.8906054937640647, + "grad_norm": 770.6917114257812, + "learning_rate": 1.6794597517812271e-06, + "loss": 51.0294, + "step": 220440 + }, + { + "epoch": 0.8906458950294324, + "grad_norm": 426.6238708496094, + "learning_rate": 
1.67833980156199e-06, + "loss": 55.6197, + "step": 220450 + }, + { + "epoch": 0.8906862962948, + "grad_norm": 640.5567626953125, + "learning_rate": 1.677220208532948e-06, + "loss": 29.6902, + "step": 220460 + }, + { + "epoch": 0.8907266975601675, + "grad_norm": 533.095703125, + "learning_rate": 1.676100972715937e-06, + "loss": 67.3474, + "step": 220470 + }, + { + "epoch": 0.8907670988255352, + "grad_norm": 629.9608154296875, + "learning_rate": 1.6749820941327643e-06, + "loss": 78.3582, + "step": 220480 + }, + { + "epoch": 0.8908075000909028, + "grad_norm": 570.25244140625, + "learning_rate": 1.6738635728052566e-06, + "loss": 110.9105, + "step": 220490 + }, + { + "epoch": 0.8908479013562705, + "grad_norm": 546.55517578125, + "learning_rate": 1.6727454087552074e-06, + "loss": 71.2166, + "step": 220500 + }, + { + "epoch": 0.8908883026216381, + "grad_norm": 868.41845703125, + "learning_rate": 1.6716276020044264e-06, + "loss": 99.5428, + "step": 220510 + }, + { + "epoch": 0.8909287038870057, + "grad_norm": 831.2777709960938, + "learning_rate": 1.6705101525747003e-06, + "loss": 39.8076, + "step": 220520 + }, + { + "epoch": 0.8909691051523734, + "grad_norm": 713.010009765625, + "learning_rate": 1.669393060487814e-06, + "loss": 59.5726, + "step": 220530 + }, + { + "epoch": 0.891009506417741, + "grad_norm": 816.619384765625, + "learning_rate": 1.6682763257655522e-06, + "loss": 65.7893, + "step": 220540 + }, + { + "epoch": 0.8910499076831087, + "grad_norm": 759.829345703125, + "learning_rate": 1.6671599484296775e-06, + "loss": 64.5642, + "step": 220550 + }, + { + "epoch": 0.8910903089484763, + "grad_norm": 437.42669677734375, + "learning_rate": 1.666043928501957e-06, + "loss": 64.2295, + "step": 220560 + }, + { + "epoch": 0.8911307102138439, + "grad_norm": 743.0881958007812, + "learning_rate": 1.6649282660041487e-06, + "loss": 55.6986, + "step": 220570 + }, + { + "epoch": 0.8911711114792116, + "grad_norm": 955.76123046875, + "learning_rate": 1.6638129609580045e-06, + "loss": 76.3335, + "step": 220580 + }, + { + "epoch": 0.8912115127445792, + "grad_norm": 2209.108154296875, + "learning_rate": 1.6626980133852667e-06, + "loss": 76.759, + "step": 220590 + }, + { + "epoch": 0.8912519140099467, + "grad_norm": 466.2005615234375, + "learning_rate": 1.6615834233076756e-06, + "loss": 55.0248, + "step": 220600 + }, + { + "epoch": 0.8912923152753144, + "grad_norm": 528.9178466796875, + "learning_rate": 1.6604691907469495e-06, + "loss": 39.8318, + "step": 220610 + }, + { + "epoch": 0.891332716540682, + "grad_norm": 375.0162048339844, + "learning_rate": 1.6593553157248243e-06, + "loss": 50.7802, + "step": 220620 + }, + { + "epoch": 0.8913731178060497, + "grad_norm": 253.44577026367188, + "learning_rate": 1.6582417982630072e-06, + "loss": 52.4809, + "step": 220630 + }, + { + "epoch": 0.8914135190714173, + "grad_norm": 305.50836181640625, + "learning_rate": 1.6571286383832097e-06, + "loss": 71.8923, + "step": 220640 + }, + { + "epoch": 0.8914539203367849, + "grad_norm": 1446.7620849609375, + "learning_rate": 1.6560158361071344e-06, + "loss": 82.7373, + "step": 220650 + }, + { + "epoch": 0.8914943216021526, + "grad_norm": 1413.450439453125, + "learning_rate": 1.6549033914564704e-06, + "loss": 63.0672, + "step": 220660 + }, + { + "epoch": 0.8915347228675202, + "grad_norm": 249.8374786376953, + "learning_rate": 1.6537913044529142e-06, + "loss": 59.2668, + "step": 220670 + }, + { + "epoch": 0.8915751241328879, + "grad_norm": 689.407958984375, + "learning_rate": 1.652679575118139e-06, + "loss": 66.0198, + "step": 220680 
+ }, + { + "epoch": 0.8916155253982555, + "grad_norm": 1332.32470703125, + "learning_rate": 1.6515682034738213e-06, + "loss": 70.4106, + "step": 220690 + }, + { + "epoch": 0.8916559266636231, + "grad_norm": 981.78125, + "learning_rate": 1.6504571895416277e-06, + "loss": 95.8059, + "step": 220700 + }, + { + "epoch": 0.8916963279289908, + "grad_norm": 494.5906066894531, + "learning_rate": 1.649346533343219e-06, + "loss": 53.9419, + "step": 220710 + }, + { + "epoch": 0.8917367291943583, + "grad_norm": 538.2669677734375, + "learning_rate": 1.648236234900249e-06, + "loss": 77.3244, + "step": 220720 + }, + { + "epoch": 0.891777130459726, + "grad_norm": 91.97218322753906, + "learning_rate": 1.6471262942343602e-06, + "loss": 75.7583, + "step": 220730 + }, + { + "epoch": 0.8918175317250936, + "grad_norm": 438.6918640136719, + "learning_rate": 1.646016711367191e-06, + "loss": 66.5238, + "step": 220740 + }, + { + "epoch": 0.8918579329904612, + "grad_norm": 1203.455810546875, + "learning_rate": 1.6449074863203773e-06, + "loss": 77.4733, + "step": 220750 + }, + { + "epoch": 0.8918983342558289, + "grad_norm": 818.682861328125, + "learning_rate": 1.6437986191155442e-06, + "loss": 59.6207, + "step": 220760 + }, + { + "epoch": 0.8919387355211965, + "grad_norm": 606.0189819335938, + "learning_rate": 1.642690109774301e-06, + "loss": 56.968, + "step": 220770 + }, + { + "epoch": 0.8919791367865642, + "grad_norm": 1831.9705810546875, + "learning_rate": 1.6415819583182723e-06, + "loss": 42.9059, + "step": 220780 + }, + { + "epoch": 0.8920195380519318, + "grad_norm": 516.8098754882812, + "learning_rate": 1.6404741647690503e-06, + "loss": 50.4686, + "step": 220790 + }, + { + "epoch": 0.8920599393172994, + "grad_norm": 843.4729614257812, + "learning_rate": 1.6393667291482374e-06, + "loss": 59.3552, + "step": 220800 + }, + { + "epoch": 0.8921003405826671, + "grad_norm": 429.7647399902344, + "learning_rate": 1.6382596514774275e-06, + "loss": 88.9628, + "step": 220810 + }, + { + "epoch": 0.8921407418480347, + "grad_norm": 1324.25341796875, + "learning_rate": 1.6371529317781897e-06, + "loss": 72.2841, + "step": 220820 + }, + { + "epoch": 0.8921811431134024, + "grad_norm": 1379.561767578125, + "learning_rate": 1.6360465700721162e-06, + "loss": 58.9253, + "step": 220830 + }, + { + "epoch": 0.89222154437877, + "grad_norm": 498.8700256347656, + "learning_rate": 1.6349405663807671e-06, + "loss": 82.0088, + "step": 220840 + }, + { + "epoch": 0.8922619456441375, + "grad_norm": 890.2133178710938, + "learning_rate": 1.6338349207257075e-06, + "loss": 79.5608, + "step": 220850 + }, + { + "epoch": 0.8923023469095052, + "grad_norm": 976.6843872070312, + "learning_rate": 1.6327296331284893e-06, + "loss": 58.65, + "step": 220860 + }, + { + "epoch": 0.8923427481748728, + "grad_norm": 424.9386291503906, + "learning_rate": 1.6316247036106636e-06, + "loss": 57.4203, + "step": 220870 + }, + { + "epoch": 0.8923831494402404, + "grad_norm": 719.2518310546875, + "learning_rate": 1.6305201321937758e-06, + "loss": 56.8704, + "step": 220880 + }, + { + "epoch": 0.8924235507056081, + "grad_norm": 943.808349609375, + "learning_rate": 1.6294159188993507e-06, + "loss": 59.7588, + "step": 220890 + }, + { + "epoch": 0.8924639519709757, + "grad_norm": 483.03436279296875, + "learning_rate": 1.6283120637489202e-06, + "loss": 86.4996, + "step": 220900 + }, + { + "epoch": 0.8925043532363434, + "grad_norm": 311.8918151855469, + "learning_rate": 1.6272085667640047e-06, + "loss": 61.9805, + "step": 220910 + }, + { + "epoch": 0.892544754501711, + "grad_norm": 
489.3194885253906, + "learning_rate": 1.6261054279661204e-06, + "loss": 89.3326, + "step": 220920 + }, + { + "epoch": 0.8925851557670786, + "grad_norm": 525.5072631835938, + "learning_rate": 1.6250026473767633e-06, + "loss": 64.6445, + "step": 220930 + }, + { + "epoch": 0.8926255570324463, + "grad_norm": 882.951416015625, + "learning_rate": 1.6239002250174473e-06, + "loss": 107.9664, + "step": 220940 + }, + { + "epoch": 0.8926659582978139, + "grad_norm": 616.5293579101562, + "learning_rate": 1.6227981609096488e-06, + "loss": 63.436, + "step": 220950 + }, + { + "epoch": 0.8927063595631816, + "grad_norm": 610.52197265625, + "learning_rate": 1.6216964550748703e-06, + "loss": 67.4082, + "step": 220960 + }, + { + "epoch": 0.8927467608285492, + "grad_norm": 1042.026611328125, + "learning_rate": 1.6205951075345772e-06, + "loss": 85.4354, + "step": 220970 + }, + { + "epoch": 0.8927871620939167, + "grad_norm": 737.819091796875, + "learning_rate": 1.6194941183102431e-06, + "loss": 61.5743, + "step": 220980 + }, + { + "epoch": 0.8928275633592844, + "grad_norm": 680.5140991210938, + "learning_rate": 1.6183934874233399e-06, + "loss": 36.1813, + "step": 220990 + }, + { + "epoch": 0.892867964624652, + "grad_norm": 818.8609619140625, + "learning_rate": 1.6172932148953125e-06, + "loss": 77.0852, + "step": 221000 + }, + { + "epoch": 0.8929083658900197, + "grad_norm": 544.2930908203125, + "learning_rate": 1.6161933007476217e-06, + "loss": 50.3119, + "step": 221010 + }, + { + "epoch": 0.8929487671553873, + "grad_norm": 489.115234375, + "learning_rate": 1.6150937450017035e-06, + "loss": 68.4272, + "step": 221020 + }, + { + "epoch": 0.8929891684207549, + "grad_norm": 1507.3304443359375, + "learning_rate": 1.6139945476790008e-06, + "loss": 69.0073, + "step": 221030 + }, + { + "epoch": 0.8930295696861226, + "grad_norm": 589.3776245117188, + "learning_rate": 1.6128957088009388e-06, + "loss": 68.6411, + "step": 221040 + }, + { + "epoch": 0.8930699709514902, + "grad_norm": 401.0243225097656, + "learning_rate": 1.611797228388945e-06, + "loss": 65.3773, + "step": 221050 + }, + { + "epoch": 0.8931103722168579, + "grad_norm": 993.5067138671875, + "learning_rate": 1.6106991064644284e-06, + "loss": 54.9092, + "step": 221060 + }, + { + "epoch": 0.8931507734822255, + "grad_norm": 830.2800903320312, + "learning_rate": 1.609601343048799e-06, + "loss": 66.8838, + "step": 221070 + }, + { + "epoch": 0.8931911747475931, + "grad_norm": 575.3751831054688, + "learning_rate": 1.6085039381634638e-06, + "loss": 60.6112, + "step": 221080 + }, + { + "epoch": 0.8932315760129608, + "grad_norm": 1344.0823974609375, + "learning_rate": 1.6074068918298036e-06, + "loss": 65.161, + "step": 221090 + }, + { + "epoch": 0.8932719772783284, + "grad_norm": 1332.4002685546875, + "learning_rate": 1.6063102040692258e-06, + "loss": 57.316, + "step": 221100 + }, + { + "epoch": 0.8933123785436959, + "grad_norm": 913.5841064453125, + "learning_rate": 1.605213874903091e-06, + "loss": 73.0439, + "step": 221110 + }, + { + "epoch": 0.8933527798090636, + "grad_norm": 1267.6986083984375, + "learning_rate": 1.6041179043527888e-06, + "loss": 93.9197, + "step": 221120 + }, + { + "epoch": 0.8933931810744312, + "grad_norm": 607.529296875, + "learning_rate": 1.6030222924396754e-06, + "loss": 75.2403, + "step": 221130 + }, + { + "epoch": 0.8934335823397989, + "grad_norm": 1111.951171875, + "learning_rate": 1.601927039185116e-06, + "loss": 71.0258, + "step": 221140 + }, + { + "epoch": 0.8934739836051665, + "grad_norm": 466.9970703125, + "learning_rate": 
1.6008321446104625e-06, + "loss": 55.2363, + "step": 221150 + }, + { + "epoch": 0.8935143848705341, + "grad_norm": 821.0440673828125, + "learning_rate": 1.5997376087370508e-06, + "loss": 75.0917, + "step": 221160 + }, + { + "epoch": 0.8935547861359018, + "grad_norm": 1292.1806640625, + "learning_rate": 1.5986434315862376e-06, + "loss": 63.8985, + "step": 221170 + }, + { + "epoch": 0.8935951874012694, + "grad_norm": 562.1430053710938, + "learning_rate": 1.597549613179339e-06, + "loss": 80.6998, + "step": 221180 + }, + { + "epoch": 0.8936355886666371, + "grad_norm": 620.8677978515625, + "learning_rate": 1.5964561535376844e-06, + "loss": 64.9451, + "step": 221190 + }, + { + "epoch": 0.8936759899320047, + "grad_norm": 945.6676025390625, + "learning_rate": 1.5953630526825925e-06, + "loss": 71.5925, + "step": 221200 + }, + { + "epoch": 0.8937163911973723, + "grad_norm": 353.17108154296875, + "learning_rate": 1.5942703106353774e-06, + "loss": 51.0173, + "step": 221210 + }, + { + "epoch": 0.89375679246274, + "grad_norm": 796.9381103515625, + "learning_rate": 1.5931779274173354e-06, + "loss": 91.1763, + "step": 221220 + }, + { + "epoch": 0.8937971937281076, + "grad_norm": 1587.897216796875, + "learning_rate": 1.5920859030497649e-06, + "loss": 91.1535, + "step": 221230 + }, + { + "epoch": 0.8938375949934751, + "grad_norm": 595.0715942382812, + "learning_rate": 1.5909942375539556e-06, + "loss": 76.8349, + "step": 221240 + }, + { + "epoch": 0.8938779962588428, + "grad_norm": 674.66943359375, + "learning_rate": 1.5899029309511926e-06, + "loss": 62.3354, + "step": 221250 + }, + { + "epoch": 0.8939183975242104, + "grad_norm": 718.2734985351562, + "learning_rate": 1.5888119832627546e-06, + "loss": 76.5299, + "step": 221260 + }, + { + "epoch": 0.8939587987895781, + "grad_norm": 895.485595703125, + "learning_rate": 1.587721394509898e-06, + "loss": 87.6935, + "step": 221270 + }, + { + "epoch": 0.8939992000549457, + "grad_norm": 488.78125, + "learning_rate": 1.5866311647138988e-06, + "loss": 57.0768, + "step": 221280 + }, + { + "epoch": 0.8940396013203133, + "grad_norm": 852.343994140625, + "learning_rate": 1.5855412938960023e-06, + "loss": 75.701, + "step": 221290 + }, + { + "epoch": 0.894080002585681, + "grad_norm": 613.3756103515625, + "learning_rate": 1.5844517820774563e-06, + "loss": 87.2781, + "step": 221300 + }, + { + "epoch": 0.8941204038510486, + "grad_norm": 315.58929443359375, + "learning_rate": 1.5833626292795056e-06, + "loss": 47.1506, + "step": 221310 + }, + { + "epoch": 0.8941608051164163, + "grad_norm": 648.8484497070312, + "learning_rate": 1.5822738355233824e-06, + "loss": 96.1449, + "step": 221320 + }, + { + "epoch": 0.8942012063817839, + "grad_norm": 796.3201904296875, + "learning_rate": 1.5811854008303163e-06, + "loss": 78.7771, + "step": 221330 + }, + { + "epoch": 0.8942416076471515, + "grad_norm": 382.81451416015625, + "learning_rate": 1.580097325221519e-06, + "loss": 57.8638, + "step": 221340 + }, + { + "epoch": 0.8942820089125192, + "grad_norm": 367.498046875, + "learning_rate": 1.5790096087182072e-06, + "loss": 64.062, + "step": 221350 + }, + { + "epoch": 0.8943224101778867, + "grad_norm": 740.3942260742188, + "learning_rate": 1.5779222513415882e-06, + "loss": 52.5106, + "step": 221360 + }, + { + "epoch": 0.8943628114432544, + "grad_norm": 479.7514343261719, + "learning_rate": 1.5768352531128629e-06, + "loss": 57.1011, + "step": 221370 + }, + { + "epoch": 0.894403212708622, + "grad_norm": 409.02520751953125, + "learning_rate": 1.5757486140532097e-06, + "loss": 57.2839, + "step": 
221380 + }, + { + "epoch": 0.8944436139739896, + "grad_norm": 846.2710571289062, + "learning_rate": 1.5746623341838319e-06, + "loss": 58.3296, + "step": 221390 + }, + { + "epoch": 0.8944840152393573, + "grad_norm": 577.0972290039062, + "learning_rate": 1.5735764135258945e-06, + "loss": 57.6212, + "step": 221400 + }, + { + "epoch": 0.8945244165047249, + "grad_norm": 984.3857421875, + "learning_rate": 1.57249085210057e-06, + "loss": 49.893, + "step": 221410 + }, + { + "epoch": 0.8945648177700926, + "grad_norm": 553.3682250976562, + "learning_rate": 1.5714056499290276e-06, + "loss": 62.492, + "step": 221420 + }, + { + "epoch": 0.8946052190354602, + "grad_norm": 611.9230346679688, + "learning_rate": 1.5703208070324128e-06, + "loss": 70.6202, + "step": 221430 + }, + { + "epoch": 0.8946456203008278, + "grad_norm": 656.7820434570312, + "learning_rate": 1.5692363234318908e-06, + "loss": 58.7497, + "step": 221440 + }, + { + "epoch": 0.8946860215661955, + "grad_norm": 633.5784301757812, + "learning_rate": 1.5681521991485893e-06, + "loss": 74.9866, + "step": 221450 + }, + { + "epoch": 0.8947264228315631, + "grad_norm": 737.6774291992188, + "learning_rate": 1.5670684342036513e-06, + "loss": 74.3245, + "step": 221460 + }, + { + "epoch": 0.8947668240969308, + "grad_norm": 829.914306640625, + "learning_rate": 1.5659850286182043e-06, + "loss": 69.0556, + "step": 221470 + }, + { + "epoch": 0.8948072253622984, + "grad_norm": 868.13232421875, + "learning_rate": 1.5649019824133693e-06, + "loss": 82.2762, + "step": 221480 + }, + { + "epoch": 0.8948476266276659, + "grad_norm": 1032.4337158203125, + "learning_rate": 1.563819295610265e-06, + "loss": 49.6741, + "step": 221490 + }, + { + "epoch": 0.8948880278930336, + "grad_norm": 964.06494140625, + "learning_rate": 1.562736968229992e-06, + "loss": 79.4659, + "step": 221500 + }, + { + "epoch": 0.8949284291584012, + "grad_norm": 685.1739501953125, + "learning_rate": 1.5616550002936515e-06, + "loss": 81.8072, + "step": 221510 + }, + { + "epoch": 0.8949688304237688, + "grad_norm": 610.3966064453125, + "learning_rate": 1.560573391822342e-06, + "loss": 71.6444, + "step": 221520 + }, + { + "epoch": 0.8950092316891365, + "grad_norm": 546.2423095703125, + "learning_rate": 1.5594921428371512e-06, + "loss": 98.7119, + "step": 221530 + }, + { + "epoch": 0.8950496329545041, + "grad_norm": 541.729248046875, + "learning_rate": 1.5584112533591467e-06, + "loss": 71.4115, + "step": 221540 + }, + { + "epoch": 0.8950900342198718, + "grad_norm": 512.8060302734375, + "learning_rate": 1.557330723409416e-06, + "loss": 50.7254, + "step": 221550 + }, + { + "epoch": 0.8951304354852394, + "grad_norm": 394.78643798828125, + "learning_rate": 1.5562505530090155e-06, + "loss": 68.1411, + "step": 221560 + }, + { + "epoch": 0.895170836750607, + "grad_norm": 864.4313354492188, + "learning_rate": 1.555170742179004e-06, + "loss": 65.1117, + "step": 221570 + }, + { + "epoch": 0.8952112380159747, + "grad_norm": 591.1809692382812, + "learning_rate": 1.554091290940436e-06, + "loss": 64.4191, + "step": 221580 + }, + { + "epoch": 0.8952516392813423, + "grad_norm": 1057.6497802734375, + "learning_rate": 1.5530121993143544e-06, + "loss": 88.3042, + "step": 221590 + }, + { + "epoch": 0.89529204054671, + "grad_norm": 490.5820007324219, + "learning_rate": 1.5519334673218023e-06, + "loss": 61.8426, + "step": 221600 + }, + { + "epoch": 0.8953324418120776, + "grad_norm": 725.0401000976562, + "learning_rate": 1.5508550949838008e-06, + "loss": 61.6642, + "step": 221610 + }, + { + "epoch": 0.8953728430774451, + 
"grad_norm": 678.1658935546875, + "learning_rate": 1.5497770823213754e-06, + "loss": 69.256, + "step": 221620 + }, + { + "epoch": 0.8954132443428128, + "grad_norm": 261.5865478515625, + "learning_rate": 1.5486994293555468e-06, + "loss": 63.5032, + "step": 221630 + }, + { + "epoch": 0.8954536456081804, + "grad_norm": 1552.3807373046875, + "learning_rate": 1.5476221361073206e-06, + "loss": 66.2778, + "step": 221640 + }, + { + "epoch": 0.895494046873548, + "grad_norm": 739.3807983398438, + "learning_rate": 1.5465452025977024e-06, + "loss": 73.2389, + "step": 221650 + }, + { + "epoch": 0.8955344481389157, + "grad_norm": 751.2479858398438, + "learning_rate": 1.5454686288476883e-06, + "loss": 55.5145, + "step": 221660 + }, + { + "epoch": 0.8955748494042833, + "grad_norm": 502.93548583984375, + "learning_rate": 1.5443924148782618e-06, + "loss": 62.4693, + "step": 221670 + }, + { + "epoch": 0.895615250669651, + "grad_norm": 603.1491088867188, + "learning_rate": 1.5433165607104062e-06, + "loss": 74.5281, + "step": 221680 + }, + { + "epoch": 0.8956556519350186, + "grad_norm": 740.739501953125, + "learning_rate": 1.5422410663650978e-06, + "loss": 40.5402, + "step": 221690 + }, + { + "epoch": 0.8956960532003863, + "grad_norm": 521.0811767578125, + "learning_rate": 1.5411659318633e-06, + "loss": 48.4772, + "step": 221700 + }, + { + "epoch": 0.8957364544657539, + "grad_norm": 605.6300048828125, + "learning_rate": 1.5400911572259802e-06, + "loss": 56.4083, + "step": 221710 + }, + { + "epoch": 0.8957768557311215, + "grad_norm": 697.5730590820312, + "learning_rate": 1.5390167424740799e-06, + "loss": 91.3887, + "step": 221720 + }, + { + "epoch": 0.8958172569964892, + "grad_norm": 890.4893188476562, + "learning_rate": 1.5379426876285597e-06, + "loss": 62.1376, + "step": 221730 + }, + { + "epoch": 0.8958576582618568, + "grad_norm": 801.906005859375, + "learning_rate": 1.5368689927103498e-06, + "loss": 63.8843, + "step": 221740 + }, + { + "epoch": 0.8958980595272243, + "grad_norm": 402.1195068359375, + "learning_rate": 1.5357956577403822e-06, + "loss": 52.5954, + "step": 221750 + }, + { + "epoch": 0.895938460792592, + "grad_norm": 671.3176879882812, + "learning_rate": 1.534722682739589e-06, + "loss": 65.9928, + "step": 221760 + }, + { + "epoch": 0.8959788620579596, + "grad_norm": 440.7599182128906, + "learning_rate": 1.5336500677288757e-06, + "loss": 68.1583, + "step": 221770 + }, + { + "epoch": 0.8960192633233273, + "grad_norm": 927.2649536132812, + "learning_rate": 1.532577812729168e-06, + "loss": 60.3403, + "step": 221780 + }, + { + "epoch": 0.8960596645886949, + "grad_norm": 455.891845703125, + "learning_rate": 1.5315059177613622e-06, + "loss": 61.5973, + "step": 221790 + }, + { + "epoch": 0.8961000658540625, + "grad_norm": 965.6763305664062, + "learning_rate": 1.5304343828463553e-06, + "loss": 72.8458, + "step": 221800 + }, + { + "epoch": 0.8961404671194302, + "grad_norm": 831.720458984375, + "learning_rate": 1.529363208005039e-06, + "loss": 62.2753, + "step": 221810 + }, + { + "epoch": 0.8961808683847978, + "grad_norm": 705.921875, + "learning_rate": 1.5282923932582994e-06, + "loss": 69.755, + "step": 221820 + }, + { + "epoch": 0.8962212696501655, + "grad_norm": 544.6253662109375, + "learning_rate": 1.527221938627006e-06, + "loss": 73.1654, + "step": 221830 + }, + { + "epoch": 0.8962616709155331, + "grad_norm": 683.97265625, + "learning_rate": 1.5261518441320312e-06, + "loss": 50.9144, + "step": 221840 + }, + { + "epoch": 0.8963020721809007, + "grad_norm": 962.111572265625, + "learning_rate": 
1.525082109794238e-06, + "loss": 72.3431, + "step": 221850 + }, + { + "epoch": 0.8963424734462684, + "grad_norm": 669.9906005859375, + "learning_rate": 1.5240127356344813e-06, + "loss": 62.7091, + "step": 221860 + }, + { + "epoch": 0.8963828747116359, + "grad_norm": 448.8879089355469, + "learning_rate": 1.522943721673611e-06, + "loss": 97.9281, + "step": 221870 + }, + { + "epoch": 0.8964232759770036, + "grad_norm": 655.4896850585938, + "learning_rate": 1.5218750679324567e-06, + "loss": 50.6408, + "step": 221880 + }, + { + "epoch": 0.8964636772423712, + "grad_norm": 1197.3651123046875, + "learning_rate": 1.520806774431871e-06, + "loss": 90.3339, + "step": 221890 + }, + { + "epoch": 0.8965040785077388, + "grad_norm": 346.5458679199219, + "learning_rate": 1.5197388411926661e-06, + "loss": 72.1722, + "step": 221900 + }, + { + "epoch": 0.8965444797731065, + "grad_norm": 870.5877075195312, + "learning_rate": 1.5186712682356674e-06, + "loss": 87.8209, + "step": 221910 + }, + { + "epoch": 0.8965848810384741, + "grad_norm": 510.7125244140625, + "learning_rate": 1.5176040555816851e-06, + "loss": 91.478, + "step": 221920 + }, + { + "epoch": 0.8966252823038418, + "grad_norm": 364.77911376953125, + "learning_rate": 1.5165372032515292e-06, + "loss": 50.6566, + "step": 221930 + }, + { + "epoch": 0.8966656835692094, + "grad_norm": 887.6796264648438, + "learning_rate": 1.5154707112660006e-06, + "loss": 68.4127, + "step": 221940 + }, + { + "epoch": 0.896706084834577, + "grad_norm": 603.9096069335938, + "learning_rate": 1.5144045796458807e-06, + "loss": 46.002, + "step": 221950 + }, + { + "epoch": 0.8967464860999447, + "grad_norm": 869.3789672851562, + "learning_rate": 1.5133388084119616e-06, + "loss": 65.7907, + "step": 221960 + }, + { + "epoch": 0.8967868873653123, + "grad_norm": 606.91796875, + "learning_rate": 1.5122733975850224e-06, + "loss": 57.2866, + "step": 221970 + }, + { + "epoch": 0.89682728863068, + "grad_norm": 1323.509033203125, + "learning_rate": 1.511208347185833e-06, + "loss": 58.7704, + "step": 221980 + }, + { + "epoch": 0.8968676898960476, + "grad_norm": 829.470458984375, + "learning_rate": 1.510143657235148e-06, + "loss": 84.5734, + "step": 221990 + }, + { + "epoch": 0.8969080911614151, + "grad_norm": 898.7881469726562, + "learning_rate": 1.5090793277537396e-06, + "loss": 83.455, + "step": 222000 + }, + { + "epoch": 0.8969484924267828, + "grad_norm": 440.84722900390625, + "learning_rate": 1.5080153587623447e-06, + "loss": 45.8367, + "step": 222010 + }, + { + "epoch": 0.8969888936921504, + "grad_norm": 538.4140625, + "learning_rate": 1.5069517502817111e-06, + "loss": 68.8327, + "step": 222020 + }, + { + "epoch": 0.897029294957518, + "grad_norm": 534.7180786132812, + "learning_rate": 1.5058885023325797e-06, + "loss": 44.6647, + "step": 222030 + }, + { + "epoch": 0.8970696962228857, + "grad_norm": 691.2738037109375, + "learning_rate": 1.5048256149356632e-06, + "loss": 54.0792, + "step": 222040 + }, + { + "epoch": 0.8971100974882533, + "grad_norm": 1128.468505859375, + "learning_rate": 1.5037630881117027e-06, + "loss": 68.7071, + "step": 222050 + }, + { + "epoch": 0.897150498753621, + "grad_norm": 586.9756469726562, + "learning_rate": 1.5027009218813993e-06, + "loss": 64.9544, + "step": 222060 + }, + { + "epoch": 0.8971909000189886, + "grad_norm": 329.9964294433594, + "learning_rate": 1.501639116265463e-06, + "loss": 35.3646, + "step": 222070 + }, + { + "epoch": 0.8972313012843562, + "grad_norm": 903.369873046875, + "learning_rate": 1.5005776712845976e-06, + "loss": 46.9833, + "step": 222080 
+ }, + { + "epoch": 0.8972717025497239, + "grad_norm": 943.786865234375, + "learning_rate": 1.499516586959493e-06, + "loss": 40.1359, + "step": 222090 + }, + { + "epoch": 0.8973121038150915, + "grad_norm": 712.5819702148438, + "learning_rate": 1.4984558633108414e-06, + "loss": 85.1926, + "step": 222100 + }, + { + "epoch": 0.8973525050804592, + "grad_norm": 1092.22021484375, + "learning_rate": 1.4973955003593154e-06, + "loss": 74.4683, + "step": 222110 + }, + { + "epoch": 0.8973929063458268, + "grad_norm": 506.7978210449219, + "learning_rate": 1.4963354981255896e-06, + "loss": 51.8598, + "step": 222120 + }, + { + "epoch": 0.8974333076111943, + "grad_norm": 1611.95654296875, + "learning_rate": 1.4952758566303272e-06, + "loss": 76.1535, + "step": 222130 + }, + { + "epoch": 0.897473708876562, + "grad_norm": 438.1163024902344, + "learning_rate": 1.4942165758941918e-06, + "loss": 76.6385, + "step": 222140 + }, + { + "epoch": 0.8975141101419296, + "grad_norm": 811.18603515625, + "learning_rate": 1.4931576559378313e-06, + "loss": 76.8861, + "step": 222150 + }, + { + "epoch": 0.8975545114072973, + "grad_norm": 838.0398559570312, + "learning_rate": 1.4920990967818916e-06, + "loss": 60.8003, + "step": 222160 + }, + { + "epoch": 0.8975949126726649, + "grad_norm": 802.9805297851562, + "learning_rate": 1.4910408984470072e-06, + "loss": 57.2776, + "step": 222170 + }, + { + "epoch": 0.8976353139380325, + "grad_norm": 333.6586608886719, + "learning_rate": 1.489983060953808e-06, + "loss": 87.7012, + "step": 222180 + }, + { + "epoch": 0.8976757152034002, + "grad_norm": 637.074951171875, + "learning_rate": 1.48892558432292e-06, + "loss": 86.0378, + "step": 222190 + }, + { + "epoch": 0.8977161164687678, + "grad_norm": 739.9834594726562, + "learning_rate": 1.4878684685749579e-06, + "loss": 67.2963, + "step": 222200 + }, + { + "epoch": 0.8977565177341355, + "grad_norm": 730.9069213867188, + "learning_rate": 1.4868117137305339e-06, + "loss": 61.7846, + "step": 222210 + }, + { + "epoch": 0.8977969189995031, + "grad_norm": 920.531982421875, + "learning_rate": 1.485755319810238e-06, + "loss": 41.0014, + "step": 222220 + }, + { + "epoch": 0.8978373202648707, + "grad_norm": 585.5343627929688, + "learning_rate": 1.4846992868346833e-06, + "loss": 38.0949, + "step": 222230 + }, + { + "epoch": 0.8978777215302384, + "grad_norm": 2511.248046875, + "learning_rate": 1.4836436148244437e-06, + "loss": 61.5334, + "step": 222240 + }, + { + "epoch": 0.897918122795606, + "grad_norm": 1512.885009765625, + "learning_rate": 1.4825883038001054e-06, + "loss": 79.5837, + "step": 222250 + }, + { + "epoch": 0.8979585240609735, + "grad_norm": 758.6636962890625, + "learning_rate": 1.4815333537822407e-06, + "loss": 63.1573, + "step": 222260 + }, + { + "epoch": 0.8979989253263412, + "grad_norm": 1718.753173828125, + "learning_rate": 1.4804787647914198e-06, + "loss": 105.2347, + "step": 222270 + }, + { + "epoch": 0.8980393265917088, + "grad_norm": 701.3446655273438, + "learning_rate": 1.4794245368481996e-06, + "loss": 47.2546, + "step": 222280 + }, + { + "epoch": 0.8980797278570765, + "grad_norm": 1087.1846923828125, + "learning_rate": 1.4783706699731304e-06, + "loss": 69.8913, + "step": 222290 + }, + { + "epoch": 0.8981201291224441, + "grad_norm": 1200.8665771484375, + "learning_rate": 1.4773171641867623e-06, + "loss": 88.3109, + "step": 222300 + }, + { + "epoch": 0.8981605303878117, + "grad_norm": 270.22698974609375, + "learning_rate": 1.47626401950963e-06, + "loss": 48.869, + "step": 222310 + }, + { + "epoch": 0.8982009316531794, + 
"grad_norm": 438.6893005371094, + "learning_rate": 1.4752112359622727e-06, + "loss": 51.0817, + "step": 222320 + }, + { + "epoch": 0.898241332918547, + "grad_norm": 428.5312805175781, + "learning_rate": 1.4741588135652007e-06, + "loss": 56.9726, + "step": 222330 + }, + { + "epoch": 0.8982817341839147, + "grad_norm": 1029.17529296875, + "learning_rate": 1.4731067523389486e-06, + "loss": 50.1861, + "step": 222340 + }, + { + "epoch": 0.8983221354492823, + "grad_norm": 1367.995849609375, + "learning_rate": 1.4720550523040156e-06, + "loss": 72.7565, + "step": 222350 + }, + { + "epoch": 0.8983625367146499, + "grad_norm": 552.6217651367188, + "learning_rate": 1.4710037134809074e-06, + "loss": 69.3481, + "step": 222360 + }, + { + "epoch": 0.8984029379800176, + "grad_norm": 1489.939208984375, + "learning_rate": 1.4699527358901256e-06, + "loss": 79.3745, + "step": 222370 + }, + { + "epoch": 0.8984433392453852, + "grad_norm": 1292.38037109375, + "learning_rate": 1.4689021195521469e-06, + "loss": 104.7945, + "step": 222380 + }, + { + "epoch": 0.8984837405107527, + "grad_norm": 867.2813110351562, + "learning_rate": 1.4678518644874684e-06, + "loss": 88.4666, + "step": 222390 + }, + { + "epoch": 0.8985241417761204, + "grad_norm": 788.9912719726562, + "learning_rate": 1.4668019707165581e-06, + "loss": 47.6423, + "step": 222400 + }, + { + "epoch": 0.898564543041488, + "grad_norm": 555.4107666015625, + "learning_rate": 1.465752438259882e-06, + "loss": 76.2173, + "step": 222410 + }, + { + "epoch": 0.8986049443068557, + "grad_norm": 328.09814453125, + "learning_rate": 1.4647032671379059e-06, + "loss": 49.1068, + "step": 222420 + }, + { + "epoch": 0.8986453455722233, + "grad_norm": 1082.3272705078125, + "learning_rate": 1.4636544573710842e-06, + "loss": 67.7167, + "step": 222430 + }, + { + "epoch": 0.898685746837591, + "grad_norm": 566.7816772460938, + "learning_rate": 1.462606008979861e-06, + "loss": 29.8075, + "step": 222440 + }, + { + "epoch": 0.8987261481029586, + "grad_norm": 505.9683837890625, + "learning_rate": 1.4615579219846754e-06, + "loss": 42.4321, + "step": 222450 + }, + { + "epoch": 0.8987665493683262, + "grad_norm": 1267.645263671875, + "learning_rate": 1.460510196405962e-06, + "loss": 55.2721, + "step": 222460 + }, + { + "epoch": 0.8988069506336939, + "grad_norm": 675.2928466796875, + "learning_rate": 1.4594628322641492e-06, + "loss": 93.4545, + "step": 222470 + }, + { + "epoch": 0.8988473518990615, + "grad_norm": 461.63043212890625, + "learning_rate": 1.4584158295796535e-06, + "loss": 44.9919, + "step": 222480 + }, + { + "epoch": 0.8988877531644291, + "grad_norm": 1068.450439453125, + "learning_rate": 1.4573691883728836e-06, + "loss": 70.255, + "step": 222490 + }, + { + "epoch": 0.8989281544297968, + "grad_norm": 808.7091674804688, + "learning_rate": 1.4563229086642538e-06, + "loss": 58.3941, + "step": 222500 + }, + { + "epoch": 0.8989685556951643, + "grad_norm": 620.4358520507812, + "learning_rate": 1.4552769904741526e-06, + "loss": 70.4419, + "step": 222510 + }, + { + "epoch": 0.899008956960532, + "grad_norm": 592.838623046875, + "learning_rate": 1.4542314338229723e-06, + "loss": 82.3163, + "step": 222520 + }, + { + "epoch": 0.8990493582258996, + "grad_norm": 413.5105285644531, + "learning_rate": 1.453186238731099e-06, + "loss": 76.5434, + "step": 222530 + }, + { + "epoch": 0.8990897594912672, + "grad_norm": 380.1504211425781, + "learning_rate": 1.4521414052189097e-06, + "loss": 66.9069, + "step": 222540 + }, + { + "epoch": 0.8991301607566349, + "grad_norm": 793.7691040039062, + 
"learning_rate": 1.4510969333067747e-06, + "loss": 54.4114, + "step": 222550 + }, + { + "epoch": 0.8991705620220025, + "grad_norm": 691.017822265625, + "learning_rate": 1.4500528230150534e-06, + "loss": 73.2005, + "step": 222560 + }, + { + "epoch": 0.8992109632873702, + "grad_norm": 504.4805908203125, + "learning_rate": 1.4490090743641006e-06, + "loss": 46.4006, + "step": 222570 + }, + { + "epoch": 0.8992513645527378, + "grad_norm": 2081.857421875, + "learning_rate": 1.4479656873742665e-06, + "loss": 56.7575, + "step": 222580 + }, + { + "epoch": 0.8992917658181054, + "grad_norm": 952.9689331054688, + "learning_rate": 1.4469226620658972e-06, + "loss": 80.8941, + "step": 222590 + }, + { + "epoch": 0.8993321670834731, + "grad_norm": 1095.5927734375, + "learning_rate": 1.445879998459314e-06, + "loss": 70.3145, + "step": 222600 + }, + { + "epoch": 0.8993725683488407, + "grad_norm": 624.5864868164062, + "learning_rate": 1.4448376965748612e-06, + "loss": 84.8981, + "step": 222610 + }, + { + "epoch": 0.8994129696142084, + "grad_norm": 896.3707885742188, + "learning_rate": 1.4437957564328441e-06, + "loss": 67.9274, + "step": 222620 + }, + { + "epoch": 0.899453370879576, + "grad_norm": 1085.0633544921875, + "learning_rate": 1.4427541780535848e-06, + "loss": 50.4234, + "step": 222630 + }, + { + "epoch": 0.8994937721449435, + "grad_norm": 1233.7166748046875, + "learning_rate": 1.441712961457389e-06, + "loss": 74.988, + "step": 222640 + }, + { + "epoch": 0.8995341734103112, + "grad_norm": 258.4087219238281, + "learning_rate": 1.4406721066645468e-06, + "loss": 55.429, + "step": 222650 + }, + { + "epoch": 0.8995745746756788, + "grad_norm": 571.8363037109375, + "learning_rate": 1.4396316136953647e-06, + "loss": 73.0087, + "step": 222660 + }, + { + "epoch": 0.8996149759410464, + "grad_norm": 686.3413696289062, + "learning_rate": 1.4385914825701109e-06, + "loss": 53.1978, + "step": 222670 + }, + { + "epoch": 0.8996553772064141, + "grad_norm": 632.16943359375, + "learning_rate": 1.4375517133090799e-06, + "loss": 77.9788, + "step": 222680 + }, + { + "epoch": 0.8996957784717817, + "grad_norm": 798.5208129882812, + "learning_rate": 1.4365123059325314e-06, + "loss": 74.8441, + "step": 222690 + }, + { + "epoch": 0.8997361797371494, + "grad_norm": 577.5982666015625, + "learning_rate": 1.4354732604607335e-06, + "loss": 87.5857, + "step": 222700 + }, + { + "epoch": 0.899776581002517, + "grad_norm": 3161.21240234375, + "learning_rate": 1.4344345769139456e-06, + "loss": 119.7087, + "step": 222710 + }, + { + "epoch": 0.8998169822678846, + "grad_norm": 362.800048828125, + "learning_rate": 1.4333962553124092e-06, + "loss": 74.3041, + "step": 222720 + }, + { + "epoch": 0.8998573835332523, + "grad_norm": 509.8194580078125, + "learning_rate": 1.4323582956763726e-06, + "loss": 55.0121, + "step": 222730 + }, + { + "epoch": 0.8998977847986199, + "grad_norm": 486.101318359375, + "learning_rate": 1.4313206980260686e-06, + "loss": 54.9921, + "step": 222740 + }, + { + "epoch": 0.8999381860639876, + "grad_norm": 426.1357727050781, + "learning_rate": 1.4302834623817296e-06, + "loss": 44.8287, + "step": 222750 + }, + { + "epoch": 0.8999785873293552, + "grad_norm": 249.80726623535156, + "learning_rate": 1.4292465887635731e-06, + "loss": 61.2572, + "step": 222760 + }, + { + "epoch": 0.9000189885947227, + "grad_norm": 615.3236694335938, + "learning_rate": 1.4282100771918184e-06, + "loss": 52.0428, + "step": 222770 + }, + { + "epoch": 0.9000593898600904, + "grad_norm": 447.8368225097656, + "learning_rate": 1.427173927686667e-06, + 
"loss": 47.0896, + "step": 222780 + }, + { + "epoch": 0.900099791125458, + "grad_norm": 541.9323120117188, + "learning_rate": 1.4261381402683227e-06, + "loss": 61.3765, + "step": 222790 + }, + { + "epoch": 0.9001401923908257, + "grad_norm": 746.8529663085938, + "learning_rate": 1.4251027149569808e-06, + "loss": 51.9973, + "step": 222800 + }, + { + "epoch": 0.9001805936561933, + "grad_norm": 542.7180786132812, + "learning_rate": 1.424067651772818e-06, + "loss": 72.9436, + "step": 222810 + }, + { + "epoch": 0.9002209949215609, + "grad_norm": 688.0560913085938, + "learning_rate": 1.4230329507360251e-06, + "loss": 42.6149, + "step": 222820 + }, + { + "epoch": 0.9002613961869286, + "grad_norm": 487.5868225097656, + "learning_rate": 1.421998611866764e-06, + "loss": 62.1073, + "step": 222830 + }, + { + "epoch": 0.9003017974522962, + "grad_norm": 232.54092407226562, + "learning_rate": 1.420964635185209e-06, + "loss": 37.0399, + "step": 222840 + }, + { + "epoch": 0.9003421987176639, + "grad_norm": 959.6676635742188, + "learning_rate": 1.4199310207115113e-06, + "loss": 79.0651, + "step": 222850 + }, + { + "epoch": 0.9003825999830315, + "grad_norm": 824.2658081054688, + "learning_rate": 1.4188977684658256e-06, + "loss": 85.9762, + "step": 222860 + }, + { + "epoch": 0.9004230012483991, + "grad_norm": 568.0551147460938, + "learning_rate": 1.4178648784682914e-06, + "loss": 57.8951, + "step": 222870 + }, + { + "epoch": 0.9004634025137668, + "grad_norm": 742.1373901367188, + "learning_rate": 1.4168323507390525e-06, + "loss": 80.2307, + "step": 222880 + }, + { + "epoch": 0.9005038037791344, + "grad_norm": 460.93914794921875, + "learning_rate": 1.4158001852982329e-06, + "loss": 107.2461, + "step": 222890 + }, + { + "epoch": 0.9005442050445019, + "grad_norm": 388.5916748046875, + "learning_rate": 1.4147683821659542e-06, + "loss": 84.5374, + "step": 222900 + }, + { + "epoch": 0.9005846063098696, + "grad_norm": 477.6562805175781, + "learning_rate": 1.4137369413623336e-06, + "loss": 48.2681, + "step": 222910 + }, + { + "epoch": 0.9006250075752372, + "grad_norm": 876.147216796875, + "learning_rate": 1.412705862907482e-06, + "loss": 63.8501, + "step": 222920 + }, + { + "epoch": 0.9006654088406049, + "grad_norm": 731.41943359375, + "learning_rate": 1.4116751468215006e-06, + "loss": 104.1399, + "step": 222930 + }, + { + "epoch": 0.9007058101059725, + "grad_norm": 317.22381591796875, + "learning_rate": 1.4106447931244781e-06, + "loss": 72.1604, + "step": 222940 + }, + { + "epoch": 0.9007462113713401, + "grad_norm": 691.7069091796875, + "learning_rate": 1.4096148018365097e-06, + "loss": 61.813, + "step": 222950 + }, + { + "epoch": 0.9007866126367078, + "grad_norm": 618.0923461914062, + "learning_rate": 1.4085851729776701e-06, + "loss": 76.2778, + "step": 222960 + }, + { + "epoch": 0.9008270139020754, + "grad_norm": 428.1542053222656, + "learning_rate": 1.4075559065680322e-06, + "loss": 60.1169, + "step": 222970 + }, + { + "epoch": 0.9008674151674431, + "grad_norm": 912.5491333007812, + "learning_rate": 1.406527002627669e-06, + "loss": 67.6751, + "step": 222980 + }, + { + "epoch": 0.9009078164328107, + "grad_norm": 819.9051513671875, + "learning_rate": 1.4054984611766288e-06, + "loss": 51.6866, + "step": 222990 + }, + { + "epoch": 0.9009482176981783, + "grad_norm": 689.966064453125, + "learning_rate": 1.4044702822349731e-06, + "loss": 60.235, + "step": 223000 + }, + { + "epoch": 0.900988618963546, + "grad_norm": 900.9361572265625, + "learning_rate": 1.403442465822742e-06, + "loss": 62.0173, + "step": 223010 + }, + { + 
"epoch": 0.9010290202289136, + "grad_norm": 762.463623046875, + "learning_rate": 1.4024150119599721e-06, + "loss": 63.4135, + "step": 223020 + }, + { + "epoch": 0.9010694214942812, + "grad_norm": 471.5097961425781, + "learning_rate": 1.401387920666697e-06, + "loss": 65.2816, + "step": 223030 + }, + { + "epoch": 0.9011098227596488, + "grad_norm": 388.55047607421875, + "learning_rate": 1.4003611919629423e-06, + "loss": 72.6597, + "step": 223040 + }, + { + "epoch": 0.9011502240250164, + "grad_norm": 805.3184814453125, + "learning_rate": 1.3993348258687211e-06, + "loss": 55.1166, + "step": 223050 + }, + { + "epoch": 0.9011906252903841, + "grad_norm": 837.434326171875, + "learning_rate": 1.398308822404042e-06, + "loss": 63.9727, + "step": 223060 + }, + { + "epoch": 0.9012310265557517, + "grad_norm": 783.3877563476562, + "learning_rate": 1.3972831815889088e-06, + "loss": 78.5174, + "step": 223070 + }, + { + "epoch": 0.9012714278211194, + "grad_norm": 982.860595703125, + "learning_rate": 1.3962579034433167e-06, + "loss": 87.7101, + "step": 223080 + }, + { + "epoch": 0.901311829086487, + "grad_norm": 799.5336303710938, + "learning_rate": 1.3952329879872583e-06, + "loss": 60.702, + "step": 223090 + }, + { + "epoch": 0.9013522303518546, + "grad_norm": 850.2606811523438, + "learning_rate": 1.394208435240707e-06, + "loss": 72.6989, + "step": 223100 + }, + { + "epoch": 0.9013926316172223, + "grad_norm": 335.2709045410156, + "learning_rate": 1.3931842452236444e-06, + "loss": 75.7172, + "step": 223110 + }, + { + "epoch": 0.9014330328825899, + "grad_norm": 1114.275146484375, + "learning_rate": 1.3921604179560343e-06, + "loss": 75.2633, + "step": 223120 + }, + { + "epoch": 0.9014734341479576, + "grad_norm": 714.2669067382812, + "learning_rate": 1.3911369534578344e-06, + "loss": 63.1604, + "step": 223130 + }, + { + "epoch": 0.9015138354133252, + "grad_norm": 623.8490600585938, + "learning_rate": 1.3901138517490043e-06, + "loss": 78.5972, + "step": 223140 + }, + { + "epoch": 0.9015542366786927, + "grad_norm": 256.2215881347656, + "learning_rate": 1.3890911128494789e-06, + "loss": 63.1528, + "step": 223150 + }, + { + "epoch": 0.9015946379440604, + "grad_norm": 578.7676391601562, + "learning_rate": 1.3880687367792089e-06, + "loss": 49.3024, + "step": 223160 + }, + { + "epoch": 0.901635039209428, + "grad_norm": 503.7436828613281, + "learning_rate": 1.3870467235581208e-06, + "loss": 70.371, + "step": 223170 + }, + { + "epoch": 0.9016754404747956, + "grad_norm": 380.93609619140625, + "learning_rate": 1.3860250732061364e-06, + "loss": 71.8928, + "step": 223180 + }, + { + "epoch": 0.9017158417401633, + "grad_norm": 614.88623046875, + "learning_rate": 1.3850037857431775e-06, + "loss": 91.0353, + "step": 223190 + }, + { + "epoch": 0.9017562430055309, + "grad_norm": 1178.55126953125, + "learning_rate": 1.3839828611891547e-06, + "loss": 75.3983, + "step": 223200 + }, + { + "epoch": 0.9017966442708986, + "grad_norm": 657.9109497070312, + "learning_rate": 1.382962299563968e-06, + "loss": 74.8668, + "step": 223210 + }, + { + "epoch": 0.9018370455362662, + "grad_norm": 1002.372802734375, + "learning_rate": 1.3819421008875212e-06, + "loss": 103.2476, + "step": 223220 + }, + { + "epoch": 0.9018774468016338, + "grad_norm": 928.9434814453125, + "learning_rate": 1.3809222651796938e-06, + "loss": 81.121, + "step": 223230 + }, + { + "epoch": 0.9019178480670015, + "grad_norm": 382.8856201171875, + "learning_rate": 1.3799027924603724e-06, + "loss": 46.1385, + "step": 223240 + }, + { + "epoch": 0.9019582493323691, + "grad_norm": 
894.6405639648438, + "learning_rate": 1.3788836827494367e-06, + "loss": 93.452, + "step": 223250 + }, + { + "epoch": 0.9019986505977368, + "grad_norm": 702.4419555664062, + "learning_rate": 1.377864936066744e-06, + "loss": 40.2199, + "step": 223260 + }, + { + "epoch": 0.9020390518631044, + "grad_norm": 381.83245849609375, + "learning_rate": 1.3768465524321672e-06, + "loss": 50.9946, + "step": 223270 + }, + { + "epoch": 0.9020794531284719, + "grad_norm": 443.68927001953125, + "learning_rate": 1.3758285318655485e-06, + "loss": 58.1701, + "step": 223280 + }, + { + "epoch": 0.9021198543938396, + "grad_norm": 1047.51904296875, + "learning_rate": 1.3748108743867472e-06, + "loss": 62.9302, + "step": 223290 + }, + { + "epoch": 0.9021602556592072, + "grad_norm": 472.6526794433594, + "learning_rate": 1.3737935800155944e-06, + "loss": 58.2637, + "step": 223300 + }, + { + "epoch": 0.9022006569245749, + "grad_norm": 427.49053955078125, + "learning_rate": 1.3727766487719253e-06, + "loss": 69.3146, + "step": 223310 + }, + { + "epoch": 0.9022410581899425, + "grad_norm": 687.09765625, + "learning_rate": 1.3717600806755682e-06, + "loss": 48.4614, + "step": 223320 + }, + { + "epoch": 0.9022814594553101, + "grad_norm": 503.7699890136719, + "learning_rate": 1.3707438757463344e-06, + "loss": 50.5717, + "step": 223330 + }, + { + "epoch": 0.9023218607206778, + "grad_norm": 605.9425048828125, + "learning_rate": 1.3697280340040408e-06, + "loss": 66.4546, + "step": 223340 + }, + { + "epoch": 0.9023622619860454, + "grad_norm": 1120.4403076171875, + "learning_rate": 1.36871255546849e-06, + "loss": 46.7314, + "step": 223350 + }, + { + "epoch": 0.902402663251413, + "grad_norm": 262.0587463378906, + "learning_rate": 1.3676974401594811e-06, + "loss": 72.5275, + "step": 223360 + }, + { + "epoch": 0.9024430645167807, + "grad_norm": 583.3543701171875, + "learning_rate": 1.3666826880968032e-06, + "loss": 56.2323, + "step": 223370 + }, + { + "epoch": 0.9024834657821483, + "grad_norm": 542.0613403320312, + "learning_rate": 1.3656682993002423e-06, + "loss": 60.9644, + "step": 223380 + }, + { + "epoch": 0.902523867047516, + "grad_norm": 1933.28125, + "learning_rate": 1.3646542737895673e-06, + "loss": 79.6699, + "step": 223390 + }, + { + "epoch": 0.9025642683128836, + "grad_norm": 517.1232299804688, + "learning_rate": 1.3636406115845512e-06, + "loss": 67.476, + "step": 223400 + }, + { + "epoch": 0.9026046695782511, + "grad_norm": 980.044921875, + "learning_rate": 1.3626273127049606e-06, + "loss": 70.6065, + "step": 223410 + }, + { + "epoch": 0.9026450708436188, + "grad_norm": 452.0819091796875, + "learning_rate": 1.3616143771705393e-06, + "loss": 75.4096, + "step": 223420 + }, + { + "epoch": 0.9026854721089864, + "grad_norm": 491.7975769042969, + "learning_rate": 1.3606018050010472e-06, + "loss": 78.8138, + "step": 223430 + }, + { + "epoch": 0.9027258733743541, + "grad_norm": 441.1270446777344, + "learning_rate": 1.359589596216213e-06, + "loss": 73.7608, + "step": 223440 + }, + { + "epoch": 0.9027662746397217, + "grad_norm": 951.4102172851562, + "learning_rate": 1.3585777508357811e-06, + "loss": 75.1027, + "step": 223450 + }, + { + "epoch": 0.9028066759050893, + "grad_norm": 189.31875610351562, + "learning_rate": 1.357566268879471e-06, + "loss": 60.4211, + "step": 223460 + }, + { + "epoch": 0.902847077170457, + "grad_norm": 1534.732421875, + "learning_rate": 1.356555150367005e-06, + "loss": 89.9265, + "step": 223470 + }, + { + "epoch": 0.9028874784358246, + "grad_norm": 657.0514526367188, + "learning_rate": 1.355544395318098e-06, 
+ "loss": 71.7766, + "step": 223480 + }, + { + "epoch": 0.9029278797011923, + "grad_norm": 225.95887756347656, + "learning_rate": 1.3545340037524479e-06, + "loss": 52.7945, + "step": 223490 + }, + { + "epoch": 0.9029682809665599, + "grad_norm": 443.5253601074219, + "learning_rate": 1.3535239756897566e-06, + "loss": 59.413, + "step": 223500 + }, + { + "epoch": 0.9030086822319275, + "grad_norm": 741.3661499023438, + "learning_rate": 1.3525143111497174e-06, + "loss": 69.0169, + "step": 223510 + }, + { + "epoch": 0.9030490834972952, + "grad_norm": 618.5284423828125, + "learning_rate": 1.3515050101520099e-06, + "loss": 46.7761, + "step": 223520 + }, + { + "epoch": 0.9030894847626628, + "grad_norm": 1214.7528076171875, + "learning_rate": 1.3504960727163118e-06, + "loss": 78.7897, + "step": 223530 + }, + { + "epoch": 0.9031298860280303, + "grad_norm": 700.9738159179688, + "learning_rate": 1.3494874988623008e-06, + "loss": 76.3542, + "step": 223540 + }, + { + "epoch": 0.903170287293398, + "grad_norm": 295.2047119140625, + "learning_rate": 1.3484792886096275e-06, + "loss": 50.47, + "step": 223550 + }, + { + "epoch": 0.9032106885587656, + "grad_norm": 676.2153930664062, + "learning_rate": 1.3474714419779545e-06, + "loss": 55.1653, + "step": 223560 + }, + { + "epoch": 0.9032510898241333, + "grad_norm": 476.6521911621094, + "learning_rate": 1.3464639589869256e-06, + "loss": 42.6802, + "step": 223570 + }, + { + "epoch": 0.9032914910895009, + "grad_norm": 1053.2906494140625, + "learning_rate": 1.3454568396561874e-06, + "loss": 52.1616, + "step": 223580 + }, + { + "epoch": 0.9033318923548685, + "grad_norm": 639.0435180664062, + "learning_rate": 1.3444500840053753e-06, + "loss": 64.047, + "step": 223590 + }, + { + "epoch": 0.9033722936202362, + "grad_norm": 524.2411499023438, + "learning_rate": 1.3434436920541072e-06, + "loss": 65.5631, + "step": 223600 + }, + { + "epoch": 0.9034126948856038, + "grad_norm": 627.8121337890625, + "learning_rate": 1.342437663822016e-06, + "loss": 68.1433, + "step": 223610 + }, + { + "epoch": 0.9034530961509715, + "grad_norm": 294.93292236328125, + "learning_rate": 1.341431999328704e-06, + "loss": 100.4094, + "step": 223620 + }, + { + "epoch": 0.9034934974163391, + "grad_norm": 608.6337890625, + "learning_rate": 1.340426698593782e-06, + "loss": 58.5181, + "step": 223630 + }, + { + "epoch": 0.9035338986817067, + "grad_norm": 1339.6729736328125, + "learning_rate": 1.3394217616368478e-06, + "loss": 89.584, + "step": 223640 + }, + { + "epoch": 0.9035742999470744, + "grad_norm": 306.8447570800781, + "learning_rate": 1.3384171884774967e-06, + "loss": 78.0323, + "step": 223650 + }, + { + "epoch": 0.903614701212442, + "grad_norm": 675.7647705078125, + "learning_rate": 1.3374129791353064e-06, + "loss": 53.6586, + "step": 223660 + }, + { + "epoch": 0.9036551024778096, + "grad_norm": 1123.623046875, + "learning_rate": 1.336409133629859e-06, + "loss": 56.7409, + "step": 223670 + }, + { + "epoch": 0.9036955037431772, + "grad_norm": 594.0226440429688, + "learning_rate": 1.3354056519807236e-06, + "loss": 62.7171, + "step": 223680 + }, + { + "epoch": 0.9037359050085448, + "grad_norm": 533.5323486328125, + "learning_rate": 1.334402534207464e-06, + "loss": 71.9335, + "step": 223690 + }, + { + "epoch": 0.9037763062739125, + "grad_norm": 1163.5810546875, + "learning_rate": 1.3333997803296405e-06, + "loss": 58.1065, + "step": 223700 + }, + { + "epoch": 0.9038167075392801, + "grad_norm": 371.7931213378906, + "learning_rate": 1.332397390366793e-06, + "loss": 82.3661, + "step": 223710 + }, + { + 
"epoch": 0.9038571088046478, + "grad_norm": 816.8228149414062, + "learning_rate": 1.3313953643384748e-06, + "loss": 75.9711, + "step": 223720 + }, + { + "epoch": 0.9038975100700154, + "grad_norm": 561.39892578125, + "learning_rate": 1.33039370226421e-06, + "loss": 120.986, + "step": 223730 + }, + { + "epoch": 0.903937911335383, + "grad_norm": 813.5474853515625, + "learning_rate": 1.3293924041635342e-06, + "loss": 52.8644, + "step": 223740 + }, + { + "epoch": 0.9039783126007507, + "grad_norm": 1089.0355224609375, + "learning_rate": 1.3283914700559675e-06, + "loss": 61.9126, + "step": 223750 + }, + { + "epoch": 0.9040187138661183, + "grad_norm": 517.533935546875, + "learning_rate": 1.3273908999610164e-06, + "loss": 67.3425, + "step": 223760 + }, + { + "epoch": 0.904059115131486, + "grad_norm": 981.814208984375, + "learning_rate": 1.3263906938981985e-06, + "loss": 72.9926, + "step": 223770 + }, + { + "epoch": 0.9040995163968536, + "grad_norm": 510.303955078125, + "learning_rate": 1.325390851887005e-06, + "loss": 59.7582, + "step": 223780 + }, + { + "epoch": 0.9041399176622211, + "grad_norm": 813.87451171875, + "learning_rate": 1.3243913739469317e-06, + "loss": 75.7813, + "step": 223790 + }, + { + "epoch": 0.9041803189275888, + "grad_norm": 706.9825439453125, + "learning_rate": 1.3233922600974647e-06, + "loss": 52.5728, + "step": 223800 + }, + { + "epoch": 0.9042207201929564, + "grad_norm": 621.7930908203125, + "learning_rate": 1.3223935103580798e-06, + "loss": 74.3231, + "step": 223810 + }, + { + "epoch": 0.904261121458324, + "grad_norm": 969.4845581054688, + "learning_rate": 1.3213951247482503e-06, + "loss": 66.6424, + "step": 223820 + }, + { + "epoch": 0.9043015227236917, + "grad_norm": 2133.521484375, + "learning_rate": 1.3203971032874386e-06, + "loss": 77.5846, + "step": 223830 + }, + { + "epoch": 0.9043419239890593, + "grad_norm": 360.34552001953125, + "learning_rate": 1.3193994459951022e-06, + "loss": 48.7875, + "step": 223840 + }, + { + "epoch": 0.904382325254427, + "grad_norm": 779.7639770507812, + "learning_rate": 1.3184021528906898e-06, + "loss": 65.9055, + "step": 223850 + }, + { + "epoch": 0.9044227265197946, + "grad_norm": 454.5089111328125, + "learning_rate": 1.3174052239936485e-06, + "loss": 50.2147, + "step": 223860 + }, + { + "epoch": 0.9044631277851622, + "grad_norm": 638.8740844726562, + "learning_rate": 1.316408659323405e-06, + "loss": 54.731, + "step": 223870 + }, + { + "epoch": 0.9045035290505299, + "grad_norm": 533.8671264648438, + "learning_rate": 1.315412458899401e-06, + "loss": 58.9396, + "step": 223880 + }, + { + "epoch": 0.9045439303158975, + "grad_norm": 439.32159423828125, + "learning_rate": 1.314416622741046e-06, + "loss": 41.3985, + "step": 223890 + }, + { + "epoch": 0.9045843315812652, + "grad_norm": 478.0245361328125, + "learning_rate": 1.3134211508677597e-06, + "loss": 85.9598, + "step": 223900 + }, + { + "epoch": 0.9046247328466328, + "grad_norm": 553.142333984375, + "learning_rate": 1.312426043298949e-06, + "loss": 104.2955, + "step": 223910 + }, + { + "epoch": 0.9046651341120003, + "grad_norm": 649.4154663085938, + "learning_rate": 1.3114313000540114e-06, + "loss": 50.8248, + "step": 223920 + }, + { + "epoch": 0.904705535377368, + "grad_norm": 929.0820922851562, + "learning_rate": 1.3104369211523471e-06, + "loss": 81.6999, + "step": 223930 + }, + { + "epoch": 0.9047459366427356, + "grad_norm": 984.729736328125, + "learning_rate": 1.3094429066133341e-06, + "loss": 44.2702, + "step": 223940 + }, + { + "epoch": 0.9047863379081033, + "grad_norm": 
700.0787963867188, + "learning_rate": 1.3084492564563545e-06, + "loss": 48.7378, + "step": 223950 + }, + { + "epoch": 0.9048267391734709, + "grad_norm": 537.3731079101562, + "learning_rate": 1.3074559707007772e-06, + "loss": 48.6774, + "step": 223960 + }, + { + "epoch": 0.9048671404388385, + "grad_norm": 976.7647705078125, + "learning_rate": 1.3064630493659714e-06, + "loss": 68.0619, + "step": 223970 + }, + { + "epoch": 0.9049075417042062, + "grad_norm": 553.5558471679688, + "learning_rate": 1.3054704924712925e-06, + "loss": 52.8855, + "step": 223980 + }, + { + "epoch": 0.9049479429695738, + "grad_norm": 725.6716918945312, + "learning_rate": 1.3044783000360938e-06, + "loss": 84.244, + "step": 223990 + }, + { + "epoch": 0.9049883442349415, + "grad_norm": 692.5856323242188, + "learning_rate": 1.3034864720797112e-06, + "loss": 67.8682, + "step": 224000 + }, + { + "epoch": 0.9050287455003091, + "grad_norm": 523.4568481445312, + "learning_rate": 1.302495008621487e-06, + "loss": 52.7275, + "step": 224010 + }, + { + "epoch": 0.9050691467656767, + "grad_norm": 629.0975952148438, + "learning_rate": 1.3015039096807524e-06, + "loss": 58.7515, + "step": 224020 + }, + { + "epoch": 0.9051095480310444, + "grad_norm": 589.2857055664062, + "learning_rate": 1.3005131752768185e-06, + "loss": 83.5096, + "step": 224030 + }, + { + "epoch": 0.905149949296412, + "grad_norm": 362.1048583984375, + "learning_rate": 1.2995228054290144e-06, + "loss": 70.3352, + "step": 224040 + }, + { + "epoch": 0.9051903505617795, + "grad_norm": 707.5917358398438, + "learning_rate": 1.2985328001566332e-06, + "loss": 53.1806, + "step": 224050 + }, + { + "epoch": 0.9052307518271472, + "grad_norm": 457.467529296875, + "learning_rate": 1.297543159478991e-06, + "loss": 57.8336, + "step": 224060 + }, + { + "epoch": 0.9052711530925148, + "grad_norm": 348.59228515625, + "learning_rate": 1.2965538834153702e-06, + "loss": 89.2807, + "step": 224070 + }, + { + "epoch": 0.9053115543578825, + "grad_norm": 1064.025634765625, + "learning_rate": 1.2955649719850593e-06, + "loss": 60.1034, + "step": 224080 + }, + { + "epoch": 0.9053519556232501, + "grad_norm": 832.38232421875, + "learning_rate": 1.2945764252073435e-06, + "loss": 45.9545, + "step": 224090 + }, + { + "epoch": 0.9053923568886177, + "grad_norm": 271.0911865234375, + "learning_rate": 1.2935882431014868e-06, + "loss": 55.7165, + "step": 224100 + }, + { + "epoch": 0.9054327581539854, + "grad_norm": 745.3134155273438, + "learning_rate": 1.2926004256867564e-06, + "loss": 61.4699, + "step": 224110 + }, + { + "epoch": 0.905473159419353, + "grad_norm": 534.8504028320312, + "learning_rate": 1.2916129729824145e-06, + "loss": 68.6361, + "step": 224120 + }, + { + "epoch": 0.9055135606847207, + "grad_norm": 686.9100952148438, + "learning_rate": 1.290625885007708e-06, + "loss": 79.222, + "step": 224130 + }, + { + "epoch": 0.9055539619500883, + "grad_norm": 530.6326904296875, + "learning_rate": 1.2896391617818838e-06, + "loss": 47.5688, + "step": 224140 + }, + { + "epoch": 0.9055943632154559, + "grad_norm": 439.57452392578125, + "learning_rate": 1.2886528033241773e-06, + "loss": 53.6146, + "step": 224150 + }, + { + "epoch": 0.9056347644808236, + "grad_norm": 361.5394592285156, + "learning_rate": 1.2876668096538158e-06, + "loss": 72.4455, + "step": 224160 + }, + { + "epoch": 0.9056751657461912, + "grad_norm": 989.2299194335938, + "learning_rate": 1.2866811807900236e-06, + "loss": 53.0704, + "step": 224170 + }, + { + "epoch": 0.9057155670115588, + "grad_norm": 581.9675903320312, + "learning_rate": 
1.2856959167520166e-06, + "loss": 47.7663, + "step": 224180 + }, + { + "epoch": 0.9057559682769264, + "grad_norm": 519.4675903320312, + "learning_rate": 1.2847110175590038e-06, + "loss": 46.4361, + "step": 224190 + }, + { + "epoch": 0.905796369542294, + "grad_norm": 621.0328369140625, + "learning_rate": 1.2837264832301854e-06, + "loss": 44.8617, + "step": 224200 + }, + { + "epoch": 0.9058367708076617, + "grad_norm": 578.2052001953125, + "learning_rate": 1.2827423137847505e-06, + "loss": 53.1646, + "step": 224210 + }, + { + "epoch": 0.9058771720730293, + "grad_norm": 703.8250122070312, + "learning_rate": 1.281758509241895e-06, + "loss": 62.1071, + "step": 224220 + }, + { + "epoch": 0.905917573338397, + "grad_norm": 325.7259521484375, + "learning_rate": 1.280775069620792e-06, + "loss": 67.2651, + "step": 224230 + }, + { + "epoch": 0.9059579746037646, + "grad_norm": 481.5425720214844, + "learning_rate": 1.279791994940618e-06, + "loss": 43.2539, + "step": 224240 + }, + { + "epoch": 0.9059983758691322, + "grad_norm": 1673.975830078125, + "learning_rate": 1.2788092852205346e-06, + "loss": 61.3203, + "step": 224250 + }, + { + "epoch": 0.9060387771344999, + "grad_norm": 584.09814453125, + "learning_rate": 1.2778269404797029e-06, + "loss": 83.5031, + "step": 224260 + }, + { + "epoch": 0.9060791783998675, + "grad_norm": 1082.5809326171875, + "learning_rate": 1.2768449607372778e-06, + "loss": 67.8426, + "step": 224270 + }, + { + "epoch": 0.9061195796652352, + "grad_norm": 1095.920166015625, + "learning_rate": 1.2758633460123959e-06, + "loss": 74.7229, + "step": 224280 + }, + { + "epoch": 0.9061599809306028, + "grad_norm": 747.3521728515625, + "learning_rate": 1.2748820963241971e-06, + "loss": 68.1058, + "step": 224290 + }, + { + "epoch": 0.9062003821959704, + "grad_norm": 353.958984375, + "learning_rate": 1.2739012116918125e-06, + "loss": 43.3153, + "step": 224300 + }, + { + "epoch": 0.906240783461338, + "grad_norm": 596.5028686523438, + "learning_rate": 1.2729206921343674e-06, + "loss": 91.1634, + "step": 224310 + }, + { + "epoch": 0.9062811847267056, + "grad_norm": 754.8894653320312, + "learning_rate": 1.2719405376709704e-06, + "loss": 72.3594, + "step": 224320 + }, + { + "epoch": 0.9063215859920732, + "grad_norm": 563.6803588867188, + "learning_rate": 1.2709607483207397e-06, + "loss": 44.9163, + "step": 224330 + }, + { + "epoch": 0.9063619872574409, + "grad_norm": 621.7728271484375, + "learning_rate": 1.269981324102767e-06, + "loss": 36.04, + "step": 224340 + }, + { + "epoch": 0.9064023885228085, + "grad_norm": 387.3051452636719, + "learning_rate": 1.2690022650361544e-06, + "loss": 42.1906, + "step": 224350 + }, + { + "epoch": 0.9064427897881762, + "grad_norm": 735.0849609375, + "learning_rate": 1.2680235711399868e-06, + "loss": 48.3956, + "step": 224360 + }, + { + "epoch": 0.9064831910535438, + "grad_norm": 501.9884033203125, + "learning_rate": 1.267045242433338e-06, + "loss": 56.8739, + "step": 224370 + }, + { + "epoch": 0.9065235923189114, + "grad_norm": 1661.3331298828125, + "learning_rate": 1.2660672789352924e-06, + "loss": 82.5641, + "step": 224380 + }, + { + "epoch": 0.9065639935842791, + "grad_norm": 324.2792663574219, + "learning_rate": 1.265089680664906e-06, + "loss": 51.4075, + "step": 224390 + }, + { + "epoch": 0.9066043948496467, + "grad_norm": 1548.6533203125, + "learning_rate": 1.2641124476412436e-06, + "loss": 83.7154, + "step": 224400 + }, + { + "epoch": 0.9066447961150144, + "grad_norm": 617.8522338867188, + "learning_rate": 1.2631355798833544e-06, + "loss": 54.8215, + "step": 
224410 + }, + { + "epoch": 0.906685197380382, + "grad_norm": 884.1571044921875, + "learning_rate": 1.2621590774102831e-06, + "loss": 80.8181, + "step": 224420 + }, + { + "epoch": 0.9067255986457495, + "grad_norm": 876.1567993164062, + "learning_rate": 1.2611829402410703e-06, + "loss": 82.9546, + "step": 224430 + }, + { + "epoch": 0.9067659999111172, + "grad_norm": 396.60888671875, + "learning_rate": 1.2602071683947404e-06, + "loss": 60.8188, + "step": 224440 + }, + { + "epoch": 0.9068064011764848, + "grad_norm": 594.5731201171875, + "learning_rate": 1.2592317618903204e-06, + "loss": 49.6413, + "step": 224450 + }, + { + "epoch": 0.9068468024418525, + "grad_norm": 650.7897338867188, + "learning_rate": 1.2582567207468243e-06, + "loss": 55.6207, + "step": 224460 + }, + { + "epoch": 0.9068872037072201, + "grad_norm": 585.8992919921875, + "learning_rate": 1.257282044983268e-06, + "loss": 74.9455, + "step": 224470 + }, + { + "epoch": 0.9069276049725877, + "grad_norm": 486.8210144042969, + "learning_rate": 1.2563077346186382e-06, + "loss": 74.186, + "step": 224480 + }, + { + "epoch": 0.9069680062379554, + "grad_norm": 211.26858520507812, + "learning_rate": 1.2553337896719486e-06, + "loss": 79.8826, + "step": 224490 + }, + { + "epoch": 0.907008407503323, + "grad_norm": 610.60498046875, + "learning_rate": 1.2543602101621711e-06, + "loss": 58.0202, + "step": 224500 + }, + { + "epoch": 0.9070488087686907, + "grad_norm": 667.841064453125, + "learning_rate": 1.2533869961082944e-06, + "loss": 49.4052, + "step": 224510 + }, + { + "epoch": 0.9070892100340583, + "grad_norm": 895.470703125, + "learning_rate": 1.2524141475292883e-06, + "loss": 61.8216, + "step": 224520 + }, + { + "epoch": 0.9071296112994259, + "grad_norm": 246.18310546875, + "learning_rate": 1.2514416644441196e-06, + "loss": 61.7963, + "step": 224530 + }, + { + "epoch": 0.9071700125647936, + "grad_norm": 1231.4830322265625, + "learning_rate": 1.2504695468717532e-06, + "loss": 76.5339, + "step": 224540 + }, + { + "epoch": 0.9072104138301612, + "grad_norm": 740.5501098632812, + "learning_rate": 1.2494977948311315e-06, + "loss": 76.366, + "step": 224550 + }, + { + "epoch": 0.9072508150955287, + "grad_norm": 432.94903564453125, + "learning_rate": 1.248526408341204e-06, + "loss": 61.5258, + "step": 224560 + }, + { + "epoch": 0.9072912163608964, + "grad_norm": 516.9461669921875, + "learning_rate": 1.2475553874209067e-06, + "loss": 42.9339, + "step": 224570 + }, + { + "epoch": 0.907331617626264, + "grad_norm": 586.3526000976562, + "learning_rate": 1.2465847320891732e-06, + "loss": 53.2496, + "step": 224580 + }, + { + "epoch": 0.9073720188916317, + "grad_norm": 366.79925537109375, + "learning_rate": 1.245614442364924e-06, + "loss": 101.6134, + "step": 224590 + }, + { + "epoch": 0.9074124201569993, + "grad_norm": 793.0040283203125, + "learning_rate": 1.2446445182670818e-06, + "loss": 52.734, + "step": 224600 + }, + { + "epoch": 0.9074528214223669, + "grad_norm": 763.4259643554688, + "learning_rate": 1.243674959814547e-06, + "loss": 51.1072, + "step": 224610 + }, + { + "epoch": 0.9074932226877346, + "grad_norm": 571.683349609375, + "learning_rate": 1.2427057670262244e-06, + "loss": 54.705, + "step": 224620 + }, + { + "epoch": 0.9075336239531022, + "grad_norm": 612.1589965820312, + "learning_rate": 1.2417369399210121e-06, + "loss": 79.1051, + "step": 224630 + }, + { + "epoch": 0.9075740252184699, + "grad_norm": 1292.39208984375, + "learning_rate": 1.2407684785177909e-06, + "loss": 77.8271, + "step": 224640 + }, + { + "epoch": 0.9076144264838375, + 
"grad_norm": 497.6644592285156, + "learning_rate": 1.2398003828354522e-06, + "loss": 89.4932, + "step": 224650 + }, + { + "epoch": 0.9076548277492051, + "grad_norm": 414.5049743652344, + "learning_rate": 1.2388326528928563e-06, + "loss": 52.5838, + "step": 224660 + }, + { + "epoch": 0.9076952290145728, + "grad_norm": 767.6492309570312, + "learning_rate": 1.2378652887088837e-06, + "loss": 46.2832, + "step": 224670 + }, + { + "epoch": 0.9077356302799404, + "grad_norm": 977.2911376953125, + "learning_rate": 1.236898290302384e-06, + "loss": 73.2122, + "step": 224680 + }, + { + "epoch": 0.907776031545308, + "grad_norm": 388.73944091796875, + "learning_rate": 1.2359316576922108e-06, + "loss": 59.5898, + "step": 224690 + }, + { + "epoch": 0.9078164328106756, + "grad_norm": 1391.84521484375, + "learning_rate": 1.2349653908972138e-06, + "loss": 50.0566, + "step": 224700 + }, + { + "epoch": 0.9078568340760432, + "grad_norm": 684.59619140625, + "learning_rate": 1.233999489936224e-06, + "loss": 62.5855, + "step": 224710 + }, + { + "epoch": 0.9078972353414109, + "grad_norm": 648.3529663085938, + "learning_rate": 1.2330339548280757e-06, + "loss": 108.467, + "step": 224720 + }, + { + "epoch": 0.9079376366067785, + "grad_norm": 560.5143432617188, + "learning_rate": 1.2320687855915936e-06, + "loss": 65.9199, + "step": 224730 + }, + { + "epoch": 0.9079780378721461, + "grad_norm": 805.701416015625, + "learning_rate": 1.2311039822455917e-06, + "loss": 49.0017, + "step": 224740 + }, + { + "epoch": 0.9080184391375138, + "grad_norm": 1801.6422119140625, + "learning_rate": 1.2301395448088815e-06, + "loss": 101.2508, + "step": 224750 + }, + { + "epoch": 0.9080588404028814, + "grad_norm": 894.8280029296875, + "learning_rate": 1.2291754733002659e-06, + "loss": 78.163, + "step": 224760 + }, + { + "epoch": 0.9080992416682491, + "grad_norm": 732.5630493164062, + "learning_rate": 1.2282117677385362e-06, + "loss": 63.6804, + "step": 224770 + }, + { + "epoch": 0.9081396429336167, + "grad_norm": 330.9618835449219, + "learning_rate": 1.2272484281424824e-06, + "loss": 71.5051, + "step": 224780 + }, + { + "epoch": 0.9081800441989843, + "grad_norm": 1083.729248046875, + "learning_rate": 1.2262854545308844e-06, + "loss": 53.4422, + "step": 224790 + }, + { + "epoch": 0.908220445464352, + "grad_norm": 747.0211791992188, + "learning_rate": 1.2253228469225186e-06, + "loss": 48.3737, + "step": 224800 + }, + { + "epoch": 0.9082608467297196, + "grad_norm": 720.2556762695312, + "learning_rate": 1.2243606053361522e-06, + "loss": 65.7403, + "step": 224810 + }, + { + "epoch": 0.9083012479950872, + "grad_norm": 728.1594848632812, + "learning_rate": 1.2233987297905347e-06, + "loss": 47.4217, + "step": 224820 + }, + { + "epoch": 0.9083416492604548, + "grad_norm": 474.6098937988281, + "learning_rate": 1.2224372203044332e-06, + "loss": 63.1549, + "step": 224830 + }, + { + "epoch": 0.9083820505258224, + "grad_norm": 772.771728515625, + "learning_rate": 1.2214760768965817e-06, + "loss": 55.2894, + "step": 224840 + }, + { + "epoch": 0.9084224517911901, + "grad_norm": 854.3800659179688, + "learning_rate": 1.2205152995857228e-06, + "loss": 85.6646, + "step": 224850 + }, + { + "epoch": 0.9084628530565577, + "grad_norm": 923.1888427734375, + "learning_rate": 1.2195548883905861e-06, + "loss": 65.3896, + "step": 224860 + }, + { + "epoch": 0.9085032543219254, + "grad_norm": 366.7779235839844, + "learning_rate": 1.2185948433298945e-06, + "loss": 59.5288, + "step": 224870 + }, + { + "epoch": 0.908543655587293, + "grad_norm": 1235.2933349609375, + 
"learning_rate": 1.2176351644223705e-06, + "loss": 81.6377, + "step": 224880 + }, + { + "epoch": 0.9085840568526606, + "grad_norm": 1062.806884765625, + "learning_rate": 1.216675851686715e-06, + "loss": 80.2774, + "step": 224890 + }, + { + "epoch": 0.9086244581180283, + "grad_norm": 556.7374267578125, + "learning_rate": 1.215716905141635e-06, + "loss": 60.6658, + "step": 224900 + }, + { + "epoch": 0.9086648593833959, + "grad_norm": 670.254150390625, + "learning_rate": 1.2147583248058225e-06, + "loss": 64.8875, + "step": 224910 + }, + { + "epoch": 0.9087052606487636, + "grad_norm": 714.25634765625, + "learning_rate": 1.2138001106979735e-06, + "loss": 53.0773, + "step": 224920 + }, + { + "epoch": 0.9087456619141312, + "grad_norm": 261.23419189453125, + "learning_rate": 1.2128422628367553e-06, + "loss": 65.4036, + "step": 224930 + }, + { + "epoch": 0.9087860631794987, + "grad_norm": 593.7000122070312, + "learning_rate": 1.2118847812408552e-06, + "loss": 84.487, + "step": 224940 + }, + { + "epoch": 0.9088264644448664, + "grad_norm": 660.6608276367188, + "learning_rate": 1.2109276659289315e-06, + "loss": 44.2214, + "step": 224950 + }, + { + "epoch": 0.908866865710234, + "grad_norm": 396.62786865234375, + "learning_rate": 1.2099709169196472e-06, + "loss": 93.2467, + "step": 224960 + }, + { + "epoch": 0.9089072669756016, + "grad_norm": 646.77197265625, + "learning_rate": 1.209014534231656e-06, + "loss": 52.1761, + "step": 224970 + }, + { + "epoch": 0.9089476682409693, + "grad_norm": 1012.2188110351562, + "learning_rate": 1.2080585178835946e-06, + "loss": 83.6811, + "step": 224980 + }, + { + "epoch": 0.9089880695063369, + "grad_norm": 1177.95458984375, + "learning_rate": 1.207102867894112e-06, + "loss": 91.5255, + "step": 224990 + }, + { + "epoch": 0.9090284707717046, + "grad_norm": 920.0118408203125, + "learning_rate": 1.2061475842818337e-06, + "loss": 54.3996, + "step": 225000 + }, + { + "epoch": 0.9090688720370722, + "grad_norm": 881.8795776367188, + "learning_rate": 1.2051926670653824e-06, + "loss": 57.5606, + "step": 225010 + }, + { + "epoch": 0.9091092733024398, + "grad_norm": 1518.319580078125, + "learning_rate": 1.2042381162633765e-06, + "loss": 91.8327, + "step": 225020 + }, + { + "epoch": 0.9091496745678075, + "grad_norm": 604.890869140625, + "learning_rate": 1.2032839318944255e-06, + "loss": 53.174, + "step": 225030 + }, + { + "epoch": 0.9091900758331751, + "grad_norm": 720.8255615234375, + "learning_rate": 1.2023301139771349e-06, + "loss": 78.9611, + "step": 225040 + }, + { + "epoch": 0.9092304770985428, + "grad_norm": 566.9550170898438, + "learning_rate": 1.2013766625300915e-06, + "loss": 69.5951, + "step": 225050 + }, + { + "epoch": 0.9092708783639104, + "grad_norm": 495.0597229003906, + "learning_rate": 1.2004235775718898e-06, + "loss": 69.7713, + "step": 225060 + }, + { + "epoch": 0.9093112796292779, + "grad_norm": 722.394775390625, + "learning_rate": 1.199470859121108e-06, + "loss": 58.5742, + "step": 225070 + }, + { + "epoch": 0.9093516808946456, + "grad_norm": 401.758544921875, + "learning_rate": 1.1985185071963267e-06, + "loss": 52.9157, + "step": 225080 + }, + { + "epoch": 0.9093920821600132, + "grad_norm": 675.2825317382812, + "learning_rate": 1.1975665218160982e-06, + "loss": 52.7187, + "step": 225090 + }, + { + "epoch": 0.9094324834253809, + "grad_norm": 707.7344970703125, + "learning_rate": 1.196614902998996e-06, + "loss": 64.5017, + "step": 225100 + }, + { + "epoch": 0.9094728846907485, + "grad_norm": 479.8082275390625, + "learning_rate": 1.1956636507635656e-06, + "loss": 
51.017, + "step": 225110 + }, + { + "epoch": 0.9095132859561161, + "grad_norm": 521.3306274414062, + "learning_rate": 1.194712765128354e-06, + "loss": 40.8532, + "step": 225120 + }, + { + "epoch": 0.9095536872214838, + "grad_norm": 191.6006622314453, + "learning_rate": 1.1937622461119002e-06, + "loss": 73.4767, + "step": 225130 + }, + { + "epoch": 0.9095940884868514, + "grad_norm": 634.1344604492188, + "learning_rate": 1.192812093732727e-06, + "loss": 53.6514, + "step": 225140 + }, + { + "epoch": 0.909634489752219, + "grad_norm": 735.2237548828125, + "learning_rate": 1.1918623080093727e-06, + "loss": 66.2303, + "step": 225150 + }, + { + "epoch": 0.9096748910175867, + "grad_norm": 619.7503662109375, + "learning_rate": 1.1909128889603406e-06, + "loss": 51.6375, + "step": 225160 + }, + { + "epoch": 0.9097152922829543, + "grad_norm": 759.5311889648438, + "learning_rate": 1.1899638366041466e-06, + "loss": 108.0682, + "step": 225170 + }, + { + "epoch": 0.909755693548322, + "grad_norm": 869.3694458007812, + "learning_rate": 1.1890151509592918e-06, + "loss": 81.5644, + "step": 225180 + }, + { + "epoch": 0.9097960948136896, + "grad_norm": 294.3611755371094, + "learning_rate": 1.18806683204427e-06, + "loss": 62.0306, + "step": 225190 + }, + { + "epoch": 0.9098364960790571, + "grad_norm": 534.8812866210938, + "learning_rate": 1.1871188798775713e-06, + "loss": 67.3516, + "step": 225200 + }, + { + "epoch": 0.9098768973444248, + "grad_norm": 880.0435791015625, + "learning_rate": 1.1861712944776781e-06, + "loss": 65.1721, + "step": 225210 + }, + { + "epoch": 0.9099172986097924, + "grad_norm": 697.0563354492188, + "learning_rate": 1.1852240758630585e-06, + "loss": 67.9559, + "step": 225220 + }, + { + "epoch": 0.9099576998751601, + "grad_norm": 314.5659484863281, + "learning_rate": 1.1842772240521838e-06, + "loss": 58.6697, + "step": 225230 + }, + { + "epoch": 0.9099981011405277, + "grad_norm": 965.6824951171875, + "learning_rate": 1.1833307390635084e-06, + "loss": 83.3973, + "step": 225240 + }, + { + "epoch": 0.9100385024058953, + "grad_norm": 459.7911682128906, + "learning_rate": 1.182384620915491e-06, + "loss": 54.2702, + "step": 225250 + }, + { + "epoch": 0.910078903671263, + "grad_norm": 536.1661987304688, + "learning_rate": 1.1814388696265743e-06, + "loss": 48.4023, + "step": 225260 + }, + { + "epoch": 0.9101193049366306, + "grad_norm": 397.65692138671875, + "learning_rate": 1.1804934852151884e-06, + "loss": 46.67, + "step": 225270 + }, + { + "epoch": 0.9101597062019983, + "grad_norm": 1074.755859375, + "learning_rate": 1.1795484676997803e-06, + "loss": 68.5005, + "step": 225280 + }, + { + "epoch": 0.9102001074673659, + "grad_norm": 716.1530151367188, + "learning_rate": 1.178603817098758e-06, + "loss": 54.7074, + "step": 225290 + }, + { + "epoch": 0.9102405087327335, + "grad_norm": 641.9833984375, + "learning_rate": 1.1776595334305462e-06, + "loss": 105.9259, + "step": 225300 + }, + { + "epoch": 0.9102809099981012, + "grad_norm": 675.2156372070312, + "learning_rate": 1.1767156167135552e-06, + "loss": 50.9412, + "step": 225310 + }, + { + "epoch": 0.9103213112634688, + "grad_norm": 626.1691284179688, + "learning_rate": 1.1757720669661765e-06, + "loss": 54.7313, + "step": 225320 + }, + { + "epoch": 0.9103617125288364, + "grad_norm": 862.8429565429688, + "learning_rate": 1.1748288842068202e-06, + "loss": 59.5924, + "step": 225330 + }, + { + "epoch": 0.910402113794204, + "grad_norm": 582.5835571289062, + "learning_rate": 1.1738860684538622e-06, + "loss": 61.8396, + "step": 225340 + }, + { + "epoch": 
0.9104425150595716, + "grad_norm": 694.2994384765625, + "learning_rate": 1.1729436197256882e-06, + "loss": 82.6766, + "step": 225350 + }, + { + "epoch": 0.9104829163249393, + "grad_norm": 813.416015625, + "learning_rate": 1.1720015380406724e-06, + "loss": 47.9021, + "step": 225360 + }, + { + "epoch": 0.9105233175903069, + "grad_norm": 244.39883422851562, + "learning_rate": 1.171059823417182e-06, + "loss": 38.3181, + "step": 225370 + }, + { + "epoch": 0.9105637188556746, + "grad_norm": 510.8074645996094, + "learning_rate": 1.1701184758735695e-06, + "loss": 64.9926, + "step": 225380 + }, + { + "epoch": 0.9106041201210422, + "grad_norm": 491.42327880859375, + "learning_rate": 1.1691774954281931e-06, + "loss": 80.8822, + "step": 225390 + }, + { + "epoch": 0.9106445213864098, + "grad_norm": 288.5765686035156, + "learning_rate": 1.1682368820993983e-06, + "loss": 76.8746, + "step": 225400 + }, + { + "epoch": 0.9106849226517775, + "grad_norm": 714.8207397460938, + "learning_rate": 1.1672966359055172e-06, + "loss": 70.5493, + "step": 225410 + }, + { + "epoch": 0.9107253239171451, + "grad_norm": 736.543212890625, + "learning_rate": 1.1663567568648882e-06, + "loss": 88.2236, + "step": 225420 + }, + { + "epoch": 0.9107657251825128, + "grad_norm": 553.852294921875, + "learning_rate": 1.1654172449958257e-06, + "loss": 86.497, + "step": 225430 + }, + { + "epoch": 0.9108061264478804, + "grad_norm": 941.0424194335938, + "learning_rate": 1.164478100316655e-06, + "loss": 63.4179, + "step": 225440 + }, + { + "epoch": 0.910846527713248, + "grad_norm": 374.4584045410156, + "learning_rate": 1.1635393228456793e-06, + "loss": 71.8724, + "step": 225450 + }, + { + "epoch": 0.9108869289786156, + "grad_norm": 765.2040405273438, + "learning_rate": 1.1626009126012018e-06, + "loss": 47.2444, + "step": 225460 + }, + { + "epoch": 0.9109273302439832, + "grad_norm": 395.72491455078125, + "learning_rate": 1.1616628696015187e-06, + "loss": 58.4815, + "step": 225470 + }, + { + "epoch": 0.9109677315093508, + "grad_norm": 433.0412902832031, + "learning_rate": 1.1607251938649134e-06, + "loss": 54.9752, + "step": 225480 + }, + { + "epoch": 0.9110081327747185, + "grad_norm": 250.4219512939453, + "learning_rate": 1.1597878854096733e-06, + "loss": 51.944, + "step": 225490 + }, + { + "epoch": 0.9110485340400861, + "grad_norm": 553.6806640625, + "learning_rate": 1.1588509442540662e-06, + "loss": 71.2005, + "step": 225500 + }, + { + "epoch": 0.9110889353054538, + "grad_norm": 397.5582580566406, + "learning_rate": 1.1579143704163599e-06, + "loss": 94.9974, + "step": 225510 + }, + { + "epoch": 0.9111293365708214, + "grad_norm": 685.5157470703125, + "learning_rate": 1.1569781639148148e-06, + "loss": 93.8283, + "step": 225520 + }, + { + "epoch": 0.911169737836189, + "grad_norm": 620.15771484375, + "learning_rate": 1.156042324767681e-06, + "loss": 57.7604, + "step": 225530 + }, + { + "epoch": 0.9112101391015567, + "grad_norm": 540.0260620117188, + "learning_rate": 1.1551068529931997e-06, + "loss": 42.2198, + "step": 225540 + }, + { + "epoch": 0.9112505403669243, + "grad_norm": 146.38214111328125, + "learning_rate": 1.1541717486096182e-06, + "loss": 67.5932, + "step": 225550 + }, + { + "epoch": 0.911290941632292, + "grad_norm": 307.3544006347656, + "learning_rate": 1.1532370116351599e-06, + "loss": 64.9948, + "step": 225560 + }, + { + "epoch": 0.9113313428976596, + "grad_norm": 671.755126953125, + "learning_rate": 1.1523026420880457e-06, + "loss": 62.452, + "step": 225570 + }, + { + "epoch": 0.9113717441630271, + "grad_norm": 524.630859375, + 
"learning_rate": 1.151368639986501e-06, + "loss": 80.6027, + "step": 225580 + }, + { + "epoch": 0.9114121454283948, + "grad_norm": 1141.1142578125, + "learning_rate": 1.1504350053487201e-06, + "loss": 116.3356, + "step": 225590 + }, + { + "epoch": 0.9114525466937624, + "grad_norm": 857.3485717773438, + "learning_rate": 1.1495017381929197e-06, + "loss": 60.5438, + "step": 225600 + }, + { + "epoch": 0.91149294795913, + "grad_norm": 739.5303955078125, + "learning_rate": 1.148568838537285e-06, + "loss": 42.093, + "step": 225610 + }, + { + "epoch": 0.9115333492244977, + "grad_norm": 411.86419677734375, + "learning_rate": 1.1476363064000062e-06, + "loss": 73.1998, + "step": 225620 + }, + { + "epoch": 0.9115737504898653, + "grad_norm": 794.5975952148438, + "learning_rate": 1.1467041417992642e-06, + "loss": 46.1199, + "step": 225630 + }, + { + "epoch": 0.911614151755233, + "grad_norm": 864.4928588867188, + "learning_rate": 1.145772344753231e-06, + "loss": 115.9764, + "step": 225640 + }, + { + "epoch": 0.9116545530206006, + "grad_norm": 514.2152709960938, + "learning_rate": 1.1448409152800743e-06, + "loss": 36.02, + "step": 225650 + }, + { + "epoch": 0.9116949542859683, + "grad_norm": 764.4119262695312, + "learning_rate": 1.143909853397951e-06, + "loss": 40.4367, + "step": 225660 + }, + { + "epoch": 0.9117353555513359, + "grad_norm": 638.3949584960938, + "learning_rate": 1.1429791591250106e-06, + "loss": 78.9779, + "step": 225670 + }, + { + "epoch": 0.9117757568167035, + "grad_norm": 728.14892578125, + "learning_rate": 1.142048832479401e-06, + "loss": 54.7629, + "step": 225680 + }, + { + "epoch": 0.9118161580820712, + "grad_norm": 491.0167236328125, + "learning_rate": 1.1411188734792588e-06, + "loss": 74.4667, + "step": 225690 + }, + { + "epoch": 0.9118565593474388, + "grad_norm": 688.6765747070312, + "learning_rate": 1.1401892821427096e-06, + "loss": 71.6387, + "step": 225700 + }, + { + "epoch": 0.9118969606128063, + "grad_norm": 470.6224365234375, + "learning_rate": 1.1392600584878855e-06, + "loss": 87.9709, + "step": 225710 + }, + { + "epoch": 0.911937361878174, + "grad_norm": 734.5479736328125, + "learning_rate": 1.1383312025328941e-06, + "loss": 65.0279, + "step": 225720 + }, + { + "epoch": 0.9119777631435416, + "grad_norm": 767.07568359375, + "learning_rate": 1.1374027142958455e-06, + "loss": 69.7878, + "step": 225730 + }, + { + "epoch": 0.9120181644089093, + "grad_norm": 366.0469665527344, + "learning_rate": 1.1364745937948473e-06, + "loss": 59.8288, + "step": 225740 + }, + { + "epoch": 0.9120585656742769, + "grad_norm": 793.14794921875, + "learning_rate": 1.1355468410479808e-06, + "loss": 78.6838, + "step": 225750 + }, + { + "epoch": 0.9120989669396445, + "grad_norm": 639.5416870117188, + "learning_rate": 1.134619456073347e-06, + "loss": 57.0906, + "step": 225760 + }, + { + "epoch": 0.9121393682050122, + "grad_norm": 1037.8489990234375, + "learning_rate": 1.1336924388890135e-06, + "loss": 87.7672, + "step": 225770 + }, + { + "epoch": 0.9121797694703798, + "grad_norm": 706.2542724609375, + "learning_rate": 1.132765789513064e-06, + "loss": 65.7023, + "step": 225780 + }, + { + "epoch": 0.9122201707357475, + "grad_norm": 324.8990783691406, + "learning_rate": 1.131839507963557e-06, + "loss": 81.8321, + "step": 225790 + }, + { + "epoch": 0.9122605720011151, + "grad_norm": 171.4705810546875, + "learning_rate": 1.1309135942585513e-06, + "loss": 52.6776, + "step": 225800 + }, + { + "epoch": 0.9123009732664827, + "grad_norm": 548.4721069335938, + "learning_rate": 1.1299880484161041e-06, + "loss": 
68.2269, + "step": 225810 + }, + { + "epoch": 0.9123413745318504, + "grad_norm": 859.7296142578125, + "learning_rate": 1.1290628704542516e-06, + "loss": 72.8381, + "step": 225820 + }, + { + "epoch": 0.912381775797218, + "grad_norm": 768.3001708984375, + "learning_rate": 1.128138060391033e-06, + "loss": 108.9974, + "step": 225830 + }, + { + "epoch": 0.9124221770625855, + "grad_norm": 1000.8006591796875, + "learning_rate": 1.1272136182444804e-06, + "loss": 68.675, + "step": 225840 + }, + { + "epoch": 0.9124625783279532, + "grad_norm": 863.776123046875, + "learning_rate": 1.1262895440326149e-06, + "loss": 60.1931, + "step": 225850 + }, + { + "epoch": 0.9125029795933208, + "grad_norm": 426.031494140625, + "learning_rate": 1.125365837773449e-06, + "loss": 59.7661, + "step": 225860 + }, + { + "epoch": 0.9125433808586885, + "grad_norm": 1227.77001953125, + "learning_rate": 1.1244424994849989e-06, + "loss": 101.649, + "step": 225870 + }, + { + "epoch": 0.9125837821240561, + "grad_norm": 614.5582275390625, + "learning_rate": 1.123519529185253e-06, + "loss": 68.6725, + "step": 225880 + }, + { + "epoch": 0.9126241833894237, + "grad_norm": 1619.830810546875, + "learning_rate": 1.1225969268922187e-06, + "loss": 85.5207, + "step": 225890 + }, + { + "epoch": 0.9126645846547914, + "grad_norm": 570.661865234375, + "learning_rate": 1.1216746926238708e-06, + "loss": 48.158, + "step": 225900 + }, + { + "epoch": 0.912704985920159, + "grad_norm": 560.7559204101562, + "learning_rate": 1.120752826398197e-06, + "loss": 57.5115, + "step": 225910 + }, + { + "epoch": 0.9127453871855267, + "grad_norm": 168.8243865966797, + "learning_rate": 1.1198313282331674e-06, + "loss": 51.4902, + "step": 225920 + }, + { + "epoch": 0.9127857884508943, + "grad_norm": 975.3294677734375, + "learning_rate": 1.118910198146741e-06, + "loss": 62.0105, + "step": 225930 + }, + { + "epoch": 0.912826189716262, + "grad_norm": 489.2710266113281, + "learning_rate": 1.1179894361568878e-06, + "loss": 46.8012, + "step": 225940 + }, + { + "epoch": 0.9128665909816296, + "grad_norm": 1643.773681640625, + "learning_rate": 1.1170690422815468e-06, + "loss": 68.8638, + "step": 225950 + }, + { + "epoch": 0.9129069922469972, + "grad_norm": 909.5855102539062, + "learning_rate": 1.116149016538668e-06, + "loss": 88.6427, + "step": 225960 + }, + { + "epoch": 0.9129473935123648, + "grad_norm": 992.6939086914062, + "learning_rate": 1.115229358946186e-06, + "loss": 69.001, + "step": 225970 + }, + { + "epoch": 0.9129877947777324, + "grad_norm": 542.9225463867188, + "learning_rate": 1.1143100695220333e-06, + "loss": 77.7028, + "step": 225980 + }, + { + "epoch": 0.9130281960431, + "grad_norm": 584.8157348632812, + "learning_rate": 1.1133911482841242e-06, + "loss": 52.061, + "step": 225990 + }, + { + "epoch": 0.9130685973084677, + "grad_norm": 886.44189453125, + "learning_rate": 1.1124725952503801e-06, + "loss": 122.7811, + "step": 226000 + }, + { + "epoch": 0.9131089985738353, + "grad_norm": 817.204833984375, + "learning_rate": 1.1115544104387066e-06, + "loss": 63.5779, + "step": 226010 + }, + { + "epoch": 0.913149399839203, + "grad_norm": 668.1753540039062, + "learning_rate": 1.1106365938670027e-06, + "loss": 78.3897, + "step": 226020 + }, + { + "epoch": 0.9131898011045706, + "grad_norm": 1107.604736328125, + "learning_rate": 1.1097191455531676e-06, + "loss": 72.1366, + "step": 226030 + }, + { + "epoch": 0.9132302023699382, + "grad_norm": 364.7510986328125, + "learning_rate": 1.1088020655150755e-06, + "loss": 32.7604, + "step": 226040 + }, + { + "epoch": 
0.9132706036353059, + "grad_norm": 637.323486328125, + "learning_rate": 1.1078853537706214e-06, + "loss": 55.6713, + "step": 226050 + }, + { + "epoch": 0.9133110049006735, + "grad_norm": 668.5408325195312, + "learning_rate": 1.106969010337664e-06, + "loss": 70.812, + "step": 226060 + }, + { + "epoch": 0.9133514061660412, + "grad_norm": 428.319091796875, + "learning_rate": 1.1060530352340736e-06, + "loss": 52.9346, + "step": 226070 + }, + { + "epoch": 0.9133918074314088, + "grad_norm": 791.3923950195312, + "learning_rate": 1.1051374284777095e-06, + "loss": 59.2228, + "step": 226080 + }, + { + "epoch": 0.9134322086967764, + "grad_norm": 540.7993774414062, + "learning_rate": 1.1042221900864103e-06, + "loss": 66.5678, + "step": 226090 + }, + { + "epoch": 0.913472609962144, + "grad_norm": 667.8656005859375, + "learning_rate": 1.1033073200780375e-06, + "loss": 41.0591, + "step": 226100 + }, + { + "epoch": 0.9135130112275116, + "grad_norm": 650.4783935546875, + "learning_rate": 1.1023928184704125e-06, + "loss": 51.8837, + "step": 226110 + }, + { + "epoch": 0.9135534124928792, + "grad_norm": 502.3866271972656, + "learning_rate": 1.1014786852813674e-06, + "loss": 43.4091, + "step": 226120 + }, + { + "epoch": 0.9135938137582469, + "grad_norm": 664.5758056640625, + "learning_rate": 1.1005649205287261e-06, + "loss": 82.5558, + "step": 226130 + }, + { + "epoch": 0.9136342150236145, + "grad_norm": 346.47314453125, + "learning_rate": 1.0996515242303052e-06, + "loss": 47.5815, + "step": 226140 + }, + { + "epoch": 0.9136746162889822, + "grad_norm": 520.1072387695312, + "learning_rate": 1.098738496403906e-06, + "loss": 58.6761, + "step": 226150 + }, + { + "epoch": 0.9137150175543498, + "grad_norm": 907.5020751953125, + "learning_rate": 1.0978258370673301e-06, + "loss": 65.3907, + "step": 226160 + }, + { + "epoch": 0.9137554188197174, + "grad_norm": 665.6424560546875, + "learning_rate": 1.0969135462383695e-06, + "loss": 55.7067, + "step": 226170 + }, + { + "epoch": 0.9137958200850851, + "grad_norm": 921.2628173828125, + "learning_rate": 1.0960016239348125e-06, + "loss": 59.8697, + "step": 226180 + }, + { + "epoch": 0.9138362213504527, + "grad_norm": 460.1278381347656, + "learning_rate": 1.0950900701744404e-06, + "loss": 62.4297, + "step": 226190 + }, + { + "epoch": 0.9138766226158204, + "grad_norm": 453.7831115722656, + "learning_rate": 1.0941788849750123e-06, + "loss": 66.4981, + "step": 226200 + }, + { + "epoch": 0.913917023881188, + "grad_norm": 1037.22705078125, + "learning_rate": 1.0932680683543073e-06, + "loss": 79.971, + "step": 226210 + }, + { + "epoch": 0.9139574251465555, + "grad_norm": 891.52978515625, + "learning_rate": 1.0923576203300713e-06, + "loss": 60.2378, + "step": 226220 + }, + { + "epoch": 0.9139978264119232, + "grad_norm": 573.4867553710938, + "learning_rate": 1.0914475409200587e-06, + "loss": 43.116, + "step": 226230 + }, + { + "epoch": 0.9140382276772908, + "grad_norm": 590.679931640625, + "learning_rate": 1.090537830142011e-06, + "loss": 32.468, + "step": 226240 + }, + { + "epoch": 0.9140786289426585, + "grad_norm": 754.6320190429688, + "learning_rate": 1.0896284880136632e-06, + "loss": 92.7525, + "step": 226250 + }, + { + "epoch": 0.9141190302080261, + "grad_norm": 661.9486083984375, + "learning_rate": 1.0887195145527473e-06, + "loss": 95.5463, + "step": 226260 + }, + { + "epoch": 0.9141594314733937, + "grad_norm": 187.8131103515625, + "learning_rate": 1.0878109097769762e-06, + "loss": 62.4884, + "step": 226270 + }, + { + "epoch": 0.9141998327387614, + "grad_norm": 
667.7847900390625, + "learning_rate": 1.0869026737040688e-06, + "loss": 54.5359, + "step": 226280 + }, + { + "epoch": 0.914240234004129, + "grad_norm": 823.4574584960938, + "learning_rate": 1.0859948063517311e-06, + "loss": 79.3728, + "step": 226290 + }, + { + "epoch": 0.9142806352694967, + "grad_norm": 346.9222412109375, + "learning_rate": 1.0850873077376645e-06, + "loss": 70.3954, + "step": 226300 + }, + { + "epoch": 0.9143210365348643, + "grad_norm": 746.0244750976562, + "learning_rate": 1.0841801778795547e-06, + "loss": 62.2139, + "step": 226310 + }, + { + "epoch": 0.9143614378002319, + "grad_norm": 1200.623779296875, + "learning_rate": 1.0832734167950965e-06, + "loss": 68.0521, + "step": 226320 + }, + { + "epoch": 0.9144018390655996, + "grad_norm": 416.016357421875, + "learning_rate": 1.0823670245019579e-06, + "loss": 59.2634, + "step": 226330 + }, + { + "epoch": 0.9144422403309672, + "grad_norm": 510.09283447265625, + "learning_rate": 1.081461001017814e-06, + "loss": 88.8497, + "step": 226340 + }, + { + "epoch": 0.9144826415963347, + "grad_norm": 553.2496337890625, + "learning_rate": 1.0805553463603324e-06, + "loss": 56.627, + "step": 226350 + }, + { + "epoch": 0.9145230428617024, + "grad_norm": 844.9332885742188, + "learning_rate": 1.0796500605471571e-06, + "loss": 59.3533, + "step": 226360 + }, + { + "epoch": 0.91456344412707, + "grad_norm": 549.1014404296875, + "learning_rate": 1.0787451435959494e-06, + "loss": 60.7977, + "step": 226370 + }, + { + "epoch": 0.9146038453924377, + "grad_norm": 169.83497619628906, + "learning_rate": 1.077840595524342e-06, + "loss": 75.4367, + "step": 226380 + }, + { + "epoch": 0.9146442466578053, + "grad_norm": 1181.109130859375, + "learning_rate": 1.0769364163499806e-06, + "loss": 90.7528, + "step": 226390 + }, + { + "epoch": 0.9146846479231729, + "grad_norm": 221.9049072265625, + "learning_rate": 1.0760326060904825e-06, + "loss": 73.9645, + "step": 226400 + }, + { + "epoch": 0.9147250491885406, + "grad_norm": 390.87481689453125, + "learning_rate": 1.075129164763471e-06, + "loss": 35.8809, + "step": 226410 + }, + { + "epoch": 0.9147654504539082, + "grad_norm": 1090.6776123046875, + "learning_rate": 1.0742260923865632e-06, + "loss": 57.7958, + "step": 226420 + }, + { + "epoch": 0.9148058517192759, + "grad_norm": 540.6681518554688, + "learning_rate": 1.0733233889773586e-06, + "loss": 65.4027, + "step": 226430 + }, + { + "epoch": 0.9148462529846435, + "grad_norm": 449.84991455078125, + "learning_rate": 1.0724210545534564e-06, + "loss": 57.6965, + "step": 226440 + }, + { + "epoch": 0.9148866542500111, + "grad_norm": 358.4213562011719, + "learning_rate": 1.0715190891324534e-06, + "loss": 63.122, + "step": 226450 + }, + { + "epoch": 0.9149270555153788, + "grad_norm": 845.4864501953125, + "learning_rate": 1.0706174927319291e-06, + "loss": 82.5609, + "step": 226460 + }, + { + "epoch": 0.9149674567807464, + "grad_norm": 1156.18994140625, + "learning_rate": 1.0697162653694626e-06, + "loss": 48.8588, + "step": 226470 + }, + { + "epoch": 0.915007858046114, + "grad_norm": 750.32470703125, + "learning_rate": 1.0688154070626266e-06, + "loss": 47.0031, + "step": 226480 + }, + { + "epoch": 0.9150482593114816, + "grad_norm": 116.11683654785156, + "learning_rate": 1.0679149178289782e-06, + "loss": 86.7934, + "step": 226490 + }, + { + "epoch": 0.9150886605768492, + "grad_norm": 588.1036376953125, + "learning_rate": 1.0670147976860746e-06, + "loss": 65.7974, + "step": 226500 + }, + { + "epoch": 0.9151290618422169, + "grad_norm": 563.9423217773438, + "learning_rate": 
1.0661150466514636e-06, + "loss": 70.6478, + "step": 226510 + }, + { + "epoch": 0.9151694631075845, + "grad_norm": 358.78143310546875, + "learning_rate": 1.0652156647426891e-06, + "loss": 52.6934, + "step": 226520 + }, + { + "epoch": 0.9152098643729522, + "grad_norm": 941.8817138671875, + "learning_rate": 1.0643166519772864e-06, + "loss": 77.9094, + "step": 226530 + }, + { + "epoch": 0.9152502656383198, + "grad_norm": 854.72705078125, + "learning_rate": 1.063418008372774e-06, + "loss": 82.2224, + "step": 226540 + }, + { + "epoch": 0.9152906669036874, + "grad_norm": 724.9200439453125, + "learning_rate": 1.0625197339466808e-06, + "loss": 72.0623, + "step": 226550 + }, + { + "epoch": 0.9153310681690551, + "grad_norm": 477.3977355957031, + "learning_rate": 1.0616218287165148e-06, + "loss": 57.9802, + "step": 226560 + }, + { + "epoch": 0.9153714694344227, + "grad_norm": 270.53924560546875, + "learning_rate": 1.0607242926997795e-06, + "loss": 57.3872, + "step": 226570 + }, + { + "epoch": 0.9154118706997904, + "grad_norm": 574.882568359375, + "learning_rate": 1.0598271259139748e-06, + "loss": 76.3236, + "step": 226580 + }, + { + "epoch": 0.915452271965158, + "grad_norm": 709.5353393554688, + "learning_rate": 1.0589303283765951e-06, + "loss": 59.7262, + "step": 226590 + }, + { + "epoch": 0.9154926732305256, + "grad_norm": 1033.2796630859375, + "learning_rate": 1.0580339001051153e-06, + "loss": 67.1499, + "step": 226600 + }, + { + "epoch": 0.9155330744958932, + "grad_norm": 983.73828125, + "learning_rate": 1.0571378411170176e-06, + "loss": 63.6712, + "step": 226610 + }, + { + "epoch": 0.9155734757612608, + "grad_norm": 751.2908935546875, + "learning_rate": 1.0562421514297717e-06, + "loss": 80.2219, + "step": 226620 + }, + { + "epoch": 0.9156138770266284, + "grad_norm": 1638.102783203125, + "learning_rate": 1.0553468310608373e-06, + "loss": 77.9845, + "step": 226630 + }, + { + "epoch": 0.9156542782919961, + "grad_norm": 623.50439453125, + "learning_rate": 1.0544518800276715e-06, + "loss": 57.9382, + "step": 226640 + }, + { + "epoch": 0.9156946795573637, + "grad_norm": 541.9660034179688, + "learning_rate": 1.0535572983477139e-06, + "loss": 34.7817, + "step": 226650 + }, + { + "epoch": 0.9157350808227314, + "grad_norm": 676.223388671875, + "learning_rate": 1.0526630860384191e-06, + "loss": 85.6516, + "step": 226660 + }, + { + "epoch": 0.915775482088099, + "grad_norm": 1050.9276123046875, + "learning_rate": 1.0517692431172088e-06, + "loss": 66.4141, + "step": 226670 + }, + { + "epoch": 0.9158158833534666, + "grad_norm": 1096.0545654296875, + "learning_rate": 1.0508757696015114e-06, + "loss": 57.1789, + "step": 226680 + }, + { + "epoch": 0.9158562846188343, + "grad_norm": 779.0993041992188, + "learning_rate": 1.0499826655087485e-06, + "loss": 66.7548, + "step": 226690 + }, + { + "epoch": 0.9158966858842019, + "grad_norm": 1923.0286865234375, + "learning_rate": 1.0490899308563263e-06, + "loss": 94.045, + "step": 226700 + }, + { + "epoch": 0.9159370871495696, + "grad_norm": 911.7400512695312, + "learning_rate": 1.048197565661655e-06, + "loss": 82.4002, + "step": 226710 + }, + { + "epoch": 0.9159774884149372, + "grad_norm": 585.8935546875, + "learning_rate": 1.0473055699421296e-06, + "loss": 84.1412, + "step": 226720 + }, + { + "epoch": 0.9160178896803048, + "grad_norm": 400.1241455078125, + "learning_rate": 1.0464139437151388e-06, + "loss": 90.0653, + "step": 226730 + }, + { + "epoch": 0.9160582909456724, + "grad_norm": 320.8519287109375, + "learning_rate": 1.045522686998064e-06, + "loss": 47.7216, + 
"step": 226740 + }, + { + "epoch": 0.91609869221104, + "grad_norm": 641.7861938476562, + "learning_rate": 1.0446317998082888e-06, + "loss": 66.2063, + "step": 226750 + }, + { + "epoch": 0.9161390934764077, + "grad_norm": 610.3090209960938, + "learning_rate": 1.043741282163171e-06, + "loss": 67.9397, + "step": 226760 + }, + { + "epoch": 0.9161794947417753, + "grad_norm": 855.9674072265625, + "learning_rate": 1.0428511340800761e-06, + "loss": 67.4388, + "step": 226770 + }, + { + "epoch": 0.9162198960071429, + "grad_norm": 902.8604736328125, + "learning_rate": 1.0419613555763596e-06, + "loss": 74.6918, + "step": 226780 + }, + { + "epoch": 0.9162602972725106, + "grad_norm": 431.0906677246094, + "learning_rate": 1.0410719466693652e-06, + "loss": 63.6096, + "step": 226790 + }, + { + "epoch": 0.9163006985378782, + "grad_norm": 422.8153381347656, + "learning_rate": 1.0401829073764368e-06, + "loss": 74.057, + "step": 226800 + }, + { + "epoch": 0.9163410998032459, + "grad_norm": 2079.13671875, + "learning_rate": 1.0392942377148985e-06, + "loss": 116.9047, + "step": 226810 + }, + { + "epoch": 0.9163815010686135, + "grad_norm": 609.6698608398438, + "learning_rate": 1.038405937702087e-06, + "loss": 58.6039, + "step": 226820 + }, + { + "epoch": 0.9164219023339811, + "grad_norm": 714.2447509765625, + "learning_rate": 1.0375180073553094e-06, + "loss": 60.6086, + "step": 226830 + }, + { + "epoch": 0.9164623035993488, + "grad_norm": 623.4655151367188, + "learning_rate": 1.036630446691882e-06, + "loss": 87.5241, + "step": 226840 + }, + { + "epoch": 0.9165027048647164, + "grad_norm": 780.3870239257812, + "learning_rate": 1.0357432557291069e-06, + "loss": 71.8769, + "step": 226850 + }, + { + "epoch": 0.9165431061300839, + "grad_norm": 545.2744140625, + "learning_rate": 1.0348564344842792e-06, + "loss": 35.3074, + "step": 226860 + }, + { + "epoch": 0.9165835073954516, + "grad_norm": 875.2745971679688, + "learning_rate": 1.033969982974694e-06, + "loss": 61.0569, + "step": 226870 + }, + { + "epoch": 0.9166239086608192, + "grad_norm": 344.07244873046875, + "learning_rate": 1.0330839012176242e-06, + "loss": 59.1008, + "step": 226880 + }, + { + "epoch": 0.9166643099261869, + "grad_norm": 576.1427001953125, + "learning_rate": 1.0321981892303512e-06, + "loss": 51.6573, + "step": 226890 + }, + { + "epoch": 0.9167047111915545, + "grad_norm": 1128.3040771484375, + "learning_rate": 1.0313128470301371e-06, + "loss": 81.668, + "step": 226900 + }, + { + "epoch": 0.9167451124569221, + "grad_norm": 671.5454711914062, + "learning_rate": 1.0304278746342456e-06, + "loss": 83.966, + "step": 226910 + }, + { + "epoch": 0.9167855137222898, + "grad_norm": 762.3535766601562, + "learning_rate": 1.0295432720599275e-06, + "loss": 85.738, + "step": 226920 + }, + { + "epoch": 0.9168259149876574, + "grad_norm": 495.4464111328125, + "learning_rate": 1.0286590393244356e-06, + "loss": 91.854, + "step": 226930 + }, + { + "epoch": 0.9168663162530251, + "grad_norm": 695.5411376953125, + "learning_rate": 1.027775176444996e-06, + "loss": 72.5167, + "step": 226940 + }, + { + "epoch": 0.9169067175183927, + "grad_norm": 707.5381469726562, + "learning_rate": 1.026891683438851e-06, + "loss": 72.9509, + "step": 226950 + }, + { + "epoch": 0.9169471187837603, + "grad_norm": 283.4139404296875, + "learning_rate": 1.0260085603232218e-06, + "loss": 74.1688, + "step": 226960 + }, + { + "epoch": 0.916987520049128, + "grad_norm": 588.527587890625, + "learning_rate": 1.025125807115317e-06, + "loss": 45.5252, + "step": 226970 + }, + { + "epoch": 0.9170279213144956, 
+ "grad_norm": 774.3060913085938, + "learning_rate": 1.0242434238323606e-06, + "loss": 54.4311, + "step": 226980 + }, + { + "epoch": 0.9170683225798631, + "grad_norm": 739.551513671875, + "learning_rate": 1.0233614104915434e-06, + "loss": 83.1025, + "step": 226990 + }, + { + "epoch": 0.9171087238452308, + "grad_norm": 1012.3488159179688, + "learning_rate": 1.0224797671100695e-06, + "loss": 60.7478, + "step": 227000 + }, + { + "epoch": 0.9171491251105984, + "grad_norm": 1073.89599609375, + "learning_rate": 1.0215984937051204e-06, + "loss": 55.1986, + "step": 227010 + }, + { + "epoch": 0.9171895263759661, + "grad_norm": 1049.5576171875, + "learning_rate": 1.0207175902938803e-06, + "loss": 65.1684, + "step": 227020 + }, + { + "epoch": 0.9172299276413337, + "grad_norm": 1188.416015625, + "learning_rate": 1.0198370568935246e-06, + "loss": 77.2907, + "step": 227030 + }, + { + "epoch": 0.9172703289067013, + "grad_norm": 968.2594604492188, + "learning_rate": 1.0189568935212123e-06, + "loss": 79.8775, + "step": 227040 + }, + { + "epoch": 0.917310730172069, + "grad_norm": 375.3421630859375, + "learning_rate": 1.0180771001941103e-06, + "loss": 158.3027, + "step": 227050 + }, + { + "epoch": 0.9173511314374366, + "grad_norm": 1414.5833740234375, + "learning_rate": 1.0171976769293646e-06, + "loss": 70.3624, + "step": 227060 + }, + { + "epoch": 0.9173915327028043, + "grad_norm": 661.4661254882812, + "learning_rate": 1.0163186237441259e-06, + "loss": 55.7377, + "step": 227070 + }, + { + "epoch": 0.9174319339681719, + "grad_norm": 692.8190307617188, + "learning_rate": 1.0154399406555271e-06, + "loss": 71.1216, + "step": 227080 + }, + { + "epoch": 0.9174723352335395, + "grad_norm": 745.64404296875, + "learning_rate": 1.0145616276807058e-06, + "loss": 55.5031, + "step": 227090 + }, + { + "epoch": 0.9175127364989072, + "grad_norm": 864.1234741210938, + "learning_rate": 1.0136836848367748e-06, + "loss": 97.6258, + "step": 227100 + }, + { + "epoch": 0.9175531377642748, + "grad_norm": 970.3352661132812, + "learning_rate": 1.012806112140856e-06, + "loss": 59.6766, + "step": 227110 + }, + { + "epoch": 0.9175935390296424, + "grad_norm": 484.7643737792969, + "learning_rate": 1.0119289096100604e-06, + "loss": 79.1345, + "step": 227120 + }, + { + "epoch": 0.91763394029501, + "grad_norm": 443.5517272949219, + "learning_rate": 1.0110520772614806e-06, + "loss": 46.6717, + "step": 227130 + }, + { + "epoch": 0.9176743415603776, + "grad_norm": 601.894775390625, + "learning_rate": 1.0101756151122232e-06, + "loss": 69.3496, + "step": 227140 + }, + { + "epoch": 0.9177147428257453, + "grad_norm": 392.0855712890625, + "learning_rate": 1.0092995231793612e-06, + "loss": 54.2771, + "step": 227150 + }, + { + "epoch": 0.9177551440911129, + "grad_norm": 1470.0030517578125, + "learning_rate": 1.0084238014799896e-06, + "loss": 105.8993, + "step": 227160 + }, + { + "epoch": 0.9177955453564806, + "grad_norm": 1345.71875, + "learning_rate": 1.0075484500311705e-06, + "loss": 84.6762, + "step": 227170 + }, + { + "epoch": 0.9178359466218482, + "grad_norm": 585.3522338867188, + "learning_rate": 1.0066734688499702e-06, + "loss": 54.3725, + "step": 227180 + }, + { + "epoch": 0.9178763478872158, + "grad_norm": 372.5501708984375, + "learning_rate": 1.0057988579534505e-06, + "loss": 55.4811, + "step": 227190 + }, + { + "epoch": 0.9179167491525835, + "grad_norm": 749.8014526367188, + "learning_rate": 1.0049246173586646e-06, + "loss": 51.3461, + "step": 227200 + }, + { + "epoch": 0.9179571504179511, + "grad_norm": 527.1925048828125, + 
"learning_rate": 1.0040507470826499e-06, + "loss": 73.9271, + "step": 227210 + }, + { + "epoch": 0.9179975516833188, + "grad_norm": 272.3022766113281, + "learning_rate": 1.003177247142444e-06, + "loss": 61.9544, + "step": 227220 + }, + { + "epoch": 0.9180379529486864, + "grad_norm": 462.49945068359375, + "learning_rate": 1.0023041175550797e-06, + "loss": 47.9032, + "step": 227230 + }, + { + "epoch": 0.918078354214054, + "grad_norm": 765.3421020507812, + "learning_rate": 1.0014313583375745e-06, + "loss": 51.1869, + "step": 227240 + }, + { + "epoch": 0.9181187554794216, + "grad_norm": 888.4177856445312, + "learning_rate": 1.000558969506953e-06, + "loss": 63.9379, + "step": 227250 + }, + { + "epoch": 0.9181591567447892, + "grad_norm": 373.97772216796875, + "learning_rate": 9.996869510802076e-07, + "loss": 70.6131, + "step": 227260 + }, + { + "epoch": 0.9181995580101568, + "grad_norm": 428.20037841796875, + "learning_rate": 9.98815303074352e-07, + "loss": 49.4115, + "step": 227270 + }, + { + "epoch": 0.9182399592755245, + "grad_norm": 518.8023681640625, + "learning_rate": 9.979440255063744e-07, + "loss": 98.3402, + "step": 227280 + }, + { + "epoch": 0.9182803605408921, + "grad_norm": 903.892333984375, + "learning_rate": 9.970731183932592e-07, + "loss": 67.9347, + "step": 227290 + }, + { + "epoch": 0.9183207618062598, + "grad_norm": 1001.895263671875, + "learning_rate": 9.962025817519883e-07, + "loss": 86.6482, + "step": 227300 + }, + { + "epoch": 0.9183611630716274, + "grad_norm": 394.3913879394531, + "learning_rate": 9.95332415599528e-07, + "loss": 63.8747, + "step": 227310 + }, + { + "epoch": 0.918401564336995, + "grad_norm": 1118.7200927734375, + "learning_rate": 9.94462619952854e-07, + "loss": 82.3674, + "step": 227320 + }, + { + "epoch": 0.9184419656023627, + "grad_norm": 1293.289794921875, + "learning_rate": 9.935931948289102e-07, + "loss": 74.5728, + "step": 227330 + }, + { + "epoch": 0.9184823668677303, + "grad_norm": 1054.6895751953125, + "learning_rate": 9.927241402446542e-07, + "loss": 80.8768, + "step": 227340 + }, + { + "epoch": 0.918522768133098, + "grad_norm": 697.5621337890625, + "learning_rate": 9.918554562170257e-07, + "loss": 65.7783, + "step": 227350 + }, + { + "epoch": 0.9185631693984656, + "grad_norm": 458.992919921875, + "learning_rate": 9.909871427629603e-07, + "loss": 66.782, + "step": 227360 + }, + { + "epoch": 0.9186035706638332, + "grad_norm": 458.9566345214844, + "learning_rate": 9.90119199899391e-07, + "loss": 80.6713, + "step": 227370 + }, + { + "epoch": 0.9186439719292008, + "grad_norm": 1314.3929443359375, + "learning_rate": 9.892516276432306e-07, + "loss": 51.9403, + "step": 227380 + }, + { + "epoch": 0.9186843731945684, + "grad_norm": 560.048095703125, + "learning_rate": 9.883844260113996e-07, + "loss": 46.9636, + "step": 227390 + }, + { + "epoch": 0.918724774459936, + "grad_norm": 380.17864990234375, + "learning_rate": 9.875175950207994e-07, + "loss": 62.4909, + "step": 227400 + }, + { + "epoch": 0.9187651757253037, + "grad_norm": 293.5907897949219, + "learning_rate": 9.866511346883367e-07, + "loss": 54.7892, + "step": 227410 + }, + { + "epoch": 0.9188055769906713, + "grad_norm": 453.9716491699219, + "learning_rate": 9.857850450308936e-07, + "loss": 60.8062, + "step": 227420 + }, + { + "epoch": 0.918845978256039, + "grad_norm": 664.472412109375, + "learning_rate": 9.849193260653633e-07, + "loss": 64.8499, + "step": 227430 + }, + { + "epoch": 0.9188863795214066, + "grad_norm": 635.7804565429688, + "learning_rate": 9.840539778086188e-07, + "loss": 33.9143, + 
"step": 227440 + }, + { + "epoch": 0.9189267807867743, + "grad_norm": 1218.208984375, + "learning_rate": 9.831890002775336e-07, + "loss": 72.4912, + "step": 227450 + }, + { + "epoch": 0.9189671820521419, + "grad_norm": 518.87255859375, + "learning_rate": 9.823243934889716e-07, + "loss": 56.7078, + "step": 227460 + }, + { + "epoch": 0.9190075833175095, + "grad_norm": 507.93212890625, + "learning_rate": 9.814601574597793e-07, + "loss": 64.3529, + "step": 227470 + }, + { + "epoch": 0.9190479845828772, + "grad_norm": 760.3419189453125, + "learning_rate": 9.80596292206819e-07, + "loss": 46.6309, + "step": 227480 + }, + { + "epoch": 0.9190883858482448, + "grad_norm": 479.492431640625, + "learning_rate": 9.79732797746924e-07, + "loss": 57.2331, + "step": 227490 + }, + { + "epoch": 0.9191287871136123, + "grad_norm": 433.6677551269531, + "learning_rate": 9.788696740969295e-07, + "loss": 55.475, + "step": 227500 + }, + { + "epoch": 0.91916918837898, + "grad_norm": 943.913330078125, + "learning_rate": 9.780069212736643e-07, + "loss": 85.84, + "step": 227510 + }, + { + "epoch": 0.9192095896443476, + "grad_norm": 255.56320190429688, + "learning_rate": 9.77144539293946e-07, + "loss": 77.8904, + "step": 227520 + }, + { + "epoch": 0.9192499909097153, + "grad_norm": 403.1540832519531, + "learning_rate": 9.762825281745881e-07, + "loss": 70.7063, + "step": 227530 + }, + { + "epoch": 0.9192903921750829, + "grad_norm": 431.0110168457031, + "learning_rate": 9.754208879324012e-07, + "loss": 60.7446, + "step": 227540 + }, + { + "epoch": 0.9193307934404505, + "grad_norm": 485.9320373535156, + "learning_rate": 9.745596185841743e-07, + "loss": 42.552, + "step": 227550 + }, + { + "epoch": 0.9193711947058182, + "grad_norm": 705.6109619140625, + "learning_rate": 9.73698720146703e-07, + "loss": 46.9506, + "step": 227560 + }, + { + "epoch": 0.9194115959711858, + "grad_norm": 442.1241760253906, + "learning_rate": 9.728381926367736e-07, + "loss": 57.7568, + "step": 227570 + }, + { + "epoch": 0.9194519972365535, + "grad_norm": 1128.1453857421875, + "learning_rate": 9.719780360711551e-07, + "loss": 39.2328, + "step": 227580 + }, + { + "epoch": 0.9194923985019211, + "grad_norm": 740.3536987304688, + "learning_rate": 9.71118250466625e-07, + "loss": 67.153, + "step": 227590 + }, + { + "epoch": 0.9195327997672887, + "grad_norm": 405.9766845703125, + "learning_rate": 9.702588358399345e-07, + "loss": 50.8714, + "step": 227600 + }, + { + "epoch": 0.9195732010326564, + "grad_norm": 706.9024047851562, + "learning_rate": 9.693997922078524e-07, + "loss": 60.4695, + "step": 227610 + }, + { + "epoch": 0.919613602298024, + "grad_norm": 794.648193359375, + "learning_rate": 9.685411195871163e-07, + "loss": 69.6095, + "step": 227620 + }, + { + "epoch": 0.9196540035633916, + "grad_norm": 789.849365234375, + "learning_rate": 9.676828179944687e-07, + "loss": 56.1416, + "step": 227630 + }, + { + "epoch": 0.9196944048287592, + "grad_norm": 729.1079711914062, + "learning_rate": 9.668248874466468e-07, + "loss": 68.0602, + "step": 227640 + }, + { + "epoch": 0.9197348060941268, + "grad_norm": 279.80279541015625, + "learning_rate": 9.659673279603688e-07, + "loss": 37.2798, + "step": 227650 + }, + { + "epoch": 0.9197752073594945, + "grad_norm": 262.9844665527344, + "learning_rate": 9.651101395523588e-07, + "loss": 36.5063, + "step": 227660 + }, + { + "epoch": 0.9198156086248621, + "grad_norm": 672.517578125, + "learning_rate": 9.642533222393235e-07, + "loss": 60.9833, + "step": 227670 + }, + { + "epoch": 0.9198560098902298, + "grad_norm": 
901.4411010742188, + "learning_rate": 9.63396876037972e-07, + "loss": 61.2069, + "step": 227680 + }, + { + "epoch": 0.9198964111555974, + "grad_norm": 597.974365234375, + "learning_rate": 9.625408009649995e-07, + "loss": 60.8054, + "step": 227690 + }, + { + "epoch": 0.919936812420965, + "grad_norm": 680.3971557617188, + "learning_rate": 9.616850970370972e-07, + "loss": 67.5123, + "step": 227700 + }, + { + "epoch": 0.9199772136863327, + "grad_norm": 1034.8609619140625, + "learning_rate": 9.60829764270943e-07, + "loss": 69.9939, + "step": 227710 + }, + { + "epoch": 0.9200176149517003, + "grad_norm": 597.1076049804688, + "learning_rate": 9.599748026832144e-07, + "loss": 60.5433, + "step": 227720 + }, + { + "epoch": 0.920058016217068, + "grad_norm": 685.9915161132812, + "learning_rate": 9.591202122905807e-07, + "loss": 46.3553, + "step": 227730 + }, + { + "epoch": 0.9200984174824356, + "grad_norm": 602.1477661132812, + "learning_rate": 9.582659931096971e-07, + "loss": 55.0845, + "step": 227740 + }, + { + "epoch": 0.9201388187478032, + "grad_norm": 854.1395263671875, + "learning_rate": 9.574121451572282e-07, + "loss": 69.0381, + "step": 227750 + }, + { + "epoch": 0.9201792200131708, + "grad_norm": 1045.4866943359375, + "learning_rate": 9.565586684498074e-07, + "loss": 84.3757, + "step": 227760 + }, + { + "epoch": 0.9202196212785384, + "grad_norm": 753.731689453125, + "learning_rate": 9.557055630040835e-07, + "loss": 80.5045, + "step": 227770 + }, + { + "epoch": 0.920260022543906, + "grad_norm": 300.7455139160156, + "learning_rate": 9.548528288366832e-07, + "loss": 80.7863, + "step": 227780 + }, + { + "epoch": 0.9203004238092737, + "grad_norm": 490.550048828125, + "learning_rate": 9.54000465964231e-07, + "loss": 57.7516, + "step": 227790 + }, + { + "epoch": 0.9203408250746413, + "grad_norm": 744.1099853515625, + "learning_rate": 9.53148474403347e-07, + "loss": 43.1976, + "step": 227800 + }, + { + "epoch": 0.920381226340009, + "grad_norm": 682.7489013671875, + "learning_rate": 9.522968541706356e-07, + "loss": 66.3353, + "step": 227810 + }, + { + "epoch": 0.9204216276053766, + "grad_norm": 1153.3792724609375, + "learning_rate": 9.514456052827081e-07, + "loss": 75.1345, + "step": 227820 + }, + { + "epoch": 0.9204620288707442, + "grad_norm": 726.7431030273438, + "learning_rate": 9.505947277561534e-07, + "loss": 86.3157, + "step": 227830 + }, + { + "epoch": 0.9205024301361119, + "grad_norm": 887.6593627929688, + "learning_rate": 9.497442216075625e-07, + "loss": 73.5182, + "step": 227840 + }, + { + "epoch": 0.9205428314014795, + "grad_norm": 588.2175903320312, + "learning_rate": 9.488940868535135e-07, + "loss": 64.4427, + "step": 227850 + }, + { + "epoch": 0.9205832326668472, + "grad_norm": 809.3809204101562, + "learning_rate": 9.480443235105863e-07, + "loss": 57.7711, + "step": 227860 + }, + { + "epoch": 0.9206236339322148, + "grad_norm": 705.2509155273438, + "learning_rate": 9.471949315953388e-07, + "loss": 55.1694, + "step": 227870 + }, + { + "epoch": 0.9206640351975824, + "grad_norm": 649.5911865234375, + "learning_rate": 9.4634591112434e-07, + "loss": 70.3239, + "step": 227880 + }, + { + "epoch": 0.92070443646295, + "grad_norm": 2116.83154296875, + "learning_rate": 9.454972621141345e-07, + "loss": 71.3408, + "step": 227890 + }, + { + "epoch": 0.9207448377283176, + "grad_norm": 1421.831787109375, + "learning_rate": 9.446489845812712e-07, + "loss": 81.8694, + "step": 227900 + }, + { + "epoch": 0.9207852389936853, + "grad_norm": 671.0345458984375, + "learning_rate": 9.438010785422858e-07, + "loss": 
60.3597, + "step": 227910 + }, + { + "epoch": 0.9208256402590529, + "grad_norm": 461.36285400390625, + "learning_rate": 9.429535440137072e-07, + "loss": 54.3793, + "step": 227920 + }, + { + "epoch": 0.9208660415244205, + "grad_norm": 484.68994140625, + "learning_rate": 9.421063810120623e-07, + "loss": 56.4751, + "step": 227930 + }, + { + "epoch": 0.9209064427897882, + "grad_norm": 1596.470947265625, + "learning_rate": 9.412595895538645e-07, + "loss": 87.8142, + "step": 227940 + }, + { + "epoch": 0.9209468440551558, + "grad_norm": 742.5697631835938, + "learning_rate": 9.404131696556251e-07, + "loss": 74.5169, + "step": 227950 + }, + { + "epoch": 0.9209872453205235, + "grad_norm": 497.42950439453125, + "learning_rate": 9.395671213338397e-07, + "loss": 60.4736, + "step": 227960 + }, + { + "epoch": 0.9210276465858911, + "grad_norm": 640.7465209960938, + "learning_rate": 9.387214446050086e-07, + "loss": 64.5341, + "step": 227970 + }, + { + "epoch": 0.9210680478512587, + "grad_norm": 258.1431579589844, + "learning_rate": 9.378761394856184e-07, + "loss": 53.2613, + "step": 227980 + }, + { + "epoch": 0.9211084491166264, + "grad_norm": 433.0177307128906, + "learning_rate": 9.370312059921449e-07, + "loss": 60.822, + "step": 227990 + }, + { + "epoch": 0.921148850381994, + "grad_norm": 436.9791564941406, + "learning_rate": 9.361866441410617e-07, + "loss": 81.4766, + "step": 228000 + }, + { + "epoch": 0.9211892516473615, + "grad_norm": 867.533203125, + "learning_rate": 9.353424539488332e-07, + "loss": 83.0393, + "step": 228010 + }, + { + "epoch": 0.9212296529127292, + "grad_norm": 1068.5611572265625, + "learning_rate": 9.344986354319218e-07, + "loss": 72.167, + "step": 228020 + }, + { + "epoch": 0.9212700541780968, + "grad_norm": 892.1446533203125, + "learning_rate": 9.336551886067702e-07, + "loss": 58.82, + "step": 228030 + }, + { + "epoch": 0.9213104554434645, + "grad_norm": 973.5467529296875, + "learning_rate": 9.328121134898338e-07, + "loss": 53.3661, + "step": 228040 + }, + { + "epoch": 0.9213508567088321, + "grad_norm": 713.5765380859375, + "learning_rate": 9.319694100975374e-07, + "loss": 95.8126, + "step": 228050 + }, + { + "epoch": 0.9213912579741997, + "grad_norm": 905.9175415039062, + "learning_rate": 9.311270784463144e-07, + "loss": 75.3221, + "step": 228060 + }, + { + "epoch": 0.9214316592395674, + "grad_norm": 467.1725769042969, + "learning_rate": 9.302851185525896e-07, + "loss": 76.3435, + "step": 228070 + }, + { + "epoch": 0.921472060504935, + "grad_norm": 789.3355102539062, + "learning_rate": 9.294435304327675e-07, + "loss": 52.1242, + "step": 228080 + }, + { + "epoch": 0.9215124617703027, + "grad_norm": 1296.5084228515625, + "learning_rate": 9.286023141032685e-07, + "loss": 94.8776, + "step": 228090 + }, + { + "epoch": 0.9215528630356703, + "grad_norm": 1430.362060546875, + "learning_rate": 9.277614695804816e-07, + "loss": 85.611, + "step": 228100 + }, + { + "epoch": 0.9215932643010379, + "grad_norm": 471.9753112792969, + "learning_rate": 9.269209968808046e-07, + "loss": 71.026, + "step": 228110 + }, + { + "epoch": 0.9216336655664056, + "grad_norm": 871.133544921875, + "learning_rate": 9.260808960206225e-07, + "loss": 53.0889, + "step": 228120 + }, + { + "epoch": 0.9216740668317732, + "grad_norm": 850.5995483398438, + "learning_rate": 9.252411670163153e-07, + "loss": 55.8514, + "step": 228130 + }, + { + "epoch": 0.9217144680971407, + "grad_norm": 1198.52978515625, + "learning_rate": 9.244018098842522e-07, + "loss": 56.1045, + "step": 228140 + }, + { + "epoch": 0.9217548693625084, + 
"grad_norm": 977.1488037109375, + "learning_rate": 9.235628246407935e-07, + "loss": 90.1258, + "step": 228150 + }, + { + "epoch": 0.921795270627876, + "grad_norm": 519.801025390625, + "learning_rate": 9.227242113023016e-07, + "loss": 65.8213, + "step": 228160 + }, + { + "epoch": 0.9218356718932437, + "grad_norm": 742.3916625976562, + "learning_rate": 9.21885969885119e-07, + "loss": 66.0176, + "step": 228170 + }, + { + "epoch": 0.9218760731586113, + "grad_norm": 468.4938659667969, + "learning_rate": 9.210481004055971e-07, + "loss": 76.7612, + "step": 228180 + }, + { + "epoch": 0.921916474423979, + "grad_norm": 629.6813354492188, + "learning_rate": 9.202106028800584e-07, + "loss": 66.0611, + "step": 228190 + }, + { + "epoch": 0.9219568756893466, + "grad_norm": 472.7373352050781, + "learning_rate": 9.19373477324843e-07, + "loss": 51.2335, + "step": 228200 + }, + { + "epoch": 0.9219972769547142, + "grad_norm": 757.9939575195312, + "learning_rate": 9.185367237562581e-07, + "loss": 60.9032, + "step": 228210 + }, + { + "epoch": 0.9220376782200819, + "grad_norm": 752.9747924804688, + "learning_rate": 9.177003421906306e-07, + "loss": 86.1729, + "step": 228220 + }, + { + "epoch": 0.9220780794854495, + "grad_norm": 740.1076049804688, + "learning_rate": 9.168643326442561e-07, + "loss": 58.1216, + "step": 228230 + }, + { + "epoch": 0.9221184807508171, + "grad_norm": 1057.886962890625, + "learning_rate": 9.160286951334352e-07, + "loss": 52.9785, + "step": 228240 + }, + { + "epoch": 0.9221588820161848, + "grad_norm": 837.704345703125, + "learning_rate": 9.151934296744636e-07, + "loss": 63.5494, + "step": 228250 + }, + { + "epoch": 0.9221992832815524, + "grad_norm": 725.4239501953125, + "learning_rate": 9.143585362836194e-07, + "loss": 55.2221, + "step": 228260 + }, + { + "epoch": 0.92223968454692, + "grad_norm": 639.7677612304688, + "learning_rate": 9.135240149771807e-07, + "loss": 72.2831, + "step": 228270 + }, + { + "epoch": 0.9222800858122876, + "grad_norm": 744.0913696289062, + "learning_rate": 9.126898657714189e-07, + "loss": 69.6606, + "step": 228280 + }, + { + "epoch": 0.9223204870776552, + "grad_norm": 655.8161010742188, + "learning_rate": 9.118560886825923e-07, + "loss": 98.8454, + "step": 228290 + }, + { + "epoch": 0.9223608883430229, + "grad_norm": 711.4464111328125, + "learning_rate": 9.110226837269609e-07, + "loss": 124.194, + "step": 228300 + }, + { + "epoch": 0.9224012896083905, + "grad_norm": 476.03155517578125, + "learning_rate": 9.10189650920772e-07, + "loss": 68.3843, + "step": 228310 + }, + { + "epoch": 0.9224416908737582, + "grad_norm": 413.3608703613281, + "learning_rate": 9.093569902802613e-07, + "loss": 85.3806, + "step": 228320 + }, + { + "epoch": 0.9224820921391258, + "grad_norm": 653.7308959960938, + "learning_rate": 9.085247018216647e-07, + "loss": 46.9931, + "step": 228330 + }, + { + "epoch": 0.9225224934044934, + "grad_norm": 738.2017211914062, + "learning_rate": 9.076927855612072e-07, + "loss": 61.8869, + "step": 228340 + }, + { + "epoch": 0.9225628946698611, + "grad_norm": 296.32647705078125, + "learning_rate": 9.06861241515109e-07, + "loss": 38.1853, + "step": 228350 + }, + { + "epoch": 0.9226032959352287, + "grad_norm": 680.091552734375, + "learning_rate": 9.060300696995817e-07, + "loss": 68.3165, + "step": 228360 + }, + { + "epoch": 0.9226436972005964, + "grad_norm": 436.52044677734375, + "learning_rate": 9.051992701308254e-07, + "loss": 70.0716, + "step": 228370 + }, + { + "epoch": 0.922684098465964, + "grad_norm": 732.0703735351562, + "learning_rate": 
9.043688428250453e-07, + "loss": 73.517, + "step": 228380 + }, + { + "epoch": 0.9227244997313316, + "grad_norm": 1323.5704345703125, + "learning_rate": 9.035387877984214e-07, + "loss": 63.8603, + "step": 228390 + }, + { + "epoch": 0.9227649009966992, + "grad_norm": 445.9588928222656, + "learning_rate": 9.027091050671411e-07, + "loss": 58.1955, + "step": 228400 + }, + { + "epoch": 0.9228053022620668, + "grad_norm": 611.745361328125, + "learning_rate": 9.018797946473823e-07, + "loss": 64.7322, + "step": 228410 + }, + { + "epoch": 0.9228457035274344, + "grad_norm": 496.98370361328125, + "learning_rate": 9.010508565553011e-07, + "loss": 65.0215, + "step": 228420 + }, + { + "epoch": 0.9228861047928021, + "grad_norm": 755.337158203125, + "learning_rate": 9.002222908070735e-07, + "loss": 63.5564, + "step": 228430 + }, + { + "epoch": 0.9229265060581697, + "grad_norm": 1200.765380859375, + "learning_rate": 8.993940974188442e-07, + "loss": 69.6889, + "step": 228440 + }, + { + "epoch": 0.9229669073235374, + "grad_norm": 892.1463012695312, + "learning_rate": 8.985662764067582e-07, + "loss": 66.2304, + "step": 228450 + }, + { + "epoch": 0.923007308588905, + "grad_norm": 572.3770141601562, + "learning_rate": 8.977388277869581e-07, + "loss": 52.3316, + "step": 228460 + }, + { + "epoch": 0.9230477098542726, + "grad_norm": 969.6731567382812, + "learning_rate": 8.969117515755776e-07, + "loss": 82.3766, + "step": 228470 + }, + { + "epoch": 0.9230881111196403, + "grad_norm": 477.52288818359375, + "learning_rate": 8.960850477887329e-07, + "loss": 59.7428, + "step": 228480 + }, + { + "epoch": 0.9231285123850079, + "grad_norm": 548.6964111328125, + "learning_rate": 8.952587164425463e-07, + "loss": 91.9443, + "step": 228490 + }, + { + "epoch": 0.9231689136503756, + "grad_norm": 1234.231689453125, + "learning_rate": 8.944327575531275e-07, + "loss": 105.2574, + "step": 228500 + }, + { + "epoch": 0.9232093149157432, + "grad_norm": 552.382080078125, + "learning_rate": 8.936071711365768e-07, + "loss": 66.3606, + "step": 228510 + }, + { + "epoch": 0.9232497161811108, + "grad_norm": 487.4247741699219, + "learning_rate": 8.927819572089946e-07, + "loss": 61.0539, + "step": 228520 + }, + { + "epoch": 0.9232901174464784, + "grad_norm": 388.75592041015625, + "learning_rate": 8.919571157864593e-07, + "loss": 63.7249, + "step": 228530 + }, + { + "epoch": 0.923330518711846, + "grad_norm": 392.1812744140625, + "learning_rate": 8.911326468850645e-07, + "loss": 64.6909, + "step": 228540 + }, + { + "epoch": 0.9233709199772137, + "grad_norm": 529.4835815429688, + "learning_rate": 8.90308550520873e-07, + "loss": 81.5324, + "step": 228550 + }, + { + "epoch": 0.9234113212425813, + "grad_norm": 866.8008422851562, + "learning_rate": 8.894848267099542e-07, + "loss": 51.8256, + "step": 228560 + }, + { + "epoch": 0.9234517225079489, + "grad_norm": 443.92901611328125, + "learning_rate": 8.886614754683664e-07, + "loss": 62.7457, + "step": 228570 + }, + { + "epoch": 0.9234921237733166, + "grad_norm": 650.2222900390625, + "learning_rate": 8.878384968121633e-07, + "loss": 63.2711, + "step": 228580 + }, + { + "epoch": 0.9235325250386842, + "grad_norm": 1145.6856689453125, + "learning_rate": 8.870158907573923e-07, + "loss": 95.8487, + "step": 228590 + }, + { + "epoch": 0.9235729263040519, + "grad_norm": 378.3177795410156, + "learning_rate": 8.861936573200825e-07, + "loss": 66.7939, + "step": 228600 + }, + { + "epoch": 0.9236133275694195, + "grad_norm": 1015.7752075195312, + "learning_rate": 8.853717965162678e-07, + "loss": 85.1606, + "step": 
228610 + }, + { + "epoch": 0.9236537288347871, + "grad_norm": 1435.558837890625, + "learning_rate": 8.845503083619711e-07, + "loss": 53.4216, + "step": 228620 + }, + { + "epoch": 0.9236941301001548, + "grad_norm": 1054.9676513671875, + "learning_rate": 8.837291928732105e-07, + "loss": 72.9985, + "step": 228630 + }, + { + "epoch": 0.9237345313655224, + "grad_norm": 654.0689086914062, + "learning_rate": 8.829084500659846e-07, + "loss": 30.3138, + "step": 228640 + }, + { + "epoch": 0.9237749326308899, + "grad_norm": 974.3132934570312, + "learning_rate": 8.820880799563069e-07, + "loss": 67.5335, + "step": 228650 + }, + { + "epoch": 0.9238153338962576, + "grad_norm": 503.911376953125, + "learning_rate": 8.812680825601627e-07, + "loss": 77.4916, + "step": 228660 + }, + { + "epoch": 0.9238557351616252, + "grad_norm": 1109.158935546875, + "learning_rate": 8.804484578935391e-07, + "loss": 75.7073, + "step": 228670 + }, + { + "epoch": 0.9238961364269929, + "grad_norm": 784.7280883789062, + "learning_rate": 8.796292059724188e-07, + "loss": 64.7972, + "step": 228680 + }, + { + "epoch": 0.9239365376923605, + "grad_norm": 491.0041809082031, + "learning_rate": 8.788103268127646e-07, + "loss": 41.1837, + "step": 228690 + }, + { + "epoch": 0.9239769389577281, + "grad_norm": 511.57586669921875, + "learning_rate": 8.779918204305549e-07, + "loss": 42.9702, + "step": 228700 + }, + { + "epoch": 0.9240173402230958, + "grad_norm": 830.240966796875, + "learning_rate": 8.771736868417346e-07, + "loss": 107.0144, + "step": 228710 + }, + { + "epoch": 0.9240577414884634, + "grad_norm": 532.4107055664062, + "learning_rate": 8.763559260622601e-07, + "loss": 75.4442, + "step": 228720 + }, + { + "epoch": 0.9240981427538311, + "grad_norm": 782.3359985351562, + "learning_rate": 8.755385381080716e-07, + "loss": 58.0387, + "step": 228730 + }, + { + "epoch": 0.9241385440191987, + "grad_norm": 423.23553466796875, + "learning_rate": 8.747215229951056e-07, + "loss": 45.5132, + "step": 228740 + }, + { + "epoch": 0.9241789452845663, + "grad_norm": 896.886962890625, + "learning_rate": 8.739048807392913e-07, + "loss": 84.527, + "step": 228750 + }, + { + "epoch": 0.924219346549934, + "grad_norm": 1599.5672607421875, + "learning_rate": 8.730886113565473e-07, + "loss": 87.3274, + "step": 228760 + }, + { + "epoch": 0.9242597478153016, + "grad_norm": 1055.219970703125, + "learning_rate": 8.722727148627875e-07, + "loss": 93.8454, + "step": 228770 + }, + { + "epoch": 0.9243001490806692, + "grad_norm": 665.64599609375, + "learning_rate": 8.714571912739189e-07, + "loss": 90.1882, + "step": 228780 + }, + { + "epoch": 0.9243405503460368, + "grad_norm": 528.248046875, + "learning_rate": 8.706420406058424e-07, + "loss": 62.8089, + "step": 228790 + }, + { + "epoch": 0.9243809516114044, + "grad_norm": 468.6971435546875, + "learning_rate": 8.698272628744408e-07, + "loss": 82.4174, + "step": 228800 + }, + { + "epoch": 0.9244213528767721, + "grad_norm": 852.4969482421875, + "learning_rate": 8.690128580956126e-07, + "loss": 72.4451, + "step": 228810 + }, + { + "epoch": 0.9244617541421397, + "grad_norm": 716.810546875, + "learning_rate": 8.68198826285227e-07, + "loss": 56.1961, + "step": 228820 + }, + { + "epoch": 0.9245021554075074, + "grad_norm": 1050.5281982421875, + "learning_rate": 8.673851674591538e-07, + "loss": 59.1208, + "step": 228830 + }, + { + "epoch": 0.924542556672875, + "grad_norm": 1476.1949462890625, + "learning_rate": 8.665718816332558e-07, + "loss": 68.7911, + "step": 228840 + }, + { + "epoch": 0.9245829579382426, + "grad_norm": 
718.591064453125, + "learning_rate": 8.657589688233891e-07, + "loss": 56.0872, + "step": 228850 + }, + { + "epoch": 0.9246233592036103, + "grad_norm": 668.1279296875, + "learning_rate": 8.649464290454079e-07, + "loss": 44.4279, + "step": 228860 + }, + { + "epoch": 0.9246637604689779, + "grad_norm": 476.161865234375, + "learning_rate": 8.641342623151395e-07, + "loss": 44.2917, + "step": 228870 + }, + { + "epoch": 0.9247041617343456, + "grad_norm": 829.2047119140625, + "learning_rate": 8.63322468648431e-07, + "loss": 57.071, + "step": 228880 + }, + { + "epoch": 0.9247445629997132, + "grad_norm": 426.54364013671875, + "learning_rate": 8.62511048061101e-07, + "loss": 78.0632, + "step": 228890 + }, + { + "epoch": 0.9247849642650808, + "grad_norm": 721.86572265625, + "learning_rate": 8.617000005689724e-07, + "loss": 63.5281, + "step": 228900 + }, + { + "epoch": 0.9248253655304484, + "grad_norm": 713.2583618164062, + "learning_rate": 8.608893261878526e-07, + "loss": 34.7386, + "step": 228910 + }, + { + "epoch": 0.924865766795816, + "grad_norm": 594.3661499023438, + "learning_rate": 8.600790249335533e-07, + "loss": 66.9097, + "step": 228920 + }, + { + "epoch": 0.9249061680611836, + "grad_norm": 938.271728515625, + "learning_rate": 8.592690968218642e-07, + "loss": 52.8094, + "step": 228930 + }, + { + "epoch": 0.9249465693265513, + "grad_norm": 538.2855834960938, + "learning_rate": 8.58459541868577e-07, + "loss": 65.3402, + "step": 228940 + }, + { + "epoch": 0.9249869705919189, + "grad_norm": 451.6004943847656, + "learning_rate": 8.57650360089477e-07, + "loss": 46.8609, + "step": 228950 + }, + { + "epoch": 0.9250273718572866, + "grad_norm": 602.2926635742188, + "learning_rate": 8.568415515003403e-07, + "loss": 44.2759, + "step": 228960 + }, + { + "epoch": 0.9250677731226542, + "grad_norm": 862.6986694335938, + "learning_rate": 8.560331161169322e-07, + "loss": 73.332, + "step": 228970 + }, + { + "epoch": 0.9251081743880218, + "grad_norm": 492.5233154296875, + "learning_rate": 8.552250539550111e-07, + "loss": 45.5124, + "step": 228980 + }, + { + "epoch": 0.9251485756533895, + "grad_norm": 702.1287841796875, + "learning_rate": 8.5441736503034e-07, + "loss": 62.8335, + "step": 228990 + }, + { + "epoch": 0.9251889769187571, + "grad_norm": 480.4644470214844, + "learning_rate": 8.536100493586552e-07, + "loss": 59.3177, + "step": 229000 + }, + { + "epoch": 0.9252293781841248, + "grad_norm": 504.3478698730469, + "learning_rate": 8.528031069556997e-07, + "loss": 44.1002, + "step": 229010 + }, + { + "epoch": 0.9252697794494924, + "grad_norm": 543.9301147460938, + "learning_rate": 8.519965378372097e-07, + "loss": 58.3339, + "step": 229020 + }, + { + "epoch": 0.92531018071486, + "grad_norm": 607.31591796875, + "learning_rate": 8.511903420188972e-07, + "loss": 45.5442, + "step": 229030 + }, + { + "epoch": 0.9253505819802276, + "grad_norm": 2684.189208984375, + "learning_rate": 8.503845195164939e-07, + "loss": 108.6344, + "step": 229040 + }, + { + "epoch": 0.9253909832455952, + "grad_norm": 467.5429992675781, + "learning_rate": 8.495790703457008e-07, + "loss": 58.1052, + "step": 229050 + }, + { + "epoch": 0.9254313845109629, + "grad_norm": 437.326904296875, + "learning_rate": 8.487739945222228e-07, + "loss": 76.4556, + "step": 229060 + }, + { + "epoch": 0.9254717857763305, + "grad_norm": 575.99072265625, + "learning_rate": 8.479692920617566e-07, + "loss": 73.4812, + "step": 229070 + }, + { + "epoch": 0.9255121870416981, + "grad_norm": 1009.6207275390625, + "learning_rate": 8.471649629799916e-07, + "loss": 78.7455, 
+ "step": 229080 + }, + { + "epoch": 0.9255525883070658, + "grad_norm": 156.85250854492188, + "learning_rate": 8.463610072926021e-07, + "loss": 54.2053, + "step": 229090 + }, + { + "epoch": 0.9255929895724334, + "grad_norm": 442.9261169433594, + "learning_rate": 8.455574250152665e-07, + "loss": 53.0997, + "step": 229100 + }, + { + "epoch": 0.925633390837801, + "grad_norm": 660.5686645507812, + "learning_rate": 8.447542161636501e-07, + "loss": 70.3238, + "step": 229110 + }, + { + "epoch": 0.9256737921031687, + "grad_norm": 780.766845703125, + "learning_rate": 8.439513807534117e-07, + "loss": 72.3308, + "step": 229120 + }, + { + "epoch": 0.9257141933685363, + "grad_norm": 727.9300537109375, + "learning_rate": 8.431489188002073e-07, + "loss": 69.3834, + "step": 229130 + }, + { + "epoch": 0.925754594633904, + "grad_norm": 393.11761474609375, + "learning_rate": 8.423468303196713e-07, + "loss": 45.1736, + "step": 229140 + }, + { + "epoch": 0.9257949958992716, + "grad_norm": 462.6221008300781, + "learning_rate": 8.415451153274512e-07, + "loss": 57.6134, + "step": 229150 + }, + { + "epoch": 0.9258353971646393, + "grad_norm": 835.7976684570312, + "learning_rate": 8.407437738391722e-07, + "loss": 79.3175, + "step": 229160 + }, + { + "epoch": 0.9258757984300068, + "grad_norm": 688.6319580078125, + "learning_rate": 8.39942805870455e-07, + "loss": 76.3754, + "step": 229170 + }, + { + "epoch": 0.9259161996953744, + "grad_norm": 1290.7247314453125, + "learning_rate": 8.391422114369185e-07, + "loss": 96.1499, + "step": 229180 + }, + { + "epoch": 0.9259566009607421, + "grad_norm": 1032.16748046875, + "learning_rate": 8.383419905541679e-07, + "loss": 68.9474, + "step": 229190 + }, + { + "epoch": 0.9259970022261097, + "grad_norm": 408.2633972167969, + "learning_rate": 8.375421432378083e-07, + "loss": 71.1521, + "step": 229200 + }, + { + "epoch": 0.9260374034914773, + "grad_norm": 956.6557006835938, + "learning_rate": 8.367426695034275e-07, + "loss": 87.0406, + "step": 229210 + }, + { + "epoch": 0.926077804756845, + "grad_norm": 359.8739013671875, + "learning_rate": 8.359435693666151e-07, + "loss": 46.148, + "step": 229220 + }, + { + "epoch": 0.9261182060222126, + "grad_norm": 545.3629150390625, + "learning_rate": 8.351448428429498e-07, + "loss": 53.5108, + "step": 229230 + }, + { + "epoch": 0.9261586072875803, + "grad_norm": 776.9676513671875, + "learning_rate": 8.343464899480036e-07, + "loss": 61.7688, + "step": 229240 + }, + { + "epoch": 0.9261990085529479, + "grad_norm": 419.5853271484375, + "learning_rate": 8.335485106973352e-07, + "loss": 79.9766, + "step": 229250 + }, + { + "epoch": 0.9262394098183155, + "grad_norm": 600.081787109375, + "learning_rate": 8.327509051065097e-07, + "loss": 70.8776, + "step": 229260 + }, + { + "epoch": 0.9262798110836832, + "grad_norm": 534.948974609375, + "learning_rate": 8.319536731910727e-07, + "loss": 55.374, + "step": 229270 + }, + { + "epoch": 0.9263202123490508, + "grad_norm": 644.2175903320312, + "learning_rate": 8.311568149665672e-07, + "loss": 53.2522, + "step": 229280 + }, + { + "epoch": 0.9263606136144183, + "grad_norm": 390.21197509765625, + "learning_rate": 8.303603304485297e-07, + "loss": 58.8664, + "step": 229290 + }, + { + "epoch": 0.926401014879786, + "grad_norm": 974.7003784179688, + "learning_rate": 8.295642196524811e-07, + "loss": 85.6298, + "step": 229300 + }, + { + "epoch": 0.9264414161451536, + "grad_norm": 1075.0064697265625, + "learning_rate": 8.287684825939535e-07, + "loss": 64.4078, + "step": 229310 + }, + { + "epoch": 0.9264818174105213, + 
"grad_norm": 389.08221435546875, + "learning_rate": 8.279731192884522e-07, + "loss": 60.161, + "step": 229320 + }, + { + "epoch": 0.9265222186758889, + "grad_norm": 1258.2401123046875, + "learning_rate": 8.27178129751487e-07, + "loss": 63.4347, + "step": 229330 + }, + { + "epoch": 0.9265626199412565, + "grad_norm": 757.9690551757812, + "learning_rate": 8.263835139985521e-07, + "loss": 68.4738, + "step": 229340 + }, + { + "epoch": 0.9266030212066242, + "grad_norm": 671.93505859375, + "learning_rate": 8.255892720451441e-07, + "loss": 49.8394, + "step": 229350 + }, + { + "epoch": 0.9266434224719918, + "grad_norm": 639.8349609375, + "learning_rate": 8.247954039067463e-07, + "loss": 58.3296, + "step": 229360 + }, + { + "epoch": 0.9266838237373595, + "grad_norm": 470.7223815917969, + "learning_rate": 8.240019095988305e-07, + "loss": 50.5051, + "step": 229370 + }, + { + "epoch": 0.9267242250027271, + "grad_norm": 542.8892822265625, + "learning_rate": 8.232087891368712e-07, + "loss": 48.988, + "step": 229380 + }, + { + "epoch": 0.9267646262680947, + "grad_norm": 322.4114074707031, + "learning_rate": 8.224160425363292e-07, + "loss": 52.7342, + "step": 229390 + }, + { + "epoch": 0.9268050275334624, + "grad_norm": 418.1921691894531, + "learning_rate": 8.216236698126612e-07, + "loss": 58.1127, + "step": 229400 + }, + { + "epoch": 0.92684542879883, + "grad_norm": 520.8328857421875, + "learning_rate": 8.208316709813103e-07, + "loss": 72.0608, + "step": 229410 + }, + { + "epoch": 0.9268858300641976, + "grad_norm": 664.4025268554688, + "learning_rate": 8.200400460577262e-07, + "loss": 55.2416, + "step": 229420 + }, + { + "epoch": 0.9269262313295652, + "grad_norm": 1304.09619140625, + "learning_rate": 8.192487950573303e-07, + "loss": 66.6884, + "step": 229430 + }, + { + "epoch": 0.9269666325949328, + "grad_norm": 1162.4884033203125, + "learning_rate": 8.184579179955565e-07, + "loss": 72.2756, + "step": 229440 + }, + { + "epoch": 0.9270070338603005, + "grad_norm": 608.3755493164062, + "learning_rate": 8.176674148878195e-07, + "loss": 64.7428, + "step": 229450 + }, + { + "epoch": 0.9270474351256681, + "grad_norm": 470.55865478515625, + "learning_rate": 8.16877285749531e-07, + "loss": 70.39, + "step": 229460 + }, + { + "epoch": 0.9270878363910358, + "grad_norm": 650.9403076171875, + "learning_rate": 8.160875305961013e-07, + "loss": 70.3384, + "step": 229470 + }, + { + "epoch": 0.9271282376564034, + "grad_norm": 1190.7047119140625, + "learning_rate": 8.152981494429135e-07, + "loss": 83.5183, + "step": 229480 + }, + { + "epoch": 0.927168638921771, + "grad_norm": 1028.68701171875, + "learning_rate": 8.145091423053731e-07, + "loss": 79.5169, + "step": 229490 + }, + { + "epoch": 0.9272090401871387, + "grad_norm": 693.318115234375, + "learning_rate": 8.137205091988498e-07, + "loss": 39.8052, + "step": 229500 + }, + { + "epoch": 0.9272494414525063, + "grad_norm": 473.4018249511719, + "learning_rate": 8.129322501387249e-07, + "loss": 41.4523, + "step": 229510 + }, + { + "epoch": 0.927289842717874, + "grad_norm": 706.7362670898438, + "learning_rate": 8.121443651403615e-07, + "loss": 62.172, + "step": 229520 + }, + { + "epoch": 0.9273302439832416, + "grad_norm": 873.29833984375, + "learning_rate": 8.113568542191275e-07, + "loss": 60.5968, + "step": 229530 + }, + { + "epoch": 0.9273706452486092, + "grad_norm": 1127.896728515625, + "learning_rate": 8.105697173903659e-07, + "loss": 67.4148, + "step": 229540 + }, + { + "epoch": 0.9274110465139768, + "grad_norm": 328.8316650390625, + "learning_rate": 8.097829546694269e-07, 
+ "loss": 65.0174, + "step": 229550 + }, + { + "epoch": 0.9274514477793444, + "grad_norm": 565.306640625, + "learning_rate": 8.089965660716514e-07, + "loss": 55.0119, + "step": 229560 + }, + { + "epoch": 0.927491849044712, + "grad_norm": 648.7337036132812, + "learning_rate": 8.08210551612365e-07, + "loss": 60.0441, + "step": 229570 + }, + { + "epoch": 0.9275322503100797, + "grad_norm": 633.1400146484375, + "learning_rate": 8.074249113068999e-07, + "loss": 74.7509, + "step": 229580 + }, + { + "epoch": 0.9275726515754473, + "grad_norm": 672.4359741210938, + "learning_rate": 8.066396451705594e-07, + "loss": 55.0739, + "step": 229590 + }, + { + "epoch": 0.927613052840815, + "grad_norm": 731.5086059570312, + "learning_rate": 8.058547532186667e-07, + "loss": 87.6489, + "step": 229600 + }, + { + "epoch": 0.9276534541061826, + "grad_norm": 452.5327453613281, + "learning_rate": 8.050702354665163e-07, + "loss": 52.1309, + "step": 229610 + }, + { + "epoch": 0.9276938553715502, + "grad_norm": 956.0733642578125, + "learning_rate": 8.042860919294027e-07, + "loss": 59.8707, + "step": 229620 + }, + { + "epoch": 0.9277342566369179, + "grad_norm": 974.9046020507812, + "learning_rate": 8.035023226226158e-07, + "loss": 92.933, + "step": 229630 + }, + { + "epoch": 0.9277746579022855, + "grad_norm": 499.1571350097656, + "learning_rate": 8.0271892756143e-07, + "loss": 53.1804, + "step": 229640 + }, + { + "epoch": 0.9278150591676532, + "grad_norm": 485.5625915527344, + "learning_rate": 8.019359067611266e-07, + "loss": 66.7756, + "step": 229650 + }, + { + "epoch": 0.9278554604330208, + "grad_norm": 628.2437744140625, + "learning_rate": 8.011532602369665e-07, + "loss": 58.3425, + "step": 229660 + }, + { + "epoch": 0.9278958616983884, + "grad_norm": 679.14990234375, + "learning_rate": 8.003709880042044e-07, + "loss": 47.8591, + "step": 229670 + }, + { + "epoch": 0.927936262963756, + "grad_norm": 649.2263793945312, + "learning_rate": 7.995890900780967e-07, + "loss": 65.3803, + "step": 229680 + }, + { + "epoch": 0.9279766642291236, + "grad_norm": 622.6150512695312, + "learning_rate": 7.98807566473887e-07, + "loss": 73.6081, + "step": 229690 + }, + { + "epoch": 0.9280170654944913, + "grad_norm": 519.5260009765625, + "learning_rate": 7.980264172068052e-07, + "loss": 100.0518, + "step": 229700 + }, + { + "epoch": 0.9280574667598589, + "grad_norm": 760.5896606445312, + "learning_rate": 7.97245642292086e-07, + "loss": 54.7106, + "step": 229710 + }, + { + "epoch": 0.9280978680252265, + "grad_norm": 1011.8035888671875, + "learning_rate": 7.964652417449481e-07, + "loss": 67.5511, + "step": 229720 + }, + { + "epoch": 0.9281382692905942, + "grad_norm": 592.2274780273438, + "learning_rate": 7.956852155806083e-07, + "loss": 53.2257, + "step": 229730 + }, + { + "epoch": 0.9281786705559618, + "grad_norm": 1325.98583984375, + "learning_rate": 7.949055638142744e-07, + "loss": 58.762, + "step": 229740 + }, + { + "epoch": 0.9282190718213295, + "grad_norm": 345.8152770996094, + "learning_rate": 7.941262864611387e-07, + "loss": 64.8477, + "step": 229750 + }, + { + "epoch": 0.9282594730866971, + "grad_norm": 928.7581176757812, + "learning_rate": 7.933473835364025e-07, + "loss": 66.4321, + "step": 229760 + }, + { + "epoch": 0.9282998743520647, + "grad_norm": 1063.65234375, + "learning_rate": 7.925688550552468e-07, + "loss": 84.4091, + "step": 229770 + }, + { + "epoch": 0.9283402756174324, + "grad_norm": 597.1815185546875, + "learning_rate": 7.917907010328485e-07, + "loss": 61.7598, + "step": 229780 + }, + { + "epoch": 0.9283806768828, + 
"grad_norm": 915.6011962890625, + "learning_rate": 7.91012921484382e-07, + "loss": 49.6887, + "step": 229790 + }, + { + "epoch": 0.9284210781481677, + "grad_norm": 319.69091796875, + "learning_rate": 7.902355164250041e-07, + "loss": 77.2658, + "step": 229800 + }, + { + "epoch": 0.9284614794135352, + "grad_norm": 529.29541015625, + "learning_rate": 7.894584858698806e-07, + "loss": 56.7942, + "step": 229810 + }, + { + "epoch": 0.9285018806789028, + "grad_norm": 556.9268798828125, + "learning_rate": 7.886818298341503e-07, + "loss": 56.6572, + "step": 229820 + }, + { + "epoch": 0.9285422819442705, + "grad_norm": 852.9275512695312, + "learning_rate": 7.879055483329567e-07, + "loss": 80.518, + "step": 229830 + }, + { + "epoch": 0.9285826832096381, + "grad_norm": 468.233154296875, + "learning_rate": 7.871296413814388e-07, + "loss": 102.8293, + "step": 229840 + }, + { + "epoch": 0.9286230844750057, + "grad_norm": 809.6490478515625, + "learning_rate": 7.863541089947202e-07, + "loss": 75.3238, + "step": 229850 + }, + { + "epoch": 0.9286634857403734, + "grad_norm": 633.1370849609375, + "learning_rate": 7.855789511879152e-07, + "loss": 64.7446, + "step": 229860 + }, + { + "epoch": 0.928703887005741, + "grad_norm": 365.8032531738281, + "learning_rate": 7.848041679761475e-07, + "loss": 63.5789, + "step": 229870 + }, + { + "epoch": 0.9287442882711087, + "grad_norm": 539.9057006835938, + "learning_rate": 7.840297593745117e-07, + "loss": 50.0288, + "step": 229880 + }, + { + "epoch": 0.9287846895364763, + "grad_norm": 503.654541015625, + "learning_rate": 7.832557253981088e-07, + "loss": 43.8816, + "step": 229890 + }, + { + "epoch": 0.9288250908018439, + "grad_norm": 396.4350280761719, + "learning_rate": 7.824820660620314e-07, + "loss": 72.0587, + "step": 229900 + }, + { + "epoch": 0.9288654920672116, + "grad_norm": 587.5791625976562, + "learning_rate": 7.81708781381354e-07, + "loss": 65.4651, + "step": 229910 + }, + { + "epoch": 0.9289058933325792, + "grad_norm": 1403.5535888671875, + "learning_rate": 7.809358713711646e-07, + "loss": 94.4933, + "step": 229920 + }, + { + "epoch": 0.9289462945979468, + "grad_norm": 741.0574340820312, + "learning_rate": 7.801633360465199e-07, + "loss": 56.6982, + "step": 229930 + }, + { + "epoch": 0.9289866958633144, + "grad_norm": 575.1888427734375, + "learning_rate": 7.793911754224904e-07, + "loss": 44.927, + "step": 229940 + }, + { + "epoch": 0.929027097128682, + "grad_norm": 544.247802734375, + "learning_rate": 7.786193895141237e-07, + "loss": 91.702, + "step": 229950 + }, + { + "epoch": 0.9290674983940497, + "grad_norm": 804.1387939453125, + "learning_rate": 7.778479783364656e-07, + "loss": 40.562, + "step": 229960 + }, + { + "epoch": 0.9291078996594173, + "grad_norm": 772.8467407226562, + "learning_rate": 7.770769419045621e-07, + "loss": 57.7622, + "step": 229970 + }, + { + "epoch": 0.929148300924785, + "grad_norm": 842.8298950195312, + "learning_rate": 7.763062802334387e-07, + "loss": 74.1833, + "step": 229980 + }, + { + "epoch": 0.9291887021901526, + "grad_norm": 634.4865112304688, + "learning_rate": 7.75535993338119e-07, + "loss": 54.3455, + "step": 229990 + }, + { + "epoch": 0.9292291034555202, + "grad_norm": 428.2165222167969, + "learning_rate": 7.747660812336221e-07, + "loss": 57.9572, + "step": 230000 + }, + { + "epoch": 0.9292695047208879, + "grad_norm": 733.5465087890625, + "learning_rate": 7.739965439349606e-07, + "loss": 62.5643, + "step": 230010 + }, + { + "epoch": 0.9293099059862555, + "grad_norm": 950.0711669921875, + "learning_rate": 7.732273814571334e-07, 
+ "loss": 53.0478, + "step": 230020 + }, + { + "epoch": 0.9293503072516232, + "grad_norm": 740.8470458984375, + "learning_rate": 7.724585938151397e-07, + "loss": 59.4953, + "step": 230030 + }, + { + "epoch": 0.9293907085169908, + "grad_norm": 711.0886840820312, + "learning_rate": 7.71690181023963e-07, + "loss": 56.8726, + "step": 230040 + }, + { + "epoch": 0.9294311097823584, + "grad_norm": 943.4730834960938, + "learning_rate": 7.709221430985848e-07, + "loss": 70.7828, + "step": 230050 + }, + { + "epoch": 0.929471511047726, + "grad_norm": 999.2098388671875, + "learning_rate": 7.701544800539817e-07, + "loss": 40.7489, + "step": 230060 + }, + { + "epoch": 0.9295119123130936, + "grad_norm": 474.64886474609375, + "learning_rate": 7.693871919051133e-07, + "loss": 48.7094, + "step": 230070 + }, + { + "epoch": 0.9295523135784612, + "grad_norm": 948.08056640625, + "learning_rate": 7.68620278666945e-07, + "loss": 76.1984, + "step": 230080 + }, + { + "epoch": 0.9295927148438289, + "grad_norm": 891.1578369140625, + "learning_rate": 7.678537403544206e-07, + "loss": 68.1797, + "step": 230090 + }, + { + "epoch": 0.9296331161091965, + "grad_norm": 759.309326171875, + "learning_rate": 7.670875769824948e-07, + "loss": 64.3742, + "step": 230100 + }, + { + "epoch": 0.9296735173745642, + "grad_norm": 605.8822021484375, + "learning_rate": 7.663217885660979e-07, + "loss": 62.2212, + "step": 230110 + }, + { + "epoch": 0.9297139186399318, + "grad_norm": 670.1693115234375, + "learning_rate": 7.655563751201578e-07, + "loss": 73.6362, + "step": 230120 + }, + { + "epoch": 0.9297543199052994, + "grad_norm": 835.7003784179688, + "learning_rate": 7.647913366596005e-07, + "loss": 87.3958, + "step": 230130 + }, + { + "epoch": 0.9297947211706671, + "grad_norm": 454.6091613769531, + "learning_rate": 7.640266731993384e-07, + "loss": 68.9505, + "step": 230140 + }, + { + "epoch": 0.9298351224360347, + "grad_norm": 435.1728210449219, + "learning_rate": 7.632623847542775e-07, + "loss": 88.9217, + "step": 230150 + }, + { + "epoch": 0.9298755237014024, + "grad_norm": 898.0064697265625, + "learning_rate": 7.624984713393214e-07, + "loss": 64.2094, + "step": 230160 + }, + { + "epoch": 0.92991592496677, + "grad_norm": 2255.42578125, + "learning_rate": 7.617349329693624e-07, + "loss": 96.4544, + "step": 230170 + }, + { + "epoch": 0.9299563262321376, + "grad_norm": 484.9825744628906, + "learning_rate": 7.609717696592845e-07, + "loss": 76.7607, + "step": 230180 + }, + { + "epoch": 0.9299967274975052, + "grad_norm": 711.434326171875, + "learning_rate": 7.602089814239688e-07, + "loss": 65.4605, + "step": 230190 + }, + { + "epoch": 0.9300371287628728, + "grad_norm": 373.5561218261719, + "learning_rate": 7.594465682782815e-07, + "loss": 82.7867, + "step": 230200 + }, + { + "epoch": 0.9300775300282405, + "grad_norm": 607.8157348632812, + "learning_rate": 7.586845302370926e-07, + "loss": 70.3225, + "step": 230210 + }, + { + "epoch": 0.9301179312936081, + "grad_norm": 620.6288452148438, + "learning_rate": 7.579228673152528e-07, + "loss": 43.9605, + "step": 230220 + }, + { + "epoch": 0.9301583325589757, + "grad_norm": 423.108154296875, + "learning_rate": 7.571615795276122e-07, + "loss": 64.7619, + "step": 230230 + }, + { + "epoch": 0.9301987338243434, + "grad_norm": 497.7677917480469, + "learning_rate": 7.564006668890167e-07, + "loss": 109.9139, + "step": 230240 + }, + { + "epoch": 0.930239135089711, + "grad_norm": 257.3434143066406, + "learning_rate": 7.556401294142923e-07, + "loss": 61.4518, + "step": 230250 + }, + { + "epoch": 
0.9302795363550787, + "grad_norm": 592.4563598632812, + "learning_rate": 7.54879967118276e-07, + "loss": 116.2042, + "step": 230260 + }, + { + "epoch": 0.9303199376204463, + "grad_norm": 586.4590454101562, + "learning_rate": 7.541201800157805e-07, + "loss": 52.4232, + "step": 230270 + }, + { + "epoch": 0.9303603388858139, + "grad_norm": 245.4807891845703, + "learning_rate": 7.533607681216204e-07, + "loss": 37.6341, + "step": 230280 + }, + { + "epoch": 0.9304007401511816, + "grad_norm": 751.5016479492188, + "learning_rate": 7.526017314506018e-07, + "loss": 76.7587, + "step": 230290 + }, + { + "epoch": 0.9304411414165492, + "grad_norm": 931.086181640625, + "learning_rate": 7.518430700175238e-07, + "loss": 63.1316, + "step": 230300 + }, + { + "epoch": 0.9304815426819169, + "grad_norm": 1147.1370849609375, + "learning_rate": 7.510847838371726e-07, + "loss": 67.3305, + "step": 230310 + }, + { + "epoch": 0.9305219439472844, + "grad_norm": 783.20068359375, + "learning_rate": 7.503268729243318e-07, + "loss": 54.0194, + "step": 230320 + }, + { + "epoch": 0.930562345212652, + "grad_norm": 706.6619873046875, + "learning_rate": 7.495693372937807e-07, + "loss": 74.6418, + "step": 230330 + }, + { + "epoch": 0.9306027464780197, + "grad_norm": 879.536865234375, + "learning_rate": 7.488121769602852e-07, + "loss": 82.6009, + "step": 230340 + }, + { + "epoch": 0.9306431477433873, + "grad_norm": 1477.2313232421875, + "learning_rate": 7.480553919386113e-07, + "loss": 85.7123, + "step": 230350 + }, + { + "epoch": 0.9306835490087549, + "grad_norm": 388.9001770019531, + "learning_rate": 7.472989822435028e-07, + "loss": 55.3922, + "step": 230360 + }, + { + "epoch": 0.9307239502741226, + "grad_norm": 876.6964111328125, + "learning_rate": 7.465429478897191e-07, + "loss": 57.7453, + "step": 230370 + }, + { + "epoch": 0.9307643515394902, + "grad_norm": 1151.5103759765625, + "learning_rate": 7.457872888919881e-07, + "loss": 80.1198, + "step": 230380 + }, + { + "epoch": 0.9308047528048579, + "grad_norm": 1477.904541015625, + "learning_rate": 7.450320052650494e-07, + "loss": 106.3551, + "step": 230390 + }, + { + "epoch": 0.9308451540702255, + "grad_norm": 685.5193481445312, + "learning_rate": 7.442770970236246e-07, + "loss": 47.3922, + "step": 230400 + }, + { + "epoch": 0.9308855553355931, + "grad_norm": 867.4537353515625, + "learning_rate": 7.435225641824284e-07, + "loss": 48.7756, + "step": 230410 + }, + { + "epoch": 0.9309259566009608, + "grad_norm": 612.0108032226562, + "learning_rate": 7.42768406756178e-07, + "loss": 57.8277, + "step": 230420 + }, + { + "epoch": 0.9309663578663284, + "grad_norm": 850.3787841796875, + "learning_rate": 7.420146247595683e-07, + "loss": 56.2179, + "step": 230430 + }, + { + "epoch": 0.9310067591316961, + "grad_norm": 676.94580078125, + "learning_rate": 7.412612182073009e-07, + "loss": 59.1952, + "step": 230440 + }, + { + "epoch": 0.9310471603970636, + "grad_norm": 482.73968505859375, + "learning_rate": 7.405081871140595e-07, + "loss": 32.9996, + "step": 230450 + }, + { + "epoch": 0.9310875616624312, + "grad_norm": 634.5457763671875, + "learning_rate": 7.397555314945259e-07, + "loss": 47.6102, + "step": 230460 + }, + { + "epoch": 0.9311279629277989, + "grad_norm": 523.4039916992188, + "learning_rate": 7.390032513633771e-07, + "loss": 71.5205, + "step": 230470 + }, + { + "epoch": 0.9311683641931665, + "grad_norm": 1763.243408203125, + "learning_rate": 7.382513467352726e-07, + "loss": 61.3038, + "step": 230480 + }, + { + "epoch": 0.9312087654585341, + "grad_norm": 681.1908569335938, + 
"learning_rate": 7.37499817624876e-07, + "loss": 81.3037, + "step": 230490 + }, + { + "epoch": 0.9312491667239018, + "grad_norm": 956.3349609375, + "learning_rate": 7.36748664046838e-07, + "loss": 42.7449, + "step": 230500 + }, + { + "epoch": 0.9312895679892694, + "grad_norm": 524.75146484375, + "learning_rate": 7.359978860158023e-07, + "loss": 69.8002, + "step": 230510 + }, + { + "epoch": 0.9313299692546371, + "grad_norm": 519.6278686523438, + "learning_rate": 7.352474835464018e-07, + "loss": 65.4647, + "step": 230520 + }, + { + "epoch": 0.9313703705200047, + "grad_norm": 376.7368469238281, + "learning_rate": 7.344974566532759e-07, + "loss": 61.5749, + "step": 230530 + }, + { + "epoch": 0.9314107717853723, + "grad_norm": 757.0547485351562, + "learning_rate": 7.33747805351035e-07, + "loss": 53.9601, + "step": 230540 + }, + { + "epoch": 0.93145117305074, + "grad_norm": 470.8480529785156, + "learning_rate": 7.329985296543052e-07, + "loss": 54.3688, + "step": 230550 + }, + { + "epoch": 0.9314915743161076, + "grad_norm": 1268.7437744140625, + "learning_rate": 7.322496295776838e-07, + "loss": 81.3716, + "step": 230560 + }, + { + "epoch": 0.9315319755814752, + "grad_norm": 262.56787109375, + "learning_rate": 7.315011051357767e-07, + "loss": 53.7753, + "step": 230570 + }, + { + "epoch": 0.9315723768468428, + "grad_norm": 561.1670532226562, + "learning_rate": 7.307529563431792e-07, + "loss": 49.7347, + "step": 230580 + }, + { + "epoch": 0.9316127781122104, + "grad_norm": 805.1236572265625, + "learning_rate": 7.300051832144684e-07, + "loss": 91.4116, + "step": 230590 + }, + { + "epoch": 0.9316531793775781, + "grad_norm": 529.5875244140625, + "learning_rate": 7.292577857642302e-07, + "loss": 60.1163, + "step": 230600 + }, + { + "epoch": 0.9316935806429457, + "grad_norm": 713.3728637695312, + "learning_rate": 7.28510764007031e-07, + "loss": 77.0996, + "step": 230610 + }, + { + "epoch": 0.9317339819083134, + "grad_norm": 726.7280883789062, + "learning_rate": 7.277641179574368e-07, + "loss": 85.2771, + "step": 230620 + }, + { + "epoch": 0.931774383173681, + "grad_norm": 555.1173706054688, + "learning_rate": 7.270178476300027e-07, + "loss": 75.1859, + "step": 230630 + }, + { + "epoch": 0.9318147844390486, + "grad_norm": 624.400634765625, + "learning_rate": 7.262719530392814e-07, + "loss": 44.4513, + "step": 230640 + }, + { + "epoch": 0.9318551857044163, + "grad_norm": 804.9248046875, + "learning_rate": 7.255264341998058e-07, + "loss": 76.0521, + "step": 230650 + }, + { + "epoch": 0.9318955869697839, + "grad_norm": 513.8983154296875, + "learning_rate": 7.247812911261199e-07, + "loss": 78.1206, + "step": 230660 + }, + { + "epoch": 0.9319359882351516, + "grad_norm": 991.1480712890625, + "learning_rate": 7.240365238327451e-07, + "loss": 62.8402, + "step": 230670 + }, + { + "epoch": 0.9319763895005192, + "grad_norm": 1143.223876953125, + "learning_rate": 7.23292132334199e-07, + "loss": 62.9539, + "step": 230680 + }, + { + "epoch": 0.9320167907658868, + "grad_norm": 440.544189453125, + "learning_rate": 7.22548116645001e-07, + "loss": 48.8203, + "step": 230690 + }, + { + "epoch": 0.9320571920312544, + "grad_norm": 807.284423828125, + "learning_rate": 7.218044767796484e-07, + "loss": 80.1345, + "step": 230700 + }, + { + "epoch": 0.932097593296622, + "grad_norm": 788.62646484375, + "learning_rate": 7.210612127526473e-07, + "loss": 73.4582, + "step": 230710 + }, + { + "epoch": 0.9321379945619896, + "grad_norm": 340.9614562988281, + "learning_rate": 7.203183245784817e-07, + "loss": 46.7992, + "step": 230720 + }, + 
{ + "epoch": 0.9321783958273573, + "grad_norm": 744.9443969726562, + "learning_rate": 7.195758122716334e-07, + "loss": 118.2582, + "step": 230730 + }, + { + "epoch": 0.9322187970927249, + "grad_norm": 706.22314453125, + "learning_rate": 7.188336758465863e-07, + "loss": 86.0973, + "step": 230740 + }, + { + "epoch": 0.9322591983580926, + "grad_norm": 662.9901123046875, + "learning_rate": 7.180919153178e-07, + "loss": 106.4228, + "step": 230750 + }, + { + "epoch": 0.9322995996234602, + "grad_norm": 323.66168212890625, + "learning_rate": 7.173505306997386e-07, + "loss": 52.3298, + "step": 230760 + }, + { + "epoch": 0.9323400008888278, + "grad_norm": 687.504150390625, + "learning_rate": 7.166095220068547e-07, + "loss": 64.6181, + "step": 230770 + }, + { + "epoch": 0.9323804021541955, + "grad_norm": 914.6375122070312, + "learning_rate": 7.15868889253597e-07, + "loss": 82.6416, + "step": 230780 + }, + { + "epoch": 0.9324208034195631, + "grad_norm": 838.144287109375, + "learning_rate": 7.151286324544027e-07, + "loss": 58.2898, + "step": 230790 + }, + { + "epoch": 0.9324612046849308, + "grad_norm": 274.6681823730469, + "learning_rate": 7.143887516237092e-07, + "loss": 48.8652, + "step": 230800 + }, + { + "epoch": 0.9325016059502984, + "grad_norm": 361.8494567871094, + "learning_rate": 7.136492467759315e-07, + "loss": 55.5397, + "step": 230810 + }, + { + "epoch": 0.932542007215666, + "grad_norm": 643.9947509765625, + "learning_rate": 7.129101179254915e-07, + "loss": 51.3844, + "step": 230820 + }, + { + "epoch": 0.9325824084810336, + "grad_norm": 429.9317932128906, + "learning_rate": 7.121713650867978e-07, + "loss": 62.0041, + "step": 230830 + }, + { + "epoch": 0.9326228097464012, + "grad_norm": 790.6885986328125, + "learning_rate": 7.114329882742521e-07, + "loss": 74.602, + "step": 230840 + }, + { + "epoch": 0.9326632110117689, + "grad_norm": 1181.88818359375, + "learning_rate": 7.106949875022562e-07, + "loss": 107.0887, + "step": 230850 + }, + { + "epoch": 0.9327036122771365, + "grad_norm": 801.3294067382812, + "learning_rate": 7.099573627851852e-07, + "loss": 80.3811, + "step": 230860 + }, + { + "epoch": 0.9327440135425041, + "grad_norm": 799.3873291015625, + "learning_rate": 7.092201141374322e-07, + "loss": 46.4265, + "step": 230870 + }, + { + "epoch": 0.9327844148078718, + "grad_norm": 184.85781860351562, + "learning_rate": 7.084832415733633e-07, + "loss": 39.779, + "step": 230880 + }, + { + "epoch": 0.9328248160732394, + "grad_norm": 534.5520629882812, + "learning_rate": 7.07746745107345e-07, + "loss": 38.7508, + "step": 230890 + }, + { + "epoch": 0.9328652173386071, + "grad_norm": 519.8212280273438, + "learning_rate": 7.070106247537345e-07, + "loss": 44.7578, + "step": 230900 + }, + { + "epoch": 0.9329056186039747, + "grad_norm": 797.7544555664062, + "learning_rate": 7.062748805268871e-07, + "loss": 70.7074, + "step": 230910 + }, + { + "epoch": 0.9329460198693423, + "grad_norm": 762.2279052734375, + "learning_rate": 7.055395124411446e-07, + "loss": 95.0273, + "step": 230920 + }, + { + "epoch": 0.93298642113471, + "grad_norm": 1471.9866943359375, + "learning_rate": 7.048045205108401e-07, + "loss": 72.9148, + "step": 230930 + }, + { + "epoch": 0.9330268224000776, + "grad_norm": 499.7298278808594, + "learning_rate": 7.040699047503064e-07, + "loss": 57.9517, + "step": 230940 + }, + { + "epoch": 0.9330672236654453, + "grad_norm": 928.5980224609375, + "learning_rate": 7.033356651738632e-07, + "loss": 71.3346, + "step": 230950 + }, + { + "epoch": 0.9331076249308128, + "grad_norm": 1196.57275390625, + 
"learning_rate": 7.026018017958303e-07, + "loss": 79.5478, + "step": 230960 + }, + { + "epoch": 0.9331480261961804, + "grad_norm": 576.0173950195312, + "learning_rate": 7.018683146305028e-07, + "loss": 49.7893, + "step": 230970 + }, + { + "epoch": 0.9331884274615481, + "grad_norm": 807.575439453125, + "learning_rate": 7.011352036921937e-07, + "loss": 52.8547, + "step": 230980 + }, + { + "epoch": 0.9332288287269157, + "grad_norm": 777.4636840820312, + "learning_rate": 7.004024689951871e-07, + "loss": 80.9184, + "step": 230990 + }, + { + "epoch": 0.9332692299922833, + "grad_norm": 623.2559204101562, + "learning_rate": 6.996701105537718e-07, + "loss": 59.6594, + "step": 231000 + }, + { + "epoch": 0.933309631257651, + "grad_norm": 297.4967041015625, + "learning_rate": 6.98938128382225e-07, + "loss": 48.3383, + "step": 231010 + }, + { + "epoch": 0.9333500325230186, + "grad_norm": 656.0429077148438, + "learning_rate": 6.982065224948109e-07, + "loss": 40.0686, + "step": 231020 + }, + { + "epoch": 0.9333904337883863, + "grad_norm": 290.918212890625, + "learning_rate": 6.974752929058048e-07, + "loss": 53.3443, + "step": 231030 + }, + { + "epoch": 0.9334308350537539, + "grad_norm": 524.0316772460938, + "learning_rate": 6.967444396294509e-07, + "loss": 49.8017, + "step": 231040 + }, + { + "epoch": 0.9334712363191215, + "grad_norm": 687.8283081054688, + "learning_rate": 6.960139626800044e-07, + "loss": 75.5844, + "step": 231050 + }, + { + "epoch": 0.9335116375844892, + "grad_norm": 2209.503662109375, + "learning_rate": 6.952838620717029e-07, + "loss": 66.8198, + "step": 231060 + }, + { + "epoch": 0.9335520388498568, + "grad_norm": 681.1060180664062, + "learning_rate": 6.945541378187793e-07, + "loss": 62.3607, + "step": 231070 + }, + { + "epoch": 0.9335924401152244, + "grad_norm": 960.9407348632812, + "learning_rate": 6.938247899354666e-07, + "loss": 63.7145, + "step": 231080 + }, + { + "epoch": 0.933632841380592, + "grad_norm": 1092.3902587890625, + "learning_rate": 6.93095818435976e-07, + "loss": 81.3989, + "step": 231090 + }, + { + "epoch": 0.9336732426459596, + "grad_norm": 531.9747924804688, + "learning_rate": 6.923672233345225e-07, + "loss": 68.8976, + "step": 231100 + }, + { + "epoch": 0.9337136439113273, + "grad_norm": 639.6200561523438, + "learning_rate": 6.916390046453103e-07, + "loss": 67.0148, + "step": 231110 + }, + { + "epoch": 0.9337540451766949, + "grad_norm": 351.0632629394531, + "learning_rate": 6.909111623825393e-07, + "loss": 65.0348, + "step": 231120 + }, + { + "epoch": 0.9337944464420626, + "grad_norm": 338.8407897949219, + "learning_rate": 6.901836965603914e-07, + "loss": 65.4407, + "step": 231130 + }, + { + "epoch": 0.9338348477074302, + "grad_norm": 947.0685424804688, + "learning_rate": 6.894566071930575e-07, + "loss": 62.6744, + "step": 231140 + }, + { + "epoch": 0.9338752489727978, + "grad_norm": 541.746337890625, + "learning_rate": 6.887298942947063e-07, + "loss": 71.0997, + "step": 231150 + }, + { + "epoch": 0.9339156502381655, + "grad_norm": 834.7459106445312, + "learning_rate": 6.880035578795085e-07, + "loss": 63.1322, + "step": 231160 + }, + { + "epoch": 0.9339560515035331, + "grad_norm": 588.7003173828125, + "learning_rate": 6.872775979616242e-07, + "loss": 63.551, + "step": 231170 + }, + { + "epoch": 0.9339964527689008, + "grad_norm": 628.9923706054688, + "learning_rate": 6.865520145552041e-07, + "loss": 69.25, + "step": 231180 + }, + { + "epoch": 0.9340368540342684, + "grad_norm": 735.1594848632812, + "learning_rate": 6.858268076743991e-07, + "loss": 69.8038, + 
"step": 231190 + }, + { + "epoch": 0.934077255299636, + "grad_norm": 1622.4154052734375, + "learning_rate": 6.851019773333423e-07, + "loss": 71.2018, + "step": 231200 + }, + { + "epoch": 0.9341176565650036, + "grad_norm": 615.1414794921875, + "learning_rate": 6.843775235461669e-07, + "loss": 51.8232, + "step": 231210 + }, + { + "epoch": 0.9341580578303712, + "grad_norm": 861.1585693359375, + "learning_rate": 6.836534463269951e-07, + "loss": 72.2473, + "step": 231220 + }, + { + "epoch": 0.9341984590957388, + "grad_norm": 625.6737670898438, + "learning_rate": 6.829297456899442e-07, + "loss": 36.4905, + "step": 231230 + }, + { + "epoch": 0.9342388603611065, + "grad_norm": 305.00732421875, + "learning_rate": 6.822064216491231e-07, + "loss": 61.0199, + "step": 231240 + }, + { + "epoch": 0.9342792616264741, + "grad_norm": 649.8922729492188, + "learning_rate": 6.814834742186361e-07, + "loss": 87.4988, + "step": 231250 + }, + { + "epoch": 0.9343196628918418, + "grad_norm": 1414.251953125, + "learning_rate": 6.807609034125717e-07, + "loss": 81.2259, + "step": 231260 + }, + { + "epoch": 0.9343600641572094, + "grad_norm": 783.3069458007812, + "learning_rate": 6.800387092450189e-07, + "loss": 87.9148, + "step": 231270 + }, + { + "epoch": 0.934400465422577, + "grad_norm": 567.1847534179688, + "learning_rate": 6.793168917300619e-07, + "loss": 74.0091, + "step": 231280 + }, + { + "epoch": 0.9344408666879447, + "grad_norm": 1298.284912109375, + "learning_rate": 6.785954508817627e-07, + "loss": 84.7164, + "step": 231290 + }, + { + "epoch": 0.9344812679533123, + "grad_norm": 569.042724609375, + "learning_rate": 6.778743867141991e-07, + "loss": 69.1159, + "step": 231300 + }, + { + "epoch": 0.93452166921868, + "grad_norm": 523.5701904296875, + "learning_rate": 6.771536992414174e-07, + "loss": 32.9348, + "step": 231310 + }, + { + "epoch": 0.9345620704840476, + "grad_norm": 524.421875, + "learning_rate": 6.764333884774754e-07, + "loss": 66.5881, + "step": 231320 + }, + { + "epoch": 0.9346024717494152, + "grad_norm": 826.166015625, + "learning_rate": 6.757134544364108e-07, + "loss": 54.4557, + "step": 231330 + }, + { + "epoch": 0.9346428730147828, + "grad_norm": 118.9662857055664, + "learning_rate": 6.749938971322633e-07, + "loss": 80.5897, + "step": 231340 + }, + { + "epoch": 0.9346832742801504, + "grad_norm": 434.6936340332031, + "learning_rate": 6.742747165790597e-07, + "loss": 40.1486, + "step": 231350 + }, + { + "epoch": 0.934723675545518, + "grad_norm": 931.8153686523438, + "learning_rate": 6.735559127908175e-07, + "loss": 94.8194, + "step": 231360 + }, + { + "epoch": 0.9347640768108857, + "grad_norm": 721.9061279296875, + "learning_rate": 6.728374857815523e-07, + "loss": 94.1188, + "step": 231370 + }, + { + "epoch": 0.9348044780762533, + "grad_norm": 965.0362548828125, + "learning_rate": 6.721194355652705e-07, + "loss": 68.1843, + "step": 231380 + }, + { + "epoch": 0.934844879341621, + "grad_norm": 747.5338745117188, + "learning_rate": 6.714017621559721e-07, + "loss": 106.0858, + "step": 231390 + }, + { + "epoch": 0.9348852806069886, + "grad_norm": 1088.69482421875, + "learning_rate": 6.706844655676481e-07, + "loss": 69.7384, + "step": 231400 + }, + { + "epoch": 0.9349256818723563, + "grad_norm": 752.2070922851562, + "learning_rate": 6.699675458142829e-07, + "loss": 105.0485, + "step": 231410 + }, + { + "epoch": 0.9349660831377239, + "grad_norm": 345.35491943359375, + "learning_rate": 6.692510029098498e-07, + "loss": 81.3561, + "step": 231420 + }, + { + "epoch": 0.9350064844030915, + "grad_norm": 
480.0761413574219, + "learning_rate": 6.685348368683219e-07, + "loss": 58.1819, + "step": 231430 + }, + { + "epoch": 0.9350468856684592, + "grad_norm": 95.40776824951172, + "learning_rate": 6.678190477036572e-07, + "loss": 50.6943, + "step": 231440 + }, + { + "epoch": 0.9350872869338268, + "grad_norm": 382.2948913574219, + "learning_rate": 6.671036354298155e-07, + "loss": 76.9703, + "step": 231450 + }, + { + "epoch": 0.9351276881991945, + "grad_norm": 555.5907592773438, + "learning_rate": 6.663886000607433e-07, + "loss": 77.9213, + "step": 231460 + }, + { + "epoch": 0.935168089464562, + "grad_norm": 846.5916137695312, + "learning_rate": 6.656739416103741e-07, + "loss": 77.1912, + "step": 231470 + }, + { + "epoch": 0.9352084907299296, + "grad_norm": 332.3277587890625, + "learning_rate": 6.6495966009265e-07, + "loss": 46.7777, + "step": 231480 + }, + { + "epoch": 0.9352488919952973, + "grad_norm": 811.2119750976562, + "learning_rate": 6.642457555214887e-07, + "loss": 58.5204, + "step": 231490 + }, + { + "epoch": 0.9352892932606649, + "grad_norm": 1216.3533935546875, + "learning_rate": 6.635322279108125e-07, + "loss": 78.1324, + "step": 231500 + }, + { + "epoch": 0.9353296945260325, + "grad_norm": 585.4556884765625, + "learning_rate": 6.628190772745302e-07, + "loss": 68.5448, + "step": 231510 + }, + { + "epoch": 0.9353700957914002, + "grad_norm": 764.5619506835938, + "learning_rate": 6.62106303626544e-07, + "loss": 67.0104, + "step": 231520 + }, + { + "epoch": 0.9354104970567678, + "grad_norm": 405.2339782714844, + "learning_rate": 6.613939069807562e-07, + "loss": 54.3628, + "step": 231530 + }, + { + "epoch": 0.9354508983221355, + "grad_norm": 479.2991638183594, + "learning_rate": 6.606818873510468e-07, + "loss": 49.5944, + "step": 231540 + }, + { + "epoch": 0.9354912995875031, + "grad_norm": 301.5000305175781, + "learning_rate": 6.599702447513001e-07, + "loss": 88.1479, + "step": 231550 + }, + { + "epoch": 0.9355317008528707, + "grad_norm": 306.16278076171875, + "learning_rate": 6.592589791953896e-07, + "loss": 68.3857, + "step": 231560 + }, + { + "epoch": 0.9355721021182384, + "grad_norm": 786.3103637695312, + "learning_rate": 6.585480906971863e-07, + "loss": 69.2168, + "step": 231570 + }, + { + "epoch": 0.935612503383606, + "grad_norm": 743.0350952148438, + "learning_rate": 6.578375792705416e-07, + "loss": 71.4567, + "step": 231580 + }, + { + "epoch": 0.9356529046489737, + "grad_norm": 480.2101135253906, + "learning_rate": 6.57127444929313e-07, + "loss": 57.4373, + "step": 231590 + }, + { + "epoch": 0.9356933059143412, + "grad_norm": 815.6358032226562, + "learning_rate": 6.56417687687343e-07, + "loss": 81.0268, + "step": 231600 + }, + { + "epoch": 0.9357337071797088, + "grad_norm": 642.8241577148438, + "learning_rate": 6.557083075584691e-07, + "loss": 43.8267, + "step": 231610 + }, + { + "epoch": 0.9357741084450765, + "grad_norm": 374.35040283203125, + "learning_rate": 6.549993045565229e-07, + "loss": 81.3432, + "step": 231620 + }, + { + "epoch": 0.9358145097104441, + "grad_norm": 563.8860473632812, + "learning_rate": 6.542906786953196e-07, + "loss": 82.1695, + "step": 231630 + }, + { + "epoch": 0.9358549109758117, + "grad_norm": 242.18435668945312, + "learning_rate": 6.535824299886862e-07, + "loss": 36.8594, + "step": 231640 + }, + { + "epoch": 0.9358953122411794, + "grad_norm": 898.7394409179688, + "learning_rate": 6.528745584504181e-07, + "loss": 52.1091, + "step": 231650 + }, + { + "epoch": 0.935935713506547, + "grad_norm": 419.59698486328125, + "learning_rate": 6.521670640943222e-07, 
+ "loss": 57.9848, + "step": 231660 + }, + { + "epoch": 0.9359761147719147, + "grad_norm": 558.0735473632812, + "learning_rate": 6.51459946934192e-07, + "loss": 48.763, + "step": 231670 + }, + { + "epoch": 0.9360165160372823, + "grad_norm": 465.77392578125, + "learning_rate": 6.507532069838097e-07, + "loss": 47.1634, + "step": 231680 + }, + { + "epoch": 0.93605691730265, + "grad_norm": 196.2353515625, + "learning_rate": 6.500468442569574e-07, + "loss": 63.277, + "step": 231690 + }, + { + "epoch": 0.9360973185680176, + "grad_norm": 1113.720703125, + "learning_rate": 6.493408587674022e-07, + "loss": 61.0079, + "step": 231700 + }, + { + "epoch": 0.9361377198333852, + "grad_norm": 760.9076538085938, + "learning_rate": 6.486352505289084e-07, + "loss": 69.3344, + "step": 231710 + }, + { + "epoch": 0.9361781210987528, + "grad_norm": 1249.4071044921875, + "learning_rate": 6.47930019555234e-07, + "loss": 91.1854, + "step": 231720 + }, + { + "epoch": 0.9362185223641204, + "grad_norm": 353.5001220703125, + "learning_rate": 6.472251658601302e-07, + "loss": 49.8285, + "step": 231730 + }, + { + "epoch": 0.936258923629488, + "grad_norm": 534.2552490234375, + "learning_rate": 6.465206894573306e-07, + "loss": 45.3929, + "step": 231740 + }, + { + "epoch": 0.9362993248948557, + "grad_norm": 501.2189025878906, + "learning_rate": 6.458165903605773e-07, + "loss": 61.0935, + "step": 231750 + }, + { + "epoch": 0.9363397261602233, + "grad_norm": 1089.2813720703125, + "learning_rate": 6.451128685835905e-07, + "loss": 50.4551, + "step": 231760 + }, + { + "epoch": 0.936380127425591, + "grad_norm": 612.9863891601562, + "learning_rate": 6.44409524140095e-07, + "loss": 65.2305, + "step": 231770 + }, + { + "epoch": 0.9364205286909586, + "grad_norm": 1391.2137451171875, + "learning_rate": 6.437065570437995e-07, + "loss": 68.0887, + "step": 231780 + }, + { + "epoch": 0.9364609299563262, + "grad_norm": 985.8492431640625, + "learning_rate": 6.43003967308411e-07, + "loss": 96.2377, + "step": 231790 + }, + { + "epoch": 0.9365013312216939, + "grad_norm": 461.2491455078125, + "learning_rate": 6.423017549476274e-07, + "loss": 58.0297, + "step": 231800 + }, + { + "epoch": 0.9365417324870615, + "grad_norm": 278.1769104003906, + "learning_rate": 6.415999199751355e-07, + "loss": 46.4557, + "step": 231810 + }, + { + "epoch": 0.9365821337524292, + "grad_norm": 637.2348022460938, + "learning_rate": 6.408984624046199e-07, + "loss": 72.4431, + "step": 231820 + }, + { + "epoch": 0.9366225350177968, + "grad_norm": 457.7890625, + "learning_rate": 6.401973822497543e-07, + "loss": 39.2155, + "step": 231830 + }, + { + "epoch": 0.9366629362831644, + "grad_norm": 466.3041687011719, + "learning_rate": 6.394966795242075e-07, + "loss": 87.8276, + "step": 231840 + }, + { + "epoch": 0.936703337548532, + "grad_norm": 682.7880249023438, + "learning_rate": 6.38796354241642e-07, + "loss": 63.6744, + "step": 231850 + }, + { + "epoch": 0.9367437388138996, + "grad_norm": 831.1063232421875, + "learning_rate": 6.380964064157113e-07, + "loss": 105.1446, + "step": 231860 + }, + { + "epoch": 0.9367841400792672, + "grad_norm": 254.3018035888672, + "learning_rate": 6.373968360600558e-07, + "loss": 74.3905, + "step": 231870 + }, + { + "epoch": 0.9368245413446349, + "grad_norm": 645.0233764648438, + "learning_rate": 6.366976431883177e-07, + "loss": 79.2497, + "step": 231880 + }, + { + "epoch": 0.9368649426100025, + "grad_norm": 1102.1402587890625, + "learning_rate": 6.359988278141305e-07, + "loss": 50.4851, + "step": 231890 + }, + { + "epoch": 0.9369053438753702, + 
"grad_norm": 948.9331665039062, + "learning_rate": 6.353003899511123e-07, + "loss": 58.7698, + "step": 231900 + }, + { + "epoch": 0.9369457451407378, + "grad_norm": 833.417236328125, + "learning_rate": 6.346023296128856e-07, + "loss": 104.3775, + "step": 231910 + }, + { + "epoch": 0.9369861464061054, + "grad_norm": 817.9498901367188, + "learning_rate": 6.339046468130527e-07, + "loss": 66.9323, + "step": 231920 + }, + { + "epoch": 0.9370265476714731, + "grad_norm": 593.5517578125, + "learning_rate": 6.33207341565223e-07, + "loss": 52.1291, + "step": 231930 + }, + { + "epoch": 0.9370669489368407, + "grad_norm": 660.0868530273438, + "learning_rate": 6.325104138829852e-07, + "loss": 52.3458, + "step": 231940 + }, + { + "epoch": 0.9371073502022084, + "grad_norm": 554.7782592773438, + "learning_rate": 6.318138637799265e-07, + "loss": 59.277, + "step": 231950 + }, + { + "epoch": 0.937147751467576, + "grad_norm": 155.5161590576172, + "learning_rate": 6.311176912696315e-07, + "loss": 66.3044, + "step": 231960 + }, + { + "epoch": 0.9371881527329436, + "grad_norm": 838.4068603515625, + "learning_rate": 6.30421896365665e-07, + "loss": 72.2177, + "step": 231970 + }, + { + "epoch": 0.9372285539983112, + "grad_norm": 563.1986694335938, + "learning_rate": 6.297264790815982e-07, + "loss": 56.777, + "step": 231980 + }, + { + "epoch": 0.9372689552636788, + "grad_norm": 591.3935546875, + "learning_rate": 6.290314394309849e-07, + "loss": 60.4648, + "step": 231990 + }, + { + "epoch": 0.9373093565290465, + "grad_norm": 577.384765625, + "learning_rate": 6.283367774273785e-07, + "loss": 75.9834, + "step": 232000 + }, + { + "epoch": 0.9373497577944141, + "grad_norm": 1009.8219604492188, + "learning_rate": 6.276424930843173e-07, + "loss": 131.3798, + "step": 232010 + }, + { + "epoch": 0.9373901590597817, + "grad_norm": 581.4556884765625, + "learning_rate": 6.269485864153413e-07, + "loss": 63.7619, + "step": 232020 + }, + { + "epoch": 0.9374305603251494, + "grad_norm": 952.448486328125, + "learning_rate": 6.262550574339754e-07, + "loss": 75.2632, + "step": 232030 + }, + { + "epoch": 0.937470961590517, + "grad_norm": 564.2669067382812, + "learning_rate": 6.255619061537422e-07, + "loss": 55.9475, + "step": 232040 + }, + { + "epoch": 0.9375113628558847, + "grad_norm": 511.57855224609375, + "learning_rate": 6.248691325881529e-07, + "loss": 46.6712, + "step": 232050 + }, + { + "epoch": 0.9375517641212523, + "grad_norm": 921.8944091796875, + "learning_rate": 6.241767367507168e-07, + "loss": 69.7356, + "step": 232060 + }, + { + "epoch": 0.9375921653866199, + "grad_norm": 622.5736694335938, + "learning_rate": 6.234847186549298e-07, + "loss": 51.5182, + "step": 232070 + }, + { + "epoch": 0.9376325666519876, + "grad_norm": 658.3596801757812, + "learning_rate": 6.227930783142811e-07, + "loss": 70.4682, + "step": 232080 + }, + { + "epoch": 0.9376729679173552, + "grad_norm": 1060.59033203125, + "learning_rate": 6.22101815742262e-07, + "loss": 53.6365, + "step": 232090 + }, + { + "epoch": 0.9377133691827229, + "grad_norm": 1105.40185546875, + "learning_rate": 6.21410930952342e-07, + "loss": 124.5828, + "step": 232100 + }, + { + "epoch": 0.9377537704480904, + "grad_norm": 1394.16162109375, + "learning_rate": 6.207204239579923e-07, + "loss": 63.654, + "step": 232110 + }, + { + "epoch": 0.937794171713458, + "grad_norm": 495.09698486328125, + "learning_rate": 6.200302947726755e-07, + "loss": 72.5452, + "step": 232120 + }, + { + "epoch": 0.9378345729788257, + "grad_norm": 495.81329345703125, + "learning_rate": 6.193405434098454e-07, + 
"loss": 116.3624, + "step": 232130 + }, + { + "epoch": 0.9378749742441933, + "grad_norm": 1391.865234375, + "learning_rate": 6.186511698829512e-07, + "loss": 65.982, + "step": 232140 + }, + { + "epoch": 0.937915375509561, + "grad_norm": 743.4743041992188, + "learning_rate": 6.179621742054287e-07, + "loss": 63.6263, + "step": 232150 + }, + { + "epoch": 0.9379557767749286, + "grad_norm": 1064.9490966796875, + "learning_rate": 6.172735563907117e-07, + "loss": 76.9667, + "step": 232160 + }, + { + "epoch": 0.9379961780402962, + "grad_norm": 535.4686279296875, + "learning_rate": 6.165853164522273e-07, + "loss": 68.7917, + "step": 232170 + }, + { + "epoch": 0.9380365793056639, + "grad_norm": 733.9143676757812, + "learning_rate": 6.158974544033914e-07, + "loss": 43.5435, + "step": 232180 + }, + { + "epoch": 0.9380769805710315, + "grad_norm": 1673.8306884765625, + "learning_rate": 6.152099702576109e-07, + "loss": 99.2797, + "step": 232190 + }, + { + "epoch": 0.9381173818363991, + "grad_norm": 903.548095703125, + "learning_rate": 6.145228640282974e-07, + "loss": 58.6404, + "step": 232200 + }, + { + "epoch": 0.9381577831017668, + "grad_norm": 971.37646484375, + "learning_rate": 6.13836135728838e-07, + "loss": 66.0346, + "step": 232210 + }, + { + "epoch": 0.9381981843671344, + "grad_norm": 528.739501953125, + "learning_rate": 6.13149785372622e-07, + "loss": 61.9793, + "step": 232220 + }, + { + "epoch": 0.9382385856325021, + "grad_norm": 378.21826171875, + "learning_rate": 6.124638129730365e-07, + "loss": 68.4955, + "step": 232230 + }, + { + "epoch": 0.9382789868978696, + "grad_norm": 909.1775512695312, + "learning_rate": 6.117782185434463e-07, + "loss": 68.1048, + "step": 232240 + }, + { + "epoch": 0.9383193881632372, + "grad_norm": 492.5785827636719, + "learning_rate": 6.110930020972227e-07, + "loss": 53.7505, + "step": 232250 + }, + { + "epoch": 0.9383597894286049, + "grad_norm": 1018.951904296875, + "learning_rate": 6.104081636477244e-07, + "loss": 42.0367, + "step": 232260 + }, + { + "epoch": 0.9384001906939725, + "grad_norm": 685.1436157226562, + "learning_rate": 6.097237032083003e-07, + "loss": 67.439, + "step": 232270 + }, + { + "epoch": 0.9384405919593402, + "grad_norm": 872.3475952148438, + "learning_rate": 6.090396207922933e-07, + "loss": 53.3225, + "step": 232280 + }, + { + "epoch": 0.9384809932247078, + "grad_norm": 475.8158874511719, + "learning_rate": 6.083559164130437e-07, + "loss": 57.6364, + "step": 232290 + }, + { + "epoch": 0.9385213944900754, + "grad_norm": 686.8587646484375, + "learning_rate": 6.076725900838809e-07, + "loss": 46.0159, + "step": 232300 + }, + { + "epoch": 0.9385617957554431, + "grad_norm": 543.3079223632812, + "learning_rate": 6.069896418181232e-07, + "loss": 57.3313, + "step": 232310 + }, + { + "epoch": 0.9386021970208107, + "grad_norm": 644.0975341796875, + "learning_rate": 6.063070716290864e-07, + "loss": 58.7601, + "step": 232320 + }, + { + "epoch": 0.9386425982861784, + "grad_norm": 786.9959716796875, + "learning_rate": 6.056248795300779e-07, + "loss": 38.9612, + "step": 232330 + }, + { + "epoch": 0.938682999551546, + "grad_norm": 702.2594604492188, + "learning_rate": 6.049430655344001e-07, + "loss": 48.4395, + "step": 232340 + }, + { + "epoch": 0.9387234008169136, + "grad_norm": 1109.822998046875, + "learning_rate": 6.042616296553361e-07, + "loss": 91.0666, + "step": 232350 + }, + { + "epoch": 0.9387638020822812, + "grad_norm": 1134.848876953125, + "learning_rate": 6.035805719061838e-07, + "loss": 75.0635, + "step": 232360 + }, + { + "epoch": 
0.9388042033476488, + "grad_norm": 826.2975463867188, + "learning_rate": 6.028998923002105e-07, + "loss": 66.2828, + "step": 232370 + }, + { + "epoch": 0.9388446046130164, + "grad_norm": 1658.83154296875, + "learning_rate": 6.022195908506901e-07, + "loss": 103.4737, + "step": 232380 + }, + { + "epoch": 0.9388850058783841, + "grad_norm": 583.5703125, + "learning_rate": 6.015396675708896e-07, + "loss": 69.2696, + "step": 232390 + }, + { + "epoch": 0.9389254071437517, + "grad_norm": 612.55859375, + "learning_rate": 6.00860122474054e-07, + "loss": 76.211, + "step": 232400 + }, + { + "epoch": 0.9389658084091194, + "grad_norm": 346.0386047363281, + "learning_rate": 6.001809555734417e-07, + "loss": 47.0483, + "step": 232410 + }, + { + "epoch": 0.939006209674487, + "grad_norm": 687.4683227539062, + "learning_rate": 5.995021668822886e-07, + "loss": 53.166, + "step": 232420 + }, + { + "epoch": 0.9390466109398546, + "grad_norm": 392.22442626953125, + "learning_rate": 5.988237564138266e-07, + "loss": 47.8001, + "step": 232430 + }, + { + "epoch": 0.9390870122052223, + "grad_norm": 559.8594970703125, + "learning_rate": 5.981457241812872e-07, + "loss": 79.9336, + "step": 232440 + }, + { + "epoch": 0.9391274134705899, + "grad_norm": 415.6376037597656, + "learning_rate": 5.974680701978842e-07, + "loss": 48.3445, + "step": 232450 + }, + { + "epoch": 0.9391678147359576, + "grad_norm": 510.16046142578125, + "learning_rate": 5.967907944768314e-07, + "loss": 37.1386, + "step": 232460 + }, + { + "epoch": 0.9392082160013252, + "grad_norm": 770.1859130859375, + "learning_rate": 5.961138970313296e-07, + "loss": 109.5518, + "step": 232470 + }, + { + "epoch": 0.9392486172666928, + "grad_norm": 571.654296875, + "learning_rate": 5.95437377874577e-07, + "loss": 71.745, + "step": 232480 + }, + { + "epoch": 0.9392890185320604, + "grad_norm": 559.6614990234375, + "learning_rate": 5.947612370197631e-07, + "loss": 59.6679, + "step": 232490 + }, + { + "epoch": 0.939329419797428, + "grad_norm": 304.14849853515625, + "learning_rate": 5.940854744800706e-07, + "loss": 82.2653, + "step": 232500 + }, + { + "epoch": 0.9393698210627957, + "grad_norm": 1149.1768798828125, + "learning_rate": 5.934100902686712e-07, + "loss": 79.4274, + "step": 232510 + }, + { + "epoch": 0.9394102223281633, + "grad_norm": 1015.5127563476562, + "learning_rate": 5.927350843987367e-07, + "loss": 62.6799, + "step": 232520 + }, + { + "epoch": 0.9394506235935309, + "grad_norm": 865.3230590820312, + "learning_rate": 5.920604568834188e-07, + "loss": 70.1016, + "step": 232530 + }, + { + "epoch": 0.9394910248588986, + "grad_norm": 423.7525329589844, + "learning_rate": 5.913862077358779e-07, + "loss": 59.9482, + "step": 232540 + }, + { + "epoch": 0.9395314261242662, + "grad_norm": 592.7890014648438, + "learning_rate": 5.907123369692525e-07, + "loss": 60.8065, + "step": 232550 + }, + { + "epoch": 0.9395718273896339, + "grad_norm": 615.0166625976562, + "learning_rate": 5.900388445966832e-07, + "loss": 61.9847, + "step": 232560 + }, + { + "epoch": 0.9396122286550015, + "grad_norm": 776.2672119140625, + "learning_rate": 5.893657306313016e-07, + "loss": 49.4096, + "step": 232570 + }, + { + "epoch": 0.9396526299203691, + "grad_norm": 557.9415283203125, + "learning_rate": 5.886929950862241e-07, + "loss": 46.1425, + "step": 232580 + }, + { + "epoch": 0.9396930311857368, + "grad_norm": 579.3458862304688, + "learning_rate": 5.880206379745735e-07, + "loss": 69.5143, + "step": 232590 + }, + { + "epoch": 0.9397334324511044, + "grad_norm": 537.0725708007812, + "learning_rate": 
5.873486593094546e-07, + "loss": 74.9625, + "step": 232600 + }, + { + "epoch": 0.939773833716472, + "grad_norm": 144.75250244140625, + "learning_rate": 5.866770591039661e-07, + "loss": 62.1965, + "step": 232610 + }, + { + "epoch": 0.9398142349818396, + "grad_norm": 771.6122436523438, + "learning_rate": 5.860058373712041e-07, + "loss": 72.9812, + "step": 232620 + }, + { + "epoch": 0.9398546362472072, + "grad_norm": 816.1385498046875, + "learning_rate": 5.853349941242536e-07, + "loss": 58.6357, + "step": 232630 + }, + { + "epoch": 0.9398950375125749, + "grad_norm": 2184.882080078125, + "learning_rate": 5.846645293761932e-07, + "loss": 69.6407, + "step": 232640 + }, + { + "epoch": 0.9399354387779425, + "grad_norm": 855.1461181640625, + "learning_rate": 5.839944431400924e-07, + "loss": 75.0688, + "step": 232650 + }, + { + "epoch": 0.9399758400433101, + "grad_norm": 956.5422973632812, + "learning_rate": 5.833247354290161e-07, + "loss": 76.2763, + "step": 232660 + }, + { + "epoch": 0.9400162413086778, + "grad_norm": 339.6118469238281, + "learning_rate": 5.826554062560185e-07, + "loss": 56.2541, + "step": 232670 + }, + { + "epoch": 0.9400566425740454, + "grad_norm": 585.213623046875, + "learning_rate": 5.819864556341559e-07, + "loss": 77.1985, + "step": 232680 + }, + { + "epoch": 0.9400970438394131, + "grad_norm": 552.614501953125, + "learning_rate": 5.813178835764577e-07, + "loss": 61.1772, + "step": 232690 + }, + { + "epoch": 0.9401374451047807, + "grad_norm": 641.9323120117188, + "learning_rate": 5.806496900959691e-07, + "loss": 70.5791, + "step": 232700 + }, + { + "epoch": 0.9401778463701483, + "grad_norm": 638.9924926757812, + "learning_rate": 5.799818752057129e-07, + "loss": 72.9906, + "step": 232710 + }, + { + "epoch": 0.940218247635516, + "grad_norm": 692.3328857421875, + "learning_rate": 5.793144389187056e-07, + "loss": 67.6606, + "step": 232720 + }, + { + "epoch": 0.9402586489008836, + "grad_norm": 682.2972412109375, + "learning_rate": 5.786473812479654e-07, + "loss": 56.7806, + "step": 232730 + }, + { + "epoch": 0.9402990501662513, + "grad_norm": 660.9271240234375, + "learning_rate": 5.779807022064865e-07, + "loss": 48.1778, + "step": 232740 + }, + { + "epoch": 0.9403394514316188, + "grad_norm": 299.3976745605469, + "learning_rate": 5.773144018072807e-07, + "loss": 104.3877, + "step": 232750 + }, + { + "epoch": 0.9403798526969864, + "grad_norm": 838.256591796875, + "learning_rate": 5.766484800633265e-07, + "loss": 50.7196, + "step": 232760 + }, + { + "epoch": 0.9404202539623541, + "grad_norm": 1080.2464599609375, + "learning_rate": 5.759829369876091e-07, + "loss": 73.0915, + "step": 232770 + }, + { + "epoch": 0.9404606552277217, + "grad_norm": 456.9873352050781, + "learning_rate": 5.753177725931048e-07, + "loss": 83.5664, + "step": 232780 + }, + { + "epoch": 0.9405010564930893, + "grad_norm": 693.7952880859375, + "learning_rate": 5.74652986892783e-07, + "loss": 75.1072, + "step": 232790 + }, + { + "epoch": 0.940541457758457, + "grad_norm": 567.8163452148438, + "learning_rate": 5.739885798996003e-07, + "loss": 54.7689, + "step": 232800 + }, + { + "epoch": 0.9405818590238246, + "grad_norm": 913.959716796875, + "learning_rate": 5.733245516265129e-07, + "loss": 73.3126, + "step": 232810 + }, + { + "epoch": 0.9406222602891923, + "grad_norm": 739.0476684570312, + "learning_rate": 5.726609020864637e-07, + "loss": 45.7233, + "step": 232820 + }, + { + "epoch": 0.9406626615545599, + "grad_norm": 461.8160095214844, + "learning_rate": 5.719976312923914e-07, + "loss": 69.6333, + "step": 232830 + }, 
+ { + "epoch": 0.9407030628199275, + "grad_norm": 883.0531616210938, + "learning_rate": 5.713347392572299e-07, + "loss": 54.7677, + "step": 232840 + }, + { + "epoch": 0.9407434640852952, + "grad_norm": 1204.668701171875, + "learning_rate": 5.706722259938957e-07, + "loss": 52.4954, + "step": 232850 + }, + { + "epoch": 0.9407838653506628, + "grad_norm": 769.3101806640625, + "learning_rate": 5.700100915153139e-07, + "loss": 50.1221, + "step": 232860 + }, + { + "epoch": 0.9408242666160305, + "grad_norm": 639.3986206054688, + "learning_rate": 5.693483358343877e-07, + "loss": 84.0421, + "step": 232870 + }, + { + "epoch": 0.940864667881398, + "grad_norm": 587.334228515625, + "learning_rate": 5.686869589640176e-07, + "loss": 78.4212, + "step": 232880 + }, + { + "epoch": 0.9409050691467656, + "grad_norm": 521.3841552734375, + "learning_rate": 5.680259609171002e-07, + "loss": 61.1137, + "step": 232890 + }, + { + "epoch": 0.9409454704121333, + "grad_norm": 580.7958984375, + "learning_rate": 5.673653417065206e-07, + "loss": 58.0167, + "step": 232900 + }, + { + "epoch": 0.9409858716775009, + "grad_norm": 660.4609375, + "learning_rate": 5.667051013451597e-07, + "loss": 53.935, + "step": 232910 + }, + { + "epoch": 0.9410262729428686, + "grad_norm": 748.0093383789062, + "learning_rate": 5.660452398458871e-07, + "loss": 73.7994, + "step": 232920 + }, + { + "epoch": 0.9410666742082362, + "grad_norm": 373.8475341796875, + "learning_rate": 5.653857572215682e-07, + "loss": 81.1044, + "step": 232930 + }, + { + "epoch": 0.9411070754736038, + "grad_norm": 437.2550354003906, + "learning_rate": 5.647266534850571e-07, + "loss": 60.7011, + "step": 232940 + }, + { + "epoch": 0.9411474767389715, + "grad_norm": 688.7047729492188, + "learning_rate": 5.640679286492101e-07, + "loss": 73.9881, + "step": 232950 + }, + { + "epoch": 0.9411878780043391, + "grad_norm": 1003.673583984375, + "learning_rate": 5.634095827268593e-07, + "loss": 52.8826, + "step": 232960 + }, + { + "epoch": 0.9412282792697068, + "grad_norm": 481.8900451660156, + "learning_rate": 5.6275161573085e-07, + "loss": 93.0032, + "step": 232970 + }, + { + "epoch": 0.9412686805350744, + "grad_norm": 328.2328796386719, + "learning_rate": 5.620940276740028e-07, + "loss": 63.2641, + "step": 232980 + }, + { + "epoch": 0.941309081800442, + "grad_norm": 570.8117065429688, + "learning_rate": 5.61436818569141e-07, + "loss": 50.6531, + "step": 232990 + }, + { + "epoch": 0.9413494830658096, + "grad_norm": 880.4193725585938, + "learning_rate": 5.607799884290765e-07, + "loss": 70.8753, + "step": 233000 + }, + { + "epoch": 0.9413898843311772, + "grad_norm": 789.6047973632812, + "learning_rate": 5.601235372666125e-07, + "loss": 49.4698, + "step": 233010 + }, + { + "epoch": 0.9414302855965448, + "grad_norm": 874.7656860351562, + "learning_rate": 5.594674650945498e-07, + "loss": 79.7633, + "step": 233020 + }, + { + "epoch": 0.9414706868619125, + "grad_norm": 518.7997436523438, + "learning_rate": 5.588117719256736e-07, + "loss": 91.9855, + "step": 233030 + }, + { + "epoch": 0.9415110881272801, + "grad_norm": 543.8492431640625, + "learning_rate": 5.58156457772776e-07, + "loss": 77.0155, + "step": 233040 + }, + { + "epoch": 0.9415514893926478, + "grad_norm": 849.5704345703125, + "learning_rate": 5.575015226486269e-07, + "loss": 63.4321, + "step": 233050 + }, + { + "epoch": 0.9415918906580154, + "grad_norm": 715.1990356445312, + "learning_rate": 5.568469665659937e-07, + "loss": 64.6647, + "step": 233060 + }, + { + "epoch": 0.941632291923383, + "grad_norm": 428.60638427734375, + 
"learning_rate": 5.561927895376417e-07, + "loss": 59.6277, + "step": 233070 + }, + { + "epoch": 0.9416726931887507, + "grad_norm": 420.31927490234375, + "learning_rate": 5.555389915763188e-07, + "loss": 69.4363, + "step": 233080 + }, + { + "epoch": 0.9417130944541183, + "grad_norm": 472.1800537109375, + "learning_rate": 5.548855726947721e-07, + "loss": 47.78, + "step": 233090 + }, + { + "epoch": 0.941753495719486, + "grad_norm": 420.8257751464844, + "learning_rate": 5.542325329057452e-07, + "loss": 56.7775, + "step": 233100 + }, + { + "epoch": 0.9417938969848536, + "grad_norm": 460.8014221191406, + "learning_rate": 5.535798722219654e-07, + "loss": 55.9527, + "step": 233110 + }, + { + "epoch": 0.9418342982502212, + "grad_norm": 655.0885009765625, + "learning_rate": 5.529275906561582e-07, + "loss": 68.6533, + "step": 233120 + }, + { + "epoch": 0.9418746995155888, + "grad_norm": 741.286865234375, + "learning_rate": 5.5227568822104e-07, + "loss": 44.4661, + "step": 233130 + }, + { + "epoch": 0.9419151007809564, + "grad_norm": 652.399658203125, + "learning_rate": 5.516241649293186e-07, + "loss": 50.6049, + "step": 233140 + }, + { + "epoch": 0.9419555020463241, + "grad_norm": 596.3739013671875, + "learning_rate": 5.509730207936969e-07, + "loss": 70.9184, + "step": 233150 + }, + { + "epoch": 0.9419959033116917, + "grad_norm": 954.5722045898438, + "learning_rate": 5.503222558268695e-07, + "loss": 67.6284, + "step": 233160 + }, + { + "epoch": 0.9420363045770593, + "grad_norm": 484.7266540527344, + "learning_rate": 5.496718700415238e-07, + "loss": 72.2451, + "step": 233170 + }, + { + "epoch": 0.942076705842427, + "grad_norm": 443.8197937011719, + "learning_rate": 5.490218634503386e-07, + "loss": 50.324, + "step": 233180 + }, + { + "epoch": 0.9421171071077946, + "grad_norm": 561.6012573242188, + "learning_rate": 5.483722360659838e-07, + "loss": 93.2675, + "step": 233190 + }, + { + "epoch": 0.9421575083731623, + "grad_norm": 860.2669677734375, + "learning_rate": 5.477229879011293e-07, + "loss": 69.9677, + "step": 233200 + }, + { + "epoch": 0.9421979096385299, + "grad_norm": 538.6619873046875, + "learning_rate": 5.470741189684292e-07, + "loss": 83.0132, + "step": 233210 + }, + { + "epoch": 0.9422383109038975, + "grad_norm": 668.9439086914062, + "learning_rate": 5.464256292805337e-07, + "loss": 81.2732, + "step": 233220 + }, + { + "epoch": 0.9422787121692652, + "grad_norm": 643.2591552734375, + "learning_rate": 5.457775188500858e-07, + "loss": 75.4532, + "step": 233230 + }, + { + "epoch": 0.9423191134346328, + "grad_norm": 1342.03564453125, + "learning_rate": 5.451297876897244e-07, + "loss": 84.6217, + "step": 233240 + }, + { + "epoch": 0.9423595147000005, + "grad_norm": 400.55303955078125, + "learning_rate": 5.444824358120703e-07, + "loss": 47.6116, + "step": 233250 + }, + { + "epoch": 0.942399915965368, + "grad_norm": 526.3870239257812, + "learning_rate": 5.438354632297471e-07, + "loss": 48.4432, + "step": 233260 + }, + { + "epoch": 0.9424403172307356, + "grad_norm": 821.7506713867188, + "learning_rate": 5.431888699553689e-07, + "loss": 77.21, + "step": 233270 + }, + { + "epoch": 0.9424807184961033, + "grad_norm": 463.8674011230469, + "learning_rate": 5.425426560015412e-07, + "loss": 37.2708, + "step": 233280 + }, + { + "epoch": 0.9425211197614709, + "grad_norm": 544.7167358398438, + "learning_rate": 5.418968213808628e-07, + "loss": 66.1087, + "step": 233290 + }, + { + "epoch": 0.9425615210268385, + "grad_norm": 653.4033203125, + "learning_rate": 5.412513661059193e-07, + "loss": 59.744, + "step": 
233300 + }, + { + "epoch": 0.9426019222922062, + "grad_norm": 736.4451904296875, + "learning_rate": 5.406062901893028e-07, + "loss": 59.1249, + "step": 233310 + }, + { + "epoch": 0.9426423235575738, + "grad_norm": 946.2900390625, + "learning_rate": 5.399615936435832e-07, + "loss": 79.6682, + "step": 233320 + }, + { + "epoch": 0.9426827248229415, + "grad_norm": 1057.247802734375, + "learning_rate": 5.393172764813326e-07, + "loss": 80.164, + "step": 233330 + }, + { + "epoch": 0.9427231260883091, + "grad_norm": 816.2527465820312, + "learning_rate": 5.3867333871511e-07, + "loss": 54.4701, + "step": 233340 + }, + { + "epoch": 0.9427635273536767, + "grad_norm": 830.9785766601562, + "learning_rate": 5.380297803574675e-07, + "loss": 72.2102, + "step": 233350 + }, + { + "epoch": 0.9428039286190444, + "grad_norm": 1296.4459228515625, + "learning_rate": 5.373866014209594e-07, + "loss": 80.7996, + "step": 233360 + }, + { + "epoch": 0.942844329884412, + "grad_norm": 674.247802734375, + "learning_rate": 5.367438019181159e-07, + "loss": 46.5961, + "step": 233370 + }, + { + "epoch": 0.9428847311497797, + "grad_norm": 722.7642211914062, + "learning_rate": 5.361013818614713e-07, + "loss": 50.1285, + "step": 233380 + }, + { + "epoch": 0.9429251324151472, + "grad_norm": 749.7070922851562, + "learning_rate": 5.354593412635533e-07, + "loss": 71.7706, + "step": 233390 + }, + { + "epoch": 0.9429655336805148, + "grad_norm": 821.1887817382812, + "learning_rate": 5.348176801368765e-07, + "loss": 69.0778, + "step": 233400 + }, + { + "epoch": 0.9430059349458825, + "grad_norm": 636.2194213867188, + "learning_rate": 5.341763984939508e-07, + "loss": 65.1464, + "step": 233410 + }, + { + "epoch": 0.9430463362112501, + "grad_norm": 1140.3255615234375, + "learning_rate": 5.33535496347275e-07, + "loss": 51.9347, + "step": 233420 + }, + { + "epoch": 0.9430867374766178, + "grad_norm": 349.1161804199219, + "learning_rate": 5.32894973709348e-07, + "loss": 70.3478, + "step": 233430 + }, + { + "epoch": 0.9431271387419854, + "grad_norm": 331.68011474609375, + "learning_rate": 5.322548305926556e-07, + "loss": 59.0157, + "step": 233440 + }, + { + "epoch": 0.943167540007353, + "grad_norm": 569.0093994140625, + "learning_rate": 5.316150670096809e-07, + "loss": 68.3342, + "step": 233450 + }, + { + "epoch": 0.9432079412727207, + "grad_norm": 697.9094848632812, + "learning_rate": 5.309756829728896e-07, + "loss": 56.9794, + "step": 233460 + }, + { + "epoch": 0.9432483425380883, + "grad_norm": 575.2535400390625, + "learning_rate": 5.303366784947539e-07, + "loss": 47.7325, + "step": 233470 + }, + { + "epoch": 0.943288743803456, + "grad_norm": 1027.1251220703125, + "learning_rate": 5.296980535877261e-07, + "loss": 75.1986, + "step": 233480 + }, + { + "epoch": 0.9433291450688236, + "grad_norm": 220.10845947265625, + "learning_rate": 5.290598082642584e-07, + "loss": 67.2978, + "step": 233490 + }, + { + "epoch": 0.9433695463341912, + "grad_norm": 487.2839660644531, + "learning_rate": 5.284219425367943e-07, + "loss": 43.065, + "step": 233500 + }, + { + "epoch": 0.9434099475995589, + "grad_norm": 772.3554077148438, + "learning_rate": 5.277844564177703e-07, + "loss": 82.1555, + "step": 233510 + }, + { + "epoch": 0.9434503488649264, + "grad_norm": 641.5513305664062, + "learning_rate": 5.271473499196145e-07, + "loss": 56.7871, + "step": 233520 + }, + { + "epoch": 0.943490750130294, + "grad_norm": 637.4270629882812, + "learning_rate": 5.265106230547435e-07, + "loss": 82.3227, + "step": 233530 + }, + { + "epoch": 0.9435311513956617, + "grad_norm": 
731.3258056640625, + "learning_rate": 5.25874275835574e-07, + "loss": 51.157, + "step": 233540 + }, + { + "epoch": 0.9435715526610293, + "grad_norm": 496.38037109375, + "learning_rate": 5.252383082745116e-07, + "loss": 44.0464, + "step": 233550 + }, + { + "epoch": 0.943611953926397, + "grad_norm": 604.409423828125, + "learning_rate": 5.246027203839554e-07, + "loss": 53.1972, + "step": 233560 + }, + { + "epoch": 0.9436523551917646, + "grad_norm": 675.2469482421875, + "learning_rate": 5.239675121762955e-07, + "loss": 74.3548, + "step": 233570 + }, + { + "epoch": 0.9436927564571322, + "grad_norm": 514.409912109375, + "learning_rate": 5.233326836639175e-07, + "loss": 57.9667, + "step": 233580 + }, + { + "epoch": 0.9437331577224999, + "grad_norm": 808.4920654296875, + "learning_rate": 5.226982348591936e-07, + "loss": 57.3726, + "step": 233590 + }, + { + "epoch": 0.9437735589878675, + "grad_norm": 461.0391540527344, + "learning_rate": 5.220641657744963e-07, + "loss": 65.2094, + "step": 233600 + }, + { + "epoch": 0.9438139602532352, + "grad_norm": 979.0601806640625, + "learning_rate": 5.214304764221866e-07, + "loss": 80.2964, + "step": 233610 + }, + { + "epoch": 0.9438543615186028, + "grad_norm": 542.9027709960938, + "learning_rate": 5.20797166814615e-07, + "loss": 50.4692, + "step": 233620 + }, + { + "epoch": 0.9438947627839704, + "grad_norm": 556.4044799804688, + "learning_rate": 5.201642369641358e-07, + "loss": 84.0967, + "step": 233630 + }, + { + "epoch": 0.943935164049338, + "grad_norm": 780.5877075195312, + "learning_rate": 5.195316868830791e-07, + "loss": 77.6155, + "step": 233640 + }, + { + "epoch": 0.9439755653147056, + "grad_norm": 493.207763671875, + "learning_rate": 5.18899516583784e-07, + "loss": 40.6605, + "step": 233650 + }, + { + "epoch": 0.9440159665800733, + "grad_norm": 809.64599609375, + "learning_rate": 5.182677260785718e-07, + "loss": 49.7655, + "step": 233660 + }, + { + "epoch": 0.9440563678454409, + "grad_norm": 1439.0283203125, + "learning_rate": 5.176363153797614e-07, + "loss": 94.8918, + "step": 233670 + }, + { + "epoch": 0.9440967691108085, + "grad_norm": 452.51409912109375, + "learning_rate": 5.170052844996609e-07, + "loss": 65.4146, + "step": 233680 + }, + { + "epoch": 0.9441371703761762, + "grad_norm": 829.6790771484375, + "learning_rate": 5.163746334505715e-07, + "loss": 69.2506, + "step": 233690 + }, + { + "epoch": 0.9441775716415438, + "grad_norm": 812.5972900390625, + "learning_rate": 5.1574436224479e-07, + "loss": 51.7061, + "step": 233700 + }, + { + "epoch": 0.9442179729069115, + "grad_norm": 520.080810546875, + "learning_rate": 5.151144708946044e-07, + "loss": 82.569, + "step": 233710 + }, + { + "epoch": 0.9442583741722791, + "grad_norm": 569.325439453125, + "learning_rate": 5.144849594122914e-07, + "loss": 85.3528, + "step": 233720 + }, + { + "epoch": 0.9442987754376467, + "grad_norm": 577.88134765625, + "learning_rate": 5.13855827810128e-07, + "loss": 64.7793, + "step": 233730 + }, + { + "epoch": 0.9443391767030144, + "grad_norm": 660.1197509765625, + "learning_rate": 5.132270761003777e-07, + "loss": 49.305, + "step": 233740 + }, + { + "epoch": 0.944379577968382, + "grad_norm": 274.3174133300781, + "learning_rate": 5.125987042952973e-07, + "loss": 89.2656, + "step": 233750 + }, + { + "epoch": 0.9444199792337497, + "grad_norm": 837.4322509765625, + "learning_rate": 5.119707124071372e-07, + "loss": 65.9154, + "step": 233760 + }, + { + "epoch": 0.9444603804991172, + "grad_norm": 400.54229736328125, + "learning_rate": 5.113431004481406e-07, + "loss": 49.7108, + 
"step": 233770 + }, + { + "epoch": 0.9445007817644848, + "grad_norm": 436.5148620605469, + "learning_rate": 5.107158684305446e-07, + "loss": 61.1186, + "step": 233780 + }, + { + "epoch": 0.9445411830298525, + "grad_norm": 859.5880126953125, + "learning_rate": 5.100890163665773e-07, + "loss": 79.5558, + "step": 233790 + }, + { + "epoch": 0.9445815842952201, + "grad_norm": 643.0777587890625, + "learning_rate": 5.094625442684554e-07, + "loss": 41.2299, + "step": 233800 + }, + { + "epoch": 0.9446219855605877, + "grad_norm": 563.9541625976562, + "learning_rate": 5.088364521484002e-07, + "loss": 89.5492, + "step": 233810 + }, + { + "epoch": 0.9446623868259554, + "grad_norm": 483.50537109375, + "learning_rate": 5.08210740018611e-07, + "loss": 45.4187, + "step": 233820 + }, + { + "epoch": 0.944702788091323, + "grad_norm": 1201.1007080078125, + "learning_rate": 5.07585407891289e-07, + "loss": 63.9869, + "step": 233830 + }, + { + "epoch": 0.9447431893566907, + "grad_norm": 1341.4981689453125, + "learning_rate": 5.069604557786245e-07, + "loss": 82.1359, + "step": 233840 + }, + { + "epoch": 0.9447835906220583, + "grad_norm": 534.2138671875, + "learning_rate": 5.063358836928011e-07, + "loss": 67.7625, + "step": 233850 + }, + { + "epoch": 0.9448239918874259, + "grad_norm": 547.2694702148438, + "learning_rate": 5.057116916459959e-07, + "loss": 56.7278, + "step": 233860 + }, + { + "epoch": 0.9448643931527936, + "grad_norm": 507.2535095214844, + "learning_rate": 5.050878796503789e-07, + "loss": 98.6698, + "step": 233870 + }, + { + "epoch": 0.9449047944181612, + "grad_norm": 718.0900268554688, + "learning_rate": 5.044644477181093e-07, + "loss": 53.3072, + "step": 233880 + }, + { + "epoch": 0.9449451956835289, + "grad_norm": 678.206787109375, + "learning_rate": 5.03841395861342e-07, + "loss": 51.0337, + "step": 233890 + }, + { + "epoch": 0.9449855969488964, + "grad_norm": 498.00750732421875, + "learning_rate": 5.032187240922248e-07, + "loss": 102.9989, + "step": 233900 + }, + { + "epoch": 0.945025998214264, + "grad_norm": 450.82379150390625, + "learning_rate": 5.025964324228927e-07, + "loss": 56.9308, + "step": 233910 + }, + { + "epoch": 0.9450663994796317, + "grad_norm": 585.2506103515625, + "learning_rate": 5.01974520865487e-07, + "loss": 41.4641, + "step": 233920 + }, + { + "epoch": 0.9451068007449993, + "grad_norm": 815.4320068359375, + "learning_rate": 5.013529894321223e-07, + "loss": 77.6053, + "step": 233930 + }, + { + "epoch": 0.945147202010367, + "grad_norm": 682.5119018554688, + "learning_rate": 5.007318381349202e-07, + "loss": 54.2079, + "step": 233940 + }, + { + "epoch": 0.9451876032757346, + "grad_norm": 399.4913635253906, + "learning_rate": 5.00111066985991e-07, + "loss": 60.099, + "step": 233950 + }, + { + "epoch": 0.9452280045411022, + "grad_norm": 695.31689453125, + "learning_rate": 4.994906759974338e-07, + "loss": 40.5378, + "step": 233960 + }, + { + "epoch": 0.9452684058064699, + "grad_norm": 841.4829711914062, + "learning_rate": 4.98870665181348e-07, + "loss": 75.6695, + "step": 233970 + }, + { + "epoch": 0.9453088070718375, + "grad_norm": 669.4159545898438, + "learning_rate": 4.98251034549817e-07, + "loss": 56.4644, + "step": 233980 + }, + { + "epoch": 0.9453492083372051, + "grad_norm": 505.64349365234375, + "learning_rate": 4.976317841149226e-07, + "loss": 60.6081, + "step": 233990 + }, + { + "epoch": 0.9453896096025728, + "grad_norm": 1377.4168701171875, + "learning_rate": 4.970129138887347e-07, + "loss": 57.3453, + "step": 234000 + }, + { + "epoch": 0.9454300108679404, + "grad_norm": 
486.94610595703125, + "learning_rate": 4.963944238833241e-07, + "loss": 50.3941, + "step": 234010 + }, + { + "epoch": 0.9454704121333081, + "grad_norm": 971.5831298828125, + "learning_rate": 4.957763141107452e-07, + "loss": 73.0122, + "step": 234020 + }, + { + "epoch": 0.9455108133986756, + "grad_norm": 528.9116821289062, + "learning_rate": 4.951585845830465e-07, + "loss": 65.3132, + "step": 234030 + }, + { + "epoch": 0.9455512146640432, + "grad_norm": 741.6577758789062, + "learning_rate": 4.945412353122736e-07, + "loss": 83.2501, + "step": 234040 + }, + { + "epoch": 0.9455916159294109, + "grad_norm": 625.7125854492188, + "learning_rate": 4.939242663104615e-07, + "loss": 45.7582, + "step": 234050 + }, + { + "epoch": 0.9456320171947785, + "grad_norm": 486.0951843261719, + "learning_rate": 4.933076775896406e-07, + "loss": 70.3402, + "step": 234060 + }, + { + "epoch": 0.9456724184601462, + "grad_norm": 357.4646301269531, + "learning_rate": 4.926914691618235e-07, + "loss": 60.3143, + "step": 234070 + }, + { + "epoch": 0.9457128197255138, + "grad_norm": 737.1961059570312, + "learning_rate": 4.920756410390359e-07, + "loss": 68.6782, + "step": 234080 + }, + { + "epoch": 0.9457532209908814, + "grad_norm": 650.2977294921875, + "learning_rate": 4.91460193233273e-07, + "loss": 57.6223, + "step": 234090 + }, + { + "epoch": 0.9457936222562491, + "grad_norm": 445.2493896484375, + "learning_rate": 4.908451257565383e-07, + "loss": 95.6821, + "step": 234100 + }, + { + "epoch": 0.9458340235216167, + "grad_norm": 866.89404296875, + "learning_rate": 4.902304386208223e-07, + "loss": 52.2081, + "step": 234110 + }, + { + "epoch": 0.9458744247869844, + "grad_norm": 514.55615234375, + "learning_rate": 4.896161318381065e-07, + "loss": 56.5735, + "step": 234120 + }, + { + "epoch": 0.945914826052352, + "grad_norm": 389.8757019042969, + "learning_rate": 4.890022054203724e-07, + "loss": 39.2701, + "step": 234130 + }, + { + "epoch": 0.9459552273177196, + "grad_norm": 360.9391784667969, + "learning_rate": 4.883886593795817e-07, + "loss": 85.9885, + "step": 234140 + }, + { + "epoch": 0.9459956285830872, + "grad_norm": 412.4652099609375, + "learning_rate": 4.877754937277001e-07, + "loss": 67.6308, + "step": 234150 + }, + { + "epoch": 0.9460360298484548, + "grad_norm": 394.6324768066406, + "learning_rate": 4.871627084766806e-07, + "loss": 45.5205, + "step": 234160 + }, + { + "epoch": 0.9460764311138224, + "grad_norm": 1402.5692138671875, + "learning_rate": 4.865503036384712e-07, + "loss": 90.4207, + "step": 234170 + }, + { + "epoch": 0.9461168323791901, + "grad_norm": 583.8250732421875, + "learning_rate": 4.859382792250089e-07, + "loss": 81.1434, + "step": 234180 + }, + { + "epoch": 0.9461572336445577, + "grad_norm": 379.8134460449219, + "learning_rate": 4.853266352482289e-07, + "loss": 55.3861, + "step": 234190 + }, + { + "epoch": 0.9461976349099254, + "grad_norm": 483.7864685058594, + "learning_rate": 4.847153717200504e-07, + "loss": 66.1068, + "step": 234200 + }, + { + "epoch": 0.946238036175293, + "grad_norm": 633.4390869140625, + "learning_rate": 4.841044886523926e-07, + "loss": 72.6785, + "step": 234210 + }, + { + "epoch": 0.9462784374406606, + "grad_norm": 560.8759155273438, + "learning_rate": 4.834939860571686e-07, + "loss": 65.7403, + "step": 234220 + }, + { + "epoch": 0.9463188387060283, + "grad_norm": 1293.218017578125, + "learning_rate": 4.828838639462708e-07, + "loss": 85.0531, + "step": 234230 + }, + { + "epoch": 0.9463592399713959, + "grad_norm": 1107.97705078125, + "learning_rate": 4.822741223316052e-07, + 
"loss": 77.7415, + "step": 234240 + }, + { + "epoch": 0.9463996412367636, + "grad_norm": 648.7860107421875, + "learning_rate": 4.816647612250513e-07, + "loss": 71.4328, + "step": 234250 + }, + { + "epoch": 0.9464400425021312, + "grad_norm": 1165.600341796875, + "learning_rate": 4.810557806384953e-07, + "loss": 69.7843, + "step": 234260 + }, + { + "epoch": 0.9464804437674988, + "grad_norm": 513.9786987304688, + "learning_rate": 4.804471805838029e-07, + "loss": 69.8819, + "step": 234270 + }, + { + "epoch": 0.9465208450328664, + "grad_norm": 259.2418518066406, + "learning_rate": 4.798389610728426e-07, + "loss": 74.123, + "step": 234280 + }, + { + "epoch": 0.946561246298234, + "grad_norm": 1325.107177734375, + "learning_rate": 4.792311221174716e-07, + "loss": 47.7177, + "step": 234290 + }, + { + "epoch": 0.9466016475636017, + "grad_norm": 569.3814086914062, + "learning_rate": 4.786236637295405e-07, + "loss": 98.9959, + "step": 234300 + }, + { + "epoch": 0.9466420488289693, + "grad_norm": 723.4222412109375, + "learning_rate": 4.780165859208908e-07, + "loss": 61.2639, + "step": 234310 + }, + { + "epoch": 0.9466824500943369, + "grad_norm": 757.9484252929688, + "learning_rate": 4.774098887033574e-07, + "loss": 55.0138, + "step": 234320 + }, + { + "epoch": 0.9467228513597046, + "grad_norm": 624.0610961914062, + "learning_rate": 4.768035720887687e-07, + "loss": 65.4259, + "step": 234330 + }, + { + "epoch": 0.9467632526250722, + "grad_norm": 987.3033447265625, + "learning_rate": 4.7619763608894644e-07, + "loss": 85.9264, + "step": 234340 + }, + { + "epoch": 0.9468036538904399, + "grad_norm": 175.9085235595703, + "learning_rate": 4.7559208071570553e-07, + "loss": 67.2286, + "step": 234350 + }, + { + "epoch": 0.9468440551558075, + "grad_norm": 784.851806640625, + "learning_rate": 4.749869059808454e-07, + "loss": 83.7883, + "step": 234360 + }, + { + "epoch": 0.9468844564211751, + "grad_norm": 576.2943115234375, + "learning_rate": 4.7438211189616776e-07, + "loss": 44.6514, + "step": 234370 + }, + { + "epoch": 0.9469248576865428, + "grad_norm": 1042.057373046875, + "learning_rate": 4.7377769847346544e-07, + "loss": 91.9144, + "step": 234380 + }, + { + "epoch": 0.9469652589519104, + "grad_norm": 960.2606811523438, + "learning_rate": 4.7317366572451784e-07, + "loss": 60.1363, + "step": 234390 + }, + { + "epoch": 0.9470056602172781, + "grad_norm": 371.18975830078125, + "learning_rate": 4.7257001366110445e-07, + "loss": 82.6072, + "step": 234400 + }, + { + "epoch": 0.9470460614826456, + "grad_norm": 529.1356201171875, + "learning_rate": 4.719667422949892e-07, + "loss": 71.6634, + "step": 234410 + }, + { + "epoch": 0.9470864627480132, + "grad_norm": 1337.120361328125, + "learning_rate": 4.713638516379382e-07, + "loss": 70.0406, + "step": 234420 + }, + { + "epoch": 0.9471268640133809, + "grad_norm": 407.44598388671875, + "learning_rate": 4.7076134170170206e-07, + "loss": 41.8893, + "step": 234430 + }, + { + "epoch": 0.9471672652787485, + "grad_norm": 713.663818359375, + "learning_rate": 4.701592124980292e-07, + "loss": 51.2761, + "step": 234440 + }, + { + "epoch": 0.9472076665441161, + "grad_norm": 405.1700744628906, + "learning_rate": 4.6955746403865466e-07, + "loss": 46.6649, + "step": 234450 + }, + { + "epoch": 0.9472480678094838, + "grad_norm": 589.8349609375, + "learning_rate": 4.689560963353157e-07, + "loss": 62.4622, + "step": 234460 + }, + { + "epoch": 0.9472884690748514, + "grad_norm": 1347.6436767578125, + "learning_rate": 4.6835510939973183e-07, + "loss": 92.2076, + "step": 234470 + }, + { + "epoch": 
0.9473288703402191, + "grad_norm": 712.451904296875, + "learning_rate": 4.6775450324361813e-07, + "loss": 54.6632, + "step": 234480 + }, + { + "epoch": 0.9473692716055867, + "grad_norm": 315.2299499511719, + "learning_rate": 4.6715427787868975e-07, + "loss": 57.737, + "step": 234490 + }, + { + "epoch": 0.9474096728709543, + "grad_norm": 400.6459655761719, + "learning_rate": 4.665544333166416e-07, + "loss": 60.1925, + "step": 234500 + }, + { + "epoch": 0.947450074136322, + "grad_norm": 422.9964904785156, + "learning_rate": 4.6595496956917563e-07, + "loss": 67.6037, + "step": 234510 + }, + { + "epoch": 0.9474904754016896, + "grad_norm": 526.9540405273438, + "learning_rate": 4.6535588664796907e-07, + "loss": 50.454, + "step": 234520 + }, + { + "epoch": 0.9475308766670573, + "grad_norm": 426.8117980957031, + "learning_rate": 4.647571845647103e-07, + "loss": 55.5882, + "step": 234530 + }, + { + "epoch": 0.9475712779324248, + "grad_norm": 327.9891357421875, + "learning_rate": 4.6415886333106563e-07, + "loss": 55.3658, + "step": 234540 + }, + { + "epoch": 0.9476116791977924, + "grad_norm": 953.9613647460938, + "learning_rate": 4.6356092295870345e-07, + "loss": 80.7846, + "step": 234550 + }, + { + "epoch": 0.9476520804631601, + "grad_norm": 599.8555297851562, + "learning_rate": 4.6296336345927893e-07, + "loss": 64.7256, + "step": 234560 + }, + { + "epoch": 0.9476924817285277, + "grad_norm": 279.24591064453125, + "learning_rate": 4.623661848444383e-07, + "loss": 38.4856, + "step": 234570 + }, + { + "epoch": 0.9477328829938954, + "grad_norm": 848.0572509765625, + "learning_rate": 4.617693871258322e-07, + "loss": 76.485, + "step": 234580 + }, + { + "epoch": 0.947773284259263, + "grad_norm": 848.8544921875, + "learning_rate": 4.6117297031508913e-07, + "loss": 78.8971, + "step": 234590 + }, + { + "epoch": 0.9478136855246306, + "grad_norm": 852.462890625, + "learning_rate": 4.6057693442383756e-07, + "loss": 51.6667, + "step": 234600 + }, + { + "epoch": 0.9478540867899983, + "grad_norm": 1217.737548828125, + "learning_rate": 4.599812794636993e-07, + "loss": 74.3829, + "step": 234610 + }, + { + "epoch": 0.9478944880553659, + "grad_norm": 401.5720520019531, + "learning_rate": 4.593860054462851e-07, + "loss": 64.9881, + "step": 234620 + }, + { + "epoch": 0.9479348893207336, + "grad_norm": 457.0976257324219, + "learning_rate": 4.587911123832034e-07, + "loss": 55.9204, + "step": 234630 + }, + { + "epoch": 0.9479752905861012, + "grad_norm": 663.7340087890625, + "learning_rate": 4.5819660028604717e-07, + "loss": 51.3308, + "step": 234640 + }, + { + "epoch": 0.9480156918514688, + "grad_norm": 686.4746704101562, + "learning_rate": 4.576024691664094e-07, + "loss": 65.2347, + "step": 234650 + }, + { + "epoch": 0.9480560931168365, + "grad_norm": 480.69061279296875, + "learning_rate": 4.5700871903587184e-07, + "loss": 38.4723, + "step": 234660 + }, + { + "epoch": 0.948096494382204, + "grad_norm": 493.3192138671875, + "learning_rate": 4.56415349906012e-07, + "loss": 74.3777, + "step": 234670 + }, + { + "epoch": 0.9481368956475716, + "grad_norm": 835.7096557617188, + "learning_rate": 4.558223617883939e-07, + "loss": 70.8036, + "step": 234680 + }, + { + "epoch": 0.9481772969129393, + "grad_norm": 879.497802734375, + "learning_rate": 4.5522975469458384e-07, + "loss": 61.0246, + "step": 234690 + }, + { + "epoch": 0.9482176981783069, + "grad_norm": 731.796142578125, + "learning_rate": 4.546375286361304e-07, + "loss": 51.4992, + "step": 234700 + }, + { + "epoch": 0.9482580994436746, + "grad_norm": 796.3087768554688, + 
"learning_rate": 4.540456836245821e-07, + "loss": 73.659, + "step": 234710 + }, + { + "epoch": 0.9482985007090422, + "grad_norm": 246.27073669433594, + "learning_rate": 4.534542196714786e-07, + "loss": 63.3071, + "step": 234720 + }, + { + "epoch": 0.9483389019744098, + "grad_norm": 907.9060668945312, + "learning_rate": 4.528631367883418e-07, + "loss": 72.3688, + "step": 234730 + }, + { + "epoch": 0.9483793032397775, + "grad_norm": 942.4686279296875, + "learning_rate": 4.5227243498670693e-07, + "loss": 78.0983, + "step": 234740 + }, + { + "epoch": 0.9484197045051451, + "grad_norm": 1060.8941650390625, + "learning_rate": 4.5168211427808253e-07, + "loss": 71.8777, + "step": 234750 + }, + { + "epoch": 0.9484601057705128, + "grad_norm": 567.6964721679688, + "learning_rate": 4.5109217467398157e-07, + "loss": 41.9889, + "step": 234760 + }, + { + "epoch": 0.9485005070358804, + "grad_norm": 732.0798950195312, + "learning_rate": 4.505026161858994e-07, + "loss": 80.5222, + "step": 234770 + }, + { + "epoch": 0.948540908301248, + "grad_norm": 693.46630859375, + "learning_rate": 4.499134388253357e-07, + "loss": 56.3927, + "step": 234780 + }, + { + "epoch": 0.9485813095666156, + "grad_norm": 558.8549194335938, + "learning_rate": 4.493246426037745e-07, + "loss": 54.9188, + "step": 234790 + }, + { + "epoch": 0.9486217108319832, + "grad_norm": 695.6931762695312, + "learning_rate": 4.4873622753269786e-07, + "loss": 46.7354, + "step": 234800 + }, + { + "epoch": 0.9486621120973509, + "grad_norm": 837.0137329101562, + "learning_rate": 4.481481936235699e-07, + "loss": 51.3797, + "step": 234810 + }, + { + "epoch": 0.9487025133627185, + "grad_norm": 693.4644165039062, + "learning_rate": 4.4756054088785916e-07, + "loss": 73.5012, + "step": 234820 + }, + { + "epoch": 0.9487429146280861, + "grad_norm": 389.30108642578125, + "learning_rate": 4.4697326933702545e-07, + "loss": 53.1282, + "step": 234830 + }, + { + "epoch": 0.9487833158934538, + "grad_norm": 830.2282104492188, + "learning_rate": 4.4638637898250846e-07, + "loss": 69.6325, + "step": 234840 + }, + { + "epoch": 0.9488237171588214, + "grad_norm": 670.7050170898438, + "learning_rate": 4.457998698357613e-07, + "loss": 85.737, + "step": 234850 + }, + { + "epoch": 0.948864118424189, + "grad_norm": 1145.4442138671875, + "learning_rate": 4.452137419082103e-07, + "loss": 62.0017, + "step": 234860 + }, + { + "epoch": 0.9489045196895567, + "grad_norm": 698.0494384765625, + "learning_rate": 4.446279952112864e-07, + "loss": 85.1408, + "step": 234870 + }, + { + "epoch": 0.9489449209549243, + "grad_norm": 677.453125, + "learning_rate": 4.440426297564049e-07, + "loss": 60.8706, + "step": 234880 + }, + { + "epoch": 0.948985322220292, + "grad_norm": 603.5988159179688, + "learning_rate": 4.4345764555498325e-07, + "loss": 97.9383, + "step": 234890 + }, + { + "epoch": 0.9490257234856596, + "grad_norm": 205.38543701171875, + "learning_rate": 4.428730426184236e-07, + "loss": 39.8957, + "step": 234900 + }, + { + "epoch": 0.9490661247510273, + "grad_norm": 826.8739624023438, + "learning_rate": 4.422888209581211e-07, + "loss": 42.145, + "step": 234910 + }, + { + "epoch": 0.9491065260163948, + "grad_norm": 693.9481811523438, + "learning_rate": 4.4170498058546674e-07, + "loss": 51.9375, + "step": 234920 + }, + { + "epoch": 0.9491469272817624, + "grad_norm": 656.6866455078125, + "learning_rate": 4.411215215118425e-07, + "loss": 64.0886, + "step": 234930 + }, + { + "epoch": 0.9491873285471301, + "grad_norm": 444.6067199707031, + "learning_rate": 4.405384437486238e-07, + "loss": 50.6989, 
+ "step": 234940 + }, + { + "epoch": 0.9492277298124977, + "grad_norm": 607.0841674804688, + "learning_rate": 4.3995574730717915e-07, + "loss": 58.1033, + "step": 234950 + }, + { + "epoch": 0.9492681310778653, + "grad_norm": 800.9891357421875, + "learning_rate": 4.393734321988685e-07, + "loss": 55.9204, + "step": 234960 + }, + { + "epoch": 0.949308532343233, + "grad_norm": 700.8385620117188, + "learning_rate": 4.387914984350405e-07, + "loss": 48.4846, + "step": 234970 + }, + { + "epoch": 0.9493489336086006, + "grad_norm": 616.773193359375, + "learning_rate": 4.3820994602704615e-07, + "loss": 47.192, + "step": 234980 + }, + { + "epoch": 0.9493893348739683, + "grad_norm": 1435.4676513671875, + "learning_rate": 4.376287749862185e-07, + "loss": 86.6772, + "step": 234990 + }, + { + "epoch": 0.9494297361393359, + "grad_norm": 596.7413330078125, + "learning_rate": 4.3704798532388624e-07, + "loss": 53.4369, + "step": 235000 + }, + { + "epoch": 0.9494701374047035, + "grad_norm": 364.4523010253906, + "learning_rate": 4.364675770513804e-07, + "loss": 66.4853, + "step": 235010 + }, + { + "epoch": 0.9495105386700712, + "grad_norm": 828.792724609375, + "learning_rate": 4.358875501800075e-07, + "loss": 53.717, + "step": 235020 + }, + { + "epoch": 0.9495509399354388, + "grad_norm": 1017.1085815429688, + "learning_rate": 4.3530790472108065e-07, + "loss": 61.2, + "step": 235030 + }, + { + "epoch": 0.9495913412008065, + "grad_norm": 1375.7813720703125, + "learning_rate": 4.347286406858975e-07, + "loss": 102.4714, + "step": 235040 + }, + { + "epoch": 0.949631742466174, + "grad_norm": 627.3450927734375, + "learning_rate": 4.3414975808575343e-07, + "loss": 41.6343, + "step": 235050 + }, + { + "epoch": 0.9496721437315416, + "grad_norm": 798.2015991210938, + "learning_rate": 4.33571256931935e-07, + "loss": 46.5809, + "step": 235060 + }, + { + "epoch": 0.9497125449969093, + "grad_norm": 1114.8077392578125, + "learning_rate": 4.3299313723571324e-07, + "loss": 69.8704, + "step": 235070 + }, + { + "epoch": 0.9497529462622769, + "grad_norm": 960.6730346679688, + "learning_rate": 4.3241539900837016e-07, + "loss": 56.6316, + "step": 235080 + }, + { + "epoch": 0.9497933475276445, + "grad_norm": 972.1450805664062, + "learning_rate": 4.3183804226115897e-07, + "loss": 80.3338, + "step": 235090 + }, + { + "epoch": 0.9498337487930122, + "grad_norm": 1154.9783935546875, + "learning_rate": 4.3126106700533966e-07, + "loss": 98.4001, + "step": 235100 + }, + { + "epoch": 0.9498741500583798, + "grad_norm": 841.7286987304688, + "learning_rate": 4.306844732521609e-07, + "loss": 69.3721, + "step": 235110 + }, + { + "epoch": 0.9499145513237475, + "grad_norm": 748.2470092773438, + "learning_rate": 4.3010826101286487e-07, + "loss": 44.9221, + "step": 235120 + }, + { + "epoch": 0.9499549525891151, + "grad_norm": 605.0430908203125, + "learning_rate": 4.2953243029868254e-07, + "loss": 58.2044, + "step": 235130 + }, + { + "epoch": 0.9499953538544827, + "grad_norm": 538.6113891601562, + "learning_rate": 4.289569811208405e-07, + "loss": 54.1663, + "step": 235140 + }, + { + "epoch": 0.9500357551198504, + "grad_norm": 1221.5484619140625, + "learning_rate": 4.283819134905587e-07, + "loss": 66.2714, + "step": 235150 + }, + { + "epoch": 0.950076156385218, + "grad_norm": 484.6291198730469, + "learning_rate": 4.278072274190459e-07, + "loss": 78.0136, + "step": 235160 + }, + { + "epoch": 0.9501165576505857, + "grad_norm": 846.1554565429688, + "learning_rate": 4.2723292291751097e-07, + "loss": 62.3136, + "step": 235170 + }, + { + "epoch": 
0.9501569589159532, + "grad_norm": 1358.036376953125, + "learning_rate": 4.2665899999714265e-07, + "loss": 90.3919, + "step": 235180 + }, + { + "epoch": 0.9501973601813208, + "grad_norm": 689.8988647460938, + "learning_rate": 4.2608545866913873e-07, + "loss": 62.9876, + "step": 235190 + }, + { + "epoch": 0.9502377614466885, + "grad_norm": 1116.51708984375, + "learning_rate": 4.255122989446725e-07, + "loss": 74.6675, + "step": 235200 + }, + { + "epoch": 0.9502781627120561, + "grad_norm": 695.93994140625, + "learning_rate": 4.2493952083492387e-07, + "loss": 70.5049, + "step": 235210 + }, + { + "epoch": 0.9503185639774238, + "grad_norm": 715.4544067382812, + "learning_rate": 4.24367124351055e-07, + "loss": 56.9771, + "step": 235220 + }, + { + "epoch": 0.9503589652427914, + "grad_norm": 731.966796875, + "learning_rate": 4.237951095042281e-07, + "loss": 45.1958, + "step": 235230 + }, + { + "epoch": 0.950399366508159, + "grad_norm": 548.3452758789062, + "learning_rate": 4.232234763055965e-07, + "loss": 51.1183, + "step": 235240 + }, + { + "epoch": 0.9504397677735267, + "grad_norm": 768.3292846679688, + "learning_rate": 4.226522247663001e-07, + "loss": 73.2974, + "step": 235250 + }, + { + "epoch": 0.9504801690388943, + "grad_norm": 491.4766845703125, + "learning_rate": 4.220813548974767e-07, + "loss": 64.3005, + "step": 235260 + }, + { + "epoch": 0.950520570304262, + "grad_norm": 435.3142395019531, + "learning_rate": 4.2151086671025744e-07, + "loss": 60.4496, + "step": 235270 + }, + { + "epoch": 0.9505609715696296, + "grad_norm": 424.6413269042969, + "learning_rate": 4.209407602157667e-07, + "loss": 77.5203, + "step": 235280 + }, + { + "epoch": 0.9506013728349972, + "grad_norm": 624.95458984375, + "learning_rate": 4.2037103542511117e-07, + "loss": 57.0154, + "step": 235290 + }, + { + "epoch": 0.9506417741003649, + "grad_norm": 906.6744995117188, + "learning_rate": 4.1980169234940415e-07, + "loss": 59.0553, + "step": 235300 + }, + { + "epoch": 0.9506821753657324, + "grad_norm": 836.2215576171875, + "learning_rate": 4.192327309997435e-07, + "loss": 43.3436, + "step": 235310 + }, + { + "epoch": 0.9507225766311, + "grad_norm": 677.5031127929688, + "learning_rate": 4.186641513872225e-07, + "loss": 50.3293, + "step": 235320 + }, + { + "epoch": 0.9507629778964677, + "grad_norm": 489.1700134277344, + "learning_rate": 4.1809595352292564e-07, + "loss": 54.0315, + "step": 235330 + }, + { + "epoch": 0.9508033791618353, + "grad_norm": 693.84716796875, + "learning_rate": 4.175281374179263e-07, + "loss": 95.3561, + "step": 235340 + }, + { + "epoch": 0.950843780427203, + "grad_norm": 475.7438049316406, + "learning_rate": 4.1696070308330225e-07, + "loss": 68.9755, + "step": 235350 + }, + { + "epoch": 0.9508841816925706, + "grad_norm": 520.8013305664062, + "learning_rate": 4.1639365053010693e-07, + "loss": 83.7089, + "step": 235360 + }, + { + "epoch": 0.9509245829579382, + "grad_norm": 724.9129638671875, + "learning_rate": 4.158269797694026e-07, + "loss": 72.6468, + "step": 235370 + }, + { + "epoch": 0.9509649842233059, + "grad_norm": 2468.4033203125, + "learning_rate": 4.152606908122314e-07, + "loss": 77.5177, + "step": 235380 + }, + { + "epoch": 0.9510053854886735, + "grad_norm": 496.4300537109375, + "learning_rate": 4.14694783669638e-07, + "loss": 42.5771, + "step": 235390 + }, + { + "epoch": 0.9510457867540412, + "grad_norm": 827.1248779296875, + "learning_rate": 4.141292583526535e-07, + "loss": 73.1997, + "step": 235400 + }, + { + "epoch": 0.9510861880194088, + "grad_norm": 512.2448120117188, + 
"learning_rate": 4.135641148723002e-07, + "loss": 71.7726, + "step": 235410 + }, + { + "epoch": 0.9511265892847764, + "grad_norm": 554.381103515625, + "learning_rate": 4.129993532396004e-07, + "loss": 88.0088, + "step": 235420 + }, + { + "epoch": 0.951166990550144, + "grad_norm": 554.2191162109375, + "learning_rate": 4.1243497346556084e-07, + "loss": 75.483, + "step": 235430 + }, + { + "epoch": 0.9512073918155116, + "grad_norm": 461.0707702636719, + "learning_rate": 4.1187097556118827e-07, + "loss": 54.9427, + "step": 235440 + }, + { + "epoch": 0.9512477930808793, + "grad_norm": 319.05572509765625, + "learning_rate": 4.113073595374717e-07, + "loss": 44.3392, + "step": 235450 + }, + { + "epoch": 0.9512881943462469, + "grad_norm": 240.14779663085938, + "learning_rate": 4.1074412540540677e-07, + "loss": 50.6272, + "step": 235460 + }, + { + "epoch": 0.9513285956116145, + "grad_norm": 722.4546508789062, + "learning_rate": 4.1018127317596913e-07, + "loss": 54.7639, + "step": 235470 + }, + { + "epoch": 0.9513689968769822, + "grad_norm": 700.0820922851562, + "learning_rate": 4.0961880286013224e-07, + "loss": 50.7018, + "step": 235480 + }, + { + "epoch": 0.9514093981423498, + "grad_norm": 750.8668212890625, + "learning_rate": 4.0905671446886284e-07, + "loss": 48.9748, + "step": 235490 + }, + { + "epoch": 0.9514497994077175, + "grad_norm": 803.359375, + "learning_rate": 4.0849500801311893e-07, + "loss": 54.2249, + "step": 235500 + }, + { + "epoch": 0.9514902006730851, + "grad_norm": 1167.4573974609375, + "learning_rate": 4.0793368350385167e-07, + "loss": 54.4831, + "step": 235510 + }, + { + "epoch": 0.9515306019384527, + "grad_norm": 839.8875732421875, + "learning_rate": 4.073727409520034e-07, + "loss": 48.0644, + "step": 235520 + }, + { + "epoch": 0.9515710032038204, + "grad_norm": 725.3522338867188, + "learning_rate": 4.068121803685121e-07, + "loss": 60.7041, + "step": 235530 + }, + { + "epoch": 0.951611404469188, + "grad_norm": 624.162353515625, + "learning_rate": 4.0625200176430237e-07, + "loss": 79.7272, + "step": 235540 + }, + { + "epoch": 0.9516518057345557, + "grad_norm": 1107.8369140625, + "learning_rate": 4.056922051502987e-07, + "loss": 60.9895, + "step": 235550 + }, + { + "epoch": 0.9516922069999232, + "grad_norm": 2111.531494140625, + "learning_rate": 4.051327905374125e-07, + "loss": 84.4139, + "step": 235560 + }, + { + "epoch": 0.9517326082652908, + "grad_norm": 505.4308776855469, + "learning_rate": 4.0457375793655273e-07, + "loss": 42.467, + "step": 235570 + }, + { + "epoch": 0.9517730095306585, + "grad_norm": 512.9215698242188, + "learning_rate": 4.040151073586129e-07, + "loss": 79.6846, + "step": 235580 + }, + { + "epoch": 0.9518134107960261, + "grad_norm": 671.8743286132812, + "learning_rate": 4.0345683881448884e-07, + "loss": 58.3531, + "step": 235590 + }, + { + "epoch": 0.9518538120613937, + "grad_norm": 463.037841796875, + "learning_rate": 4.028989523150628e-07, + "loss": 90.8914, + "step": 235600 + }, + { + "epoch": 0.9518942133267614, + "grad_norm": 409.0661315917969, + "learning_rate": 4.0234144787121065e-07, + "loss": 66.4023, + "step": 235610 + }, + { + "epoch": 0.951934614592129, + "grad_norm": 571.927734375, + "learning_rate": 4.0178432549380365e-07, + "loss": 65.2371, + "step": 235620 + }, + { + "epoch": 0.9519750158574967, + "grad_norm": 648.2694702148438, + "learning_rate": 4.012275851936953e-07, + "loss": 91.4356, + "step": 235630 + }, + { + "epoch": 0.9520154171228643, + "grad_norm": 516.4229736328125, + "learning_rate": 4.0067122698175253e-07, + "loss": 68.2252, + 
"step": 235640 + }, + { + "epoch": 0.952055818388232, + "grad_norm": 796.6845092773438, + "learning_rate": 4.001152508688089e-07, + "loss": 46.5367, + "step": 235650 + }, + { + "epoch": 0.9520962196535996, + "grad_norm": 697.3612060546875, + "learning_rate": 3.9955965686571117e-07, + "loss": 64.562, + "step": 235660 + }, + { + "epoch": 0.9521366209189672, + "grad_norm": 840.4072875976562, + "learning_rate": 3.9900444498329084e-07, + "loss": 48.1811, + "step": 235670 + }, + { + "epoch": 0.9521770221843349, + "grad_norm": 993.1727905273438, + "learning_rate": 3.9844961523236803e-07, + "loss": 66.8511, + "step": 235680 + }, + { + "epoch": 0.9522174234497024, + "grad_norm": 954.697509765625, + "learning_rate": 3.9789516762376303e-07, + "loss": 86.1278, + "step": 235690 + }, + { + "epoch": 0.95225782471507, + "grad_norm": 661.9251098632812, + "learning_rate": 3.9734110216828047e-07, + "loss": 79.7957, + "step": 235700 + }, + { + "epoch": 0.9522982259804377, + "grad_norm": 1036.08349609375, + "learning_rate": 3.9678741887672733e-07, + "loss": 102.3452, + "step": 235710 + }, + { + "epoch": 0.9523386272458053, + "grad_norm": 868.0712890625, + "learning_rate": 3.9623411775989496e-07, + "loss": 79.5763, + "step": 235720 + }, + { + "epoch": 0.952379028511173, + "grad_norm": 453.89764404296875, + "learning_rate": 3.956811988285747e-07, + "loss": 51.0986, + "step": 235730 + }, + { + "epoch": 0.9524194297765406, + "grad_norm": 655.70947265625, + "learning_rate": 3.9512866209354016e-07, + "loss": 62.8921, + "step": 235740 + }, + { + "epoch": 0.9524598310419082, + "grad_norm": 388.8795471191406, + "learning_rate": 3.9457650756556497e-07, + "loss": 101.7145, + "step": 235750 + }, + { + "epoch": 0.9525002323072759, + "grad_norm": 729.4425659179688, + "learning_rate": 3.9402473525541384e-07, + "loss": 38.8526, + "step": 235760 + }, + { + "epoch": 0.9525406335726435, + "grad_norm": 377.1321105957031, + "learning_rate": 3.9347334517384483e-07, + "loss": 46.5019, + "step": 235770 + }, + { + "epoch": 0.9525810348380112, + "grad_norm": 570.2969970703125, + "learning_rate": 3.929223373316071e-07, + "loss": 56.8005, + "step": 235780 + }, + { + "epoch": 0.9526214361033788, + "grad_norm": 349.6820068359375, + "learning_rate": 3.92371711739441e-07, + "loss": 56.1707, + "step": 235790 + }, + { + "epoch": 0.9526618373687464, + "grad_norm": 684.0291748046875, + "learning_rate": 3.9182146840808676e-07, + "loss": 58.1934, + "step": 235800 + }, + { + "epoch": 0.9527022386341141, + "grad_norm": 570.1740112304688, + "learning_rate": 3.9127160734826696e-07, + "loss": 65.0825, + "step": 235810 + }, + { + "epoch": 0.9527426398994816, + "grad_norm": 1957.2869873046875, + "learning_rate": 3.9072212857070193e-07, + "loss": 86.4069, + "step": 235820 + }, + { + "epoch": 0.9527830411648492, + "grad_norm": 320.3884582519531, + "learning_rate": 3.90173032086103e-07, + "loss": 49.7391, + "step": 235830 + }, + { + "epoch": 0.9528234424302169, + "grad_norm": 423.0434875488281, + "learning_rate": 3.896243179051773e-07, + "loss": 63.225, + "step": 235840 + }, + { + "epoch": 0.9528638436955845, + "grad_norm": 942.9512329101562, + "learning_rate": 3.8907598603862506e-07, + "loss": 90.9296, + "step": 235850 + }, + { + "epoch": 0.9529042449609522, + "grad_norm": 772.11962890625, + "learning_rate": 3.885280364971289e-07, + "loss": 78.4651, + "step": 235860 + }, + { + "epoch": 0.9529446462263198, + "grad_norm": 442.9648132324219, + "learning_rate": 3.879804692913758e-07, + "loss": 44.213, + "step": 235870 + }, + { + "epoch": 0.9529850474916874, + 
"grad_norm": 32.1263313293457, + "learning_rate": 3.874332844320416e-07, + "loss": 41.1149, + "step": 235880 + }, + { + "epoch": 0.9530254487570551, + "grad_norm": 440.0835266113281, + "learning_rate": 3.868864819297935e-07, + "loss": 96.6042, + "step": 235890 + }, + { + "epoch": 0.9530658500224227, + "grad_norm": 686.2116088867188, + "learning_rate": 3.863400617952873e-07, + "loss": 55.6019, + "step": 235900 + }, + { + "epoch": 0.9531062512877904, + "grad_norm": 633.6091918945312, + "learning_rate": 3.857940240391833e-07, + "loss": 68.6228, + "step": 235910 + }, + { + "epoch": 0.953146652553158, + "grad_norm": 689.2178344726562, + "learning_rate": 3.85248368672122e-07, + "loss": 51.3372, + "step": 235920 + }, + { + "epoch": 0.9531870538185256, + "grad_norm": 509.5731506347656, + "learning_rate": 3.847030957047415e-07, + "loss": 65.0596, + "step": 235930 + }, + { + "epoch": 0.9532274550838933, + "grad_norm": 741.503173828125, + "learning_rate": 3.8415820514767333e-07, + "loss": 51.1944, + "step": 235940 + }, + { + "epoch": 0.9532678563492608, + "grad_norm": 808.8563232421875, + "learning_rate": 3.8361369701153786e-07, + "loss": 56.1013, + "step": 235950 + }, + { + "epoch": 0.9533082576146285, + "grad_norm": 510.282470703125, + "learning_rate": 3.8306957130695546e-07, + "loss": 43.1821, + "step": 235960 + }, + { + "epoch": 0.9533486588799961, + "grad_norm": 938.8242797851562, + "learning_rate": 3.8252582804452877e-07, + "loss": 95.6011, + "step": 235970 + }, + { + "epoch": 0.9533890601453637, + "grad_norm": 1272.068359375, + "learning_rate": 3.8198246723486263e-07, + "loss": 70.7242, + "step": 235980 + }, + { + "epoch": 0.9534294614107314, + "grad_norm": 387.8425598144531, + "learning_rate": 3.814394888885464e-07, + "loss": 61.7216, + "step": 235990 + }, + { + "epoch": 0.953469862676099, + "grad_norm": 1271.0938720703125, + "learning_rate": 3.8089689301616936e-07, + "loss": 80.6947, + "step": 236000 + }, + { + "epoch": 0.9535102639414667, + "grad_norm": 638.90087890625, + "learning_rate": 3.803546796283075e-07, + "loss": 67.1363, + "step": 236010 + }, + { + "epoch": 0.9535506652068343, + "grad_norm": 983.7998046875, + "learning_rate": 3.7981284873553235e-07, + "loss": 74.3561, + "step": 236020 + }, + { + "epoch": 0.9535910664722019, + "grad_norm": 741.2149047851562, + "learning_rate": 3.792714003484066e-07, + "loss": 52.1, + "step": 236030 + }, + { + "epoch": 0.9536314677375696, + "grad_norm": 785.932861328125, + "learning_rate": 3.787303344774862e-07, + "loss": 49.8547, + "step": 236040 + }, + { + "epoch": 0.9536718690029372, + "grad_norm": 1281.7872314453125, + "learning_rate": 3.7818965113332053e-07, + "loss": 54.5387, + "step": 236050 + }, + { + "epoch": 0.9537122702683049, + "grad_norm": 321.94244384765625, + "learning_rate": 3.7764935032644557e-07, + "loss": 63.0385, + "step": 236060 + }, + { + "epoch": 0.9537526715336724, + "grad_norm": 280.6268615722656, + "learning_rate": 3.7710943206740405e-07, + "loss": 36.6843, + "step": 236070 + }, + { + "epoch": 0.95379307279904, + "grad_norm": 1159.315673828125, + "learning_rate": 3.7656989636671416e-07, + "loss": 77.439, + "step": 236080 + }, + { + "epoch": 0.9538334740644077, + "grad_norm": 312.18609619140625, + "learning_rate": 3.7603074323489643e-07, + "loss": 52.3374, + "step": 236090 + }, + { + "epoch": 0.9538738753297753, + "grad_norm": 541.7654418945312, + "learning_rate": 3.754919726824646e-07, + "loss": 53.0667, + "step": 236100 + }, + { + "epoch": 0.9539142765951429, + "grad_norm": 478.04510498046875, + "learning_rate": 
3.7495358471991706e-07, + "loss": 56.2687, + "step": 236110 + }, + { + "epoch": 0.9539546778605106, + "grad_norm": 448.8958435058594, + "learning_rate": 3.744155793577564e-07, + "loss": 71.8129, + "step": 236120 + }, + { + "epoch": 0.9539950791258782, + "grad_norm": 670.3041381835938, + "learning_rate": 3.7387795660646543e-07, + "loss": 65.5519, + "step": 236130 + }, + { + "epoch": 0.9540354803912459, + "grad_norm": 538.302734375, + "learning_rate": 3.7334071647653124e-07, + "loss": 48.0451, + "step": 236140 + }, + { + "epoch": 0.9540758816566135, + "grad_norm": 864.119873046875, + "learning_rate": 3.7280385897842107e-07, + "loss": 83.7917, + "step": 236150 + }, + { + "epoch": 0.9541162829219811, + "grad_norm": 730.9628295898438, + "learning_rate": 3.7226738412260434e-07, + "loss": 52.3533, + "step": 236160 + }, + { + "epoch": 0.9541566841873488, + "grad_norm": 753.4501342773438, + "learning_rate": 3.717312919195415e-07, + "loss": 65.4793, + "step": 236170 + }, + { + "epoch": 0.9541970854527164, + "grad_norm": 240.24183654785156, + "learning_rate": 3.71195582379682e-07, + "loss": 68.4022, + "step": 236180 + }, + { + "epoch": 0.9542374867180841, + "grad_norm": 1084.245849609375, + "learning_rate": 3.7066025551347083e-07, + "loss": 67.6095, + "step": 236190 + }, + { + "epoch": 0.9542778879834516, + "grad_norm": 527.28759765625, + "learning_rate": 3.701253113313419e-07, + "loss": 54.5545, + "step": 236200 + }, + { + "epoch": 0.9543182892488192, + "grad_norm": 326.1788330078125, + "learning_rate": 3.69590749843729e-07, + "loss": 44.8746, + "step": 236210 + }, + { + "epoch": 0.9543586905141869, + "grad_norm": 682.8881225585938, + "learning_rate": 3.690565710610483e-07, + "loss": 60.6038, + "step": 236220 + }, + { + "epoch": 0.9543990917795545, + "grad_norm": 709.4285278320312, + "learning_rate": 3.685227749937181e-07, + "loss": 59.0528, + "step": 236230 + }, + { + "epoch": 0.9544394930449221, + "grad_norm": 634.0092163085938, + "learning_rate": 3.6798936165214127e-07, + "loss": 68.8886, + "step": 236240 + }, + { + "epoch": 0.9544798943102898, + "grad_norm": 859.6212158203125, + "learning_rate": 3.674563310467205e-07, + "loss": 56.0494, + "step": 236250 + }, + { + "epoch": 0.9545202955756574, + "grad_norm": 345.4490661621094, + "learning_rate": 3.6692368318784753e-07, + "loss": 57.541, + "step": 236260 + }, + { + "epoch": 0.9545606968410251, + "grad_norm": 474.47076416015625, + "learning_rate": 3.6639141808590295e-07, + "loss": 63.8743, + "step": 236270 + }, + { + "epoch": 0.9546010981063927, + "grad_norm": 727.4961547851562, + "learning_rate": 3.658595357512673e-07, + "loss": 48.834, + "step": 236280 + }, + { + "epoch": 0.9546414993717603, + "grad_norm": 527.1844482421875, + "learning_rate": 3.653280361943057e-07, + "loss": 56.6826, + "step": 236290 + }, + { + "epoch": 0.954681900637128, + "grad_norm": 946.5283203125, + "learning_rate": 3.647969194253853e-07, + "loss": 68.0202, + "step": 236300 + }, + { + "epoch": 0.9547223019024956, + "grad_norm": 713.9017944335938, + "learning_rate": 3.642661854548579e-07, + "loss": 69.9208, + "step": 236310 + }, + { + "epoch": 0.9547627031678633, + "grad_norm": 422.16351318359375, + "learning_rate": 3.6373583429306856e-07, + "loss": 53.2153, + "step": 236320 + }, + { + "epoch": 0.9548031044332308, + "grad_norm": 466.31317138671875, + "learning_rate": 3.6320586595036013e-07, + "loss": 100.5584, + "step": 236330 + }, + { + "epoch": 0.9548435056985984, + "grad_norm": 489.7778625488281, + "learning_rate": 3.6267628043706203e-07, + "loss": 53.3589, + "step": 
236340 + }, + { + "epoch": 0.9548839069639661, + "grad_norm": 1150.7440185546875, + "learning_rate": 3.6214707776350166e-07, + "loss": 94.0911, + "step": 236350 + }, + { + "epoch": 0.9549243082293337, + "grad_norm": 697.0868530273438, + "learning_rate": 3.6161825793999075e-07, + "loss": 67.8925, + "step": 236360 + }, + { + "epoch": 0.9549647094947014, + "grad_norm": 456.2440185546875, + "learning_rate": 3.610898209768454e-07, + "loss": 65.1313, + "step": 236370 + }, + { + "epoch": 0.955005110760069, + "grad_norm": 326.5404968261719, + "learning_rate": 3.6056176688436195e-07, + "loss": 58.8965, + "step": 236380 + }, + { + "epoch": 0.9550455120254366, + "grad_norm": 535.1408081054688, + "learning_rate": 3.600340956728432e-07, + "loss": 68.1938, + "step": 236390 + }, + { + "epoch": 0.9550859132908043, + "grad_norm": 588.58837890625, + "learning_rate": 3.5950680735256537e-07, + "loss": 67.0128, + "step": 236400 + }, + { + "epoch": 0.9551263145561719, + "grad_norm": 574.8043823242188, + "learning_rate": 3.5897990193381583e-07, + "loss": 92.801, + "step": 236410 + }, + { + "epoch": 0.9551667158215396, + "grad_norm": 542.266845703125, + "learning_rate": 3.584533794268663e-07, + "loss": 46.1939, + "step": 236420 + }, + { + "epoch": 0.9552071170869072, + "grad_norm": 564.576171875, + "learning_rate": 3.5792723984197973e-07, + "loss": 61.7703, + "step": 236430 + }, + { + "epoch": 0.9552475183522748, + "grad_norm": 601.9371948242188, + "learning_rate": 3.574014831894124e-07, + "loss": 55.0693, + "step": 236440 + }, + { + "epoch": 0.9552879196176425, + "grad_norm": 415.26092529296875, + "learning_rate": 3.5687610947941596e-07, + "loss": 64.347, + "step": 236450 + }, + { + "epoch": 0.95532832088301, + "grad_norm": 2099.2041015625, + "learning_rate": 3.5635111872223574e-07, + "loss": 79.3616, + "step": 236460 + }, + { + "epoch": 0.9553687221483776, + "grad_norm": 711.826904296875, + "learning_rate": 3.5582651092810117e-07, + "loss": 52.4093, + "step": 236470 + }, + { + "epoch": 0.9554091234137453, + "grad_norm": 526.6881713867188, + "learning_rate": 3.5530228610724416e-07, + "loss": 52.232, + "step": 236480 + }, + { + "epoch": 0.9554495246791129, + "grad_norm": 361.52667236328125, + "learning_rate": 3.54778444269881e-07, + "loss": 37.3851, + "step": 236490 + }, + { + "epoch": 0.9554899259444806, + "grad_norm": 1164.1512451171875, + "learning_rate": 3.5425498542622784e-07, + "loss": 65.2844, + "step": 236500 + }, + { + "epoch": 0.9555303272098482, + "grad_norm": 620.1638793945312, + "learning_rate": 3.5373190958648553e-07, + "loss": 59.5116, + "step": 236510 + }, + { + "epoch": 0.9555707284752158, + "grad_norm": 469.30731201171875, + "learning_rate": 3.5320921676085697e-07, + "loss": 53.9594, + "step": 236520 + }, + { + "epoch": 0.9556111297405835, + "grad_norm": 817.2886352539062, + "learning_rate": 3.5268690695952736e-07, + "loss": 114.1811, + "step": 236530 + }, + { + "epoch": 0.9556515310059511, + "grad_norm": 720.5396118164062, + "learning_rate": 3.5216498019268185e-07, + "loss": 59.8288, + "step": 236540 + }, + { + "epoch": 0.9556919322713188, + "grad_norm": 641.9713745117188, + "learning_rate": 3.51643436470499e-07, + "loss": 51.603, + "step": 236550 + }, + { + "epoch": 0.9557323335366864, + "grad_norm": 707.8326416015625, + "learning_rate": 3.511222758031374e-07, + "loss": 81.3619, + "step": 236560 + }, + { + "epoch": 0.955772734802054, + "grad_norm": 934.9730224609375, + "learning_rate": 3.506014982007666e-07, + "loss": 80.4584, + "step": 236570 + }, + { + "epoch": 0.9558131360674217, + 
"grad_norm": 468.0377502441406, + "learning_rate": 3.500811036735341e-07, + "loss": 55.6818, + "step": 236580 + }, + { + "epoch": 0.9558535373327892, + "grad_norm": 364.25982666015625, + "learning_rate": 3.495610922315873e-07, + "loss": 33.2555, + "step": 236590 + }, + { + "epoch": 0.9558939385981569, + "grad_norm": 2939.62353515625, + "learning_rate": 3.4904146388506475e-07, + "loss": 67.6984, + "step": 236600 + }, + { + "epoch": 0.9559343398635245, + "grad_norm": 444.97674560546875, + "learning_rate": 3.4852221864409396e-07, + "loss": 36.8792, + "step": 236610 + }, + { + "epoch": 0.9559747411288921, + "grad_norm": 543.7769775390625, + "learning_rate": 3.480033565188023e-07, + "loss": 60.7682, + "step": 236620 + }, + { + "epoch": 0.9560151423942598, + "grad_norm": 746.6734619140625, + "learning_rate": 3.474848775193018e-07, + "loss": 75.4444, + "step": 236630 + }, + { + "epoch": 0.9560555436596274, + "grad_norm": 383.5698547363281, + "learning_rate": 3.469667816557021e-07, + "loss": 55.8412, + "step": 236640 + }, + { + "epoch": 0.9560959449249951, + "grad_norm": 1033.5953369140625, + "learning_rate": 3.464490689381017e-07, + "loss": 73.5802, + "step": 236650 + }, + { + "epoch": 0.9561363461903627, + "grad_norm": 491.27142333984375, + "learning_rate": 3.4593173937659486e-07, + "loss": 70.4582, + "step": 236660 + }, + { + "epoch": 0.9561767474557303, + "grad_norm": 784.9024658203125, + "learning_rate": 3.454147929812668e-07, + "loss": 62.2391, + "step": 236670 + }, + { + "epoch": 0.956217148721098, + "grad_norm": 684.3553466796875, + "learning_rate": 3.448982297621983e-07, + "loss": 61.4838, + "step": 236680 + }, + { + "epoch": 0.9562575499864656, + "grad_norm": 693.7616577148438, + "learning_rate": 3.44382049729457e-07, + "loss": 65.2208, + "step": 236690 + }, + { + "epoch": 0.9562979512518333, + "grad_norm": 348.4949035644531, + "learning_rate": 3.438662528931058e-07, + "loss": 71.5766, + "step": 236700 + }, + { + "epoch": 0.9563383525172008, + "grad_norm": 509.93255615234375, + "learning_rate": 3.433508392632057e-07, + "loss": 61.2813, + "step": 236710 + }, + { + "epoch": 0.9563787537825684, + "grad_norm": 442.003173828125, + "learning_rate": 3.428358088497952e-07, + "loss": 69.0677, + "step": 236720 + }, + { + "epoch": 0.9564191550479361, + "grad_norm": 487.98748779296875, + "learning_rate": 3.423211616629263e-07, + "loss": 53.3586, + "step": 236730 + }, + { + "epoch": 0.9564595563133037, + "grad_norm": 1052.620361328125, + "learning_rate": 3.418068977126221e-07, + "loss": 58.9703, + "step": 236740 + }, + { + "epoch": 0.9564999575786713, + "grad_norm": 207.8184814453125, + "learning_rate": 3.412930170089168e-07, + "loss": 90.7152, + "step": 236750 + }, + { + "epoch": 0.956540358844039, + "grad_norm": 1430.4183349609375, + "learning_rate": 3.407795195618224e-07, + "loss": 84.5885, + "step": 236760 + }, + { + "epoch": 0.9565807601094066, + "grad_norm": 606.4662475585938, + "learning_rate": 3.4026640538135313e-07, + "loss": 67.0635, + "step": 236770 + }, + { + "epoch": 0.9566211613747743, + "grad_norm": 975.6383056640625, + "learning_rate": 3.3975367447751207e-07, + "loss": 103.9134, + "step": 236780 + }, + { + "epoch": 0.9566615626401419, + "grad_norm": 794.0836791992188, + "learning_rate": 3.3924132686029564e-07, + "loss": 72.0782, + "step": 236790 + }, + { + "epoch": 0.9567019639055095, + "grad_norm": 366.5487365722656, + "learning_rate": 3.3872936253969147e-07, + "loss": 43.2643, + "step": 236800 + }, + { + "epoch": 0.9567423651708772, + "grad_norm": 1014.2610473632812, + 
"learning_rate": 3.382177815256804e-07, + "loss": 67.5744, + "step": 236810 + }, + { + "epoch": 0.9567827664362448, + "grad_norm": 526.3309936523438, + "learning_rate": 3.377065838282345e-07, + "loss": 84.3396, + "step": 236820 + }, + { + "epoch": 0.9568231677016125, + "grad_norm": 364.19622802734375, + "learning_rate": 3.3719576945732355e-07, + "loss": 68.52, + "step": 236830 + }, + { + "epoch": 0.95686356896698, + "grad_norm": 486.49041748046875, + "learning_rate": 3.366853384229063e-07, + "loss": 55.953, + "step": 236840 + }, + { + "epoch": 0.9569039702323476, + "grad_norm": 990.9874267578125, + "learning_rate": 3.361752907349258e-07, + "loss": 64.7564, + "step": 236850 + }, + { + "epoch": 0.9569443714977153, + "grad_norm": 668.7334594726562, + "learning_rate": 3.3566562640333645e-07, + "loss": 61.3991, + "step": 236860 + }, + { + "epoch": 0.9569847727630829, + "grad_norm": 466.99383544921875, + "learning_rate": 3.35156345438068e-07, + "loss": 61.2397, + "step": 236870 + }, + { + "epoch": 0.9570251740284506, + "grad_norm": 653.1546020507812, + "learning_rate": 3.3464744784905243e-07, + "loss": 69.4675, + "step": 236880 + }, + { + "epoch": 0.9570655752938182, + "grad_norm": 624.4383544921875, + "learning_rate": 3.3413893364620863e-07, + "loss": 66.9108, + "step": 236890 + }, + { + "epoch": 0.9571059765591858, + "grad_norm": 876.7080688476562, + "learning_rate": 3.336308028394486e-07, + "loss": 68.9139, + "step": 236900 + }, + { + "epoch": 0.9571463778245535, + "grad_norm": 288.67352294921875, + "learning_rate": 3.331230554386822e-07, + "loss": 59.0026, + "step": 236910 + }, + { + "epoch": 0.9571867790899211, + "grad_norm": 284.9959411621094, + "learning_rate": 3.3261569145380813e-07, + "loss": 61.0709, + "step": 236920 + }, + { + "epoch": 0.9572271803552888, + "grad_norm": 344.6959228515625, + "learning_rate": 3.321087108947163e-07, + "loss": 125.9614, + "step": 236930 + }, + { + "epoch": 0.9572675816206564, + "grad_norm": 505.90350341796875, + "learning_rate": 3.316021137712899e-07, + "loss": 68.9417, + "step": 236940 + }, + { + "epoch": 0.957307982886024, + "grad_norm": 557.0088500976562, + "learning_rate": 3.3109590009340777e-07, + "loss": 42.2413, + "step": 236950 + }, + { + "epoch": 0.9573483841513917, + "grad_norm": 790.2202758789062, + "learning_rate": 3.3059006987093525e-07, + "loss": 53.3484, + "step": 236960 + }, + { + "epoch": 0.9573887854167592, + "grad_norm": 565.7665405273438, + "learning_rate": 3.300846231137378e-07, + "loss": 54.214, + "step": 236970 + }, + { + "epoch": 0.9574291866821268, + "grad_norm": 611.58935546875, + "learning_rate": 3.2957955983166534e-07, + "loss": 88.4638, + "step": 236980 + }, + { + "epoch": 0.9574695879474945, + "grad_norm": 1402.4521484375, + "learning_rate": 3.290748800345678e-07, + "loss": 59.8066, + "step": 236990 + }, + { + "epoch": 0.9575099892128621, + "grad_norm": 1021.827392578125, + "learning_rate": 3.285705837322839e-07, + "loss": 76.1837, + "step": 237000 + }, + { + "epoch": 0.9575503904782298, + "grad_norm": 586.8858642578125, + "learning_rate": 3.2806667093463917e-07, + "loss": 66.1892, + "step": 237010 + }, + { + "epoch": 0.9575907917435974, + "grad_norm": 1830.6348876953125, + "learning_rate": 3.27563141651468e-07, + "loss": 85.2898, + "step": 237020 + }, + { + "epoch": 0.957631193008965, + "grad_norm": 582.68896484375, + "learning_rate": 3.270599958925802e-07, + "loss": 56.6746, + "step": 237030 + }, + { + "epoch": 0.9576715942743327, + "grad_norm": 1010.6248168945312, + "learning_rate": 3.2655723366778357e-07, + "loss": 
64.7456, + "step": 237040 + }, + { + "epoch": 0.9577119955397003, + "grad_norm": 1027.7203369140625, + "learning_rate": 3.2605485498688583e-07, + "loss": 90.3126, + "step": 237050 + }, + { + "epoch": 0.957752396805068, + "grad_norm": 1272.510986328125, + "learning_rate": 3.255528598596724e-07, + "loss": 121.2079, + "step": 237060 + }, + { + "epoch": 0.9577927980704356, + "grad_norm": 1036.3125, + "learning_rate": 3.2505124829594e-07, + "loss": 85.1359, + "step": 237070 + }, + { + "epoch": 0.9578331993358032, + "grad_norm": 285.6571044921875, + "learning_rate": 3.245500203054608e-07, + "loss": 58.4977, + "step": 237080 + }, + { + "epoch": 0.9578736006011709, + "grad_norm": 769.3865356445312, + "learning_rate": 3.240491758980091e-07, + "loss": 70.7219, + "step": 237090 + }, + { + "epoch": 0.9579140018665384, + "grad_norm": 452.3763427734375, + "learning_rate": 3.2354871508334826e-07, + "loss": 62.9213, + "step": 237100 + }, + { + "epoch": 0.957954403131906, + "grad_norm": 591.8158569335938, + "learning_rate": 3.2304863787123496e-07, + "loss": 63.3343, + "step": 237110 + }, + { + "epoch": 0.9579948043972737, + "grad_norm": 587.1279907226562, + "learning_rate": 3.2254894427141913e-07, + "loss": 67.9495, + "step": 237120 + }, + { + "epoch": 0.9580352056626413, + "grad_norm": 795.4450073242188, + "learning_rate": 3.220496342936441e-07, + "loss": 60.1946, + "step": 237130 + }, + { + "epoch": 0.958075606928009, + "grad_norm": 696.09033203125, + "learning_rate": 3.2155070794763986e-07, + "loss": 62.1566, + "step": 237140 + }, + { + "epoch": 0.9581160081933766, + "grad_norm": 546.9901733398438, + "learning_rate": 3.210521652431364e-07, + "loss": 79.4202, + "step": 237150 + }, + { + "epoch": 0.9581564094587443, + "grad_norm": 1057.6885986328125, + "learning_rate": 3.2055400618985266e-07, + "loss": 92.2643, + "step": 237160 + }, + { + "epoch": 0.9581968107241119, + "grad_norm": 647.5994262695312, + "learning_rate": 3.2005623079749857e-07, + "loss": 90.5568, + "step": 237170 + }, + { + "epoch": 0.9582372119894795, + "grad_norm": 856.5128173828125, + "learning_rate": 3.195588390757842e-07, + "loss": 41.7624, + "step": 237180 + }, + { + "epoch": 0.9582776132548472, + "grad_norm": 844.712158203125, + "learning_rate": 3.1906183103439734e-07, + "loss": 73.5687, + "step": 237190 + }, + { + "epoch": 0.9583180145202148, + "grad_norm": 455.146240234375, + "learning_rate": 3.1856520668303694e-07, + "loss": 65.1199, + "step": 237200 + }, + { + "epoch": 0.9583584157855825, + "grad_norm": 308.130615234375, + "learning_rate": 3.180689660313774e-07, + "loss": 66.7422, + "step": 237210 + }, + { + "epoch": 0.95839881705095, + "grad_norm": 403.02691650390625, + "learning_rate": 3.1757310908909765e-07, + "loss": 51.187, + "step": 237220 + }, + { + "epoch": 0.9584392183163176, + "grad_norm": 964.3786010742188, + "learning_rate": 3.1707763586586336e-07, + "loss": 71.0705, + "step": 237230 + }, + { + "epoch": 0.9584796195816853, + "grad_norm": 182.81382751464844, + "learning_rate": 3.1658254637133566e-07, + "loss": 55.0944, + "step": 237240 + }, + { + "epoch": 0.9585200208470529, + "grad_norm": 1216.205322265625, + "learning_rate": 3.160878406151624e-07, + "loss": 91.8328, + "step": 237250 + }, + { + "epoch": 0.9585604221124205, + "grad_norm": 664.1317138671875, + "learning_rate": 3.1559351860699137e-07, + "loss": 60.7561, + "step": 237260 + }, + { + "epoch": 0.9586008233777882, + "grad_norm": 186.23898315429688, + "learning_rate": 3.1509958035645716e-07, + "loss": 100.8867, + "step": 237270 + }, + { + "epoch": 
0.9586412246431558, + "grad_norm": 315.21002197265625, + "learning_rate": 3.146060258731942e-07, + "loss": 34.4113, + "step": 237280 + }, + { + "epoch": 0.9586816259085235, + "grad_norm": 832.4974365234375, + "learning_rate": 3.1411285516682153e-07, + "loss": 71.5955, + "step": 237290 + }, + { + "epoch": 0.9587220271738911, + "grad_norm": 700.0717163085938, + "learning_rate": 3.1362006824695146e-07, + "loss": 60.2028, + "step": 237300 + }, + { + "epoch": 0.9587624284392587, + "grad_norm": 406.5939636230469, + "learning_rate": 3.1312766512319406e-07, + "loss": 55.8131, + "step": 237310 + }, + { + "epoch": 0.9588028297046264, + "grad_norm": 219.94921875, + "learning_rate": 3.126356458051505e-07, + "loss": 63.8075, + "step": 237320 + }, + { + "epoch": 0.958843230969994, + "grad_norm": 546.6136474609375, + "learning_rate": 3.1214401030240646e-07, + "loss": 73.1222, + "step": 237330 + }, + { + "epoch": 0.9588836322353617, + "grad_norm": 470.2092590332031, + "learning_rate": 3.1165275862455436e-07, + "loss": 45.2178, + "step": 237340 + }, + { + "epoch": 0.9589240335007292, + "grad_norm": 1141.865478515625, + "learning_rate": 3.111618907811642e-07, + "loss": 69.0831, + "step": 237350 + }, + { + "epoch": 0.9589644347660968, + "grad_norm": 715.4208374023438, + "learning_rate": 3.1067140678181504e-07, + "loss": 71.7908, + "step": 237360 + }, + { + "epoch": 0.9590048360314645, + "grad_norm": 397.2091979980469, + "learning_rate": 3.101813066360593e-07, + "loss": 77.1359, + "step": 237370 + }, + { + "epoch": 0.9590452372968321, + "grad_norm": 253.82997131347656, + "learning_rate": 3.096915903534581e-07, + "loss": 44.6525, + "step": 237380 + }, + { + "epoch": 0.9590856385621997, + "grad_norm": 577.0535278320312, + "learning_rate": 3.092022579435572e-07, + "loss": 85.4273, + "step": 237390 + }, + { + "epoch": 0.9591260398275674, + "grad_norm": 353.1013488769531, + "learning_rate": 3.087133094158934e-07, + "loss": 52.5947, + "step": 237400 + }, + { + "epoch": 0.959166441092935, + "grad_norm": 427.22747802734375, + "learning_rate": 3.082247447800013e-07, + "loss": 53.0579, + "step": 237410 + }, + { + "epoch": 0.9592068423583027, + "grad_norm": 470.34063720703125, + "learning_rate": 3.0773656404540664e-07, + "loss": 58.6698, + "step": 237420 + }, + { + "epoch": 0.9592472436236703, + "grad_norm": 867.1759033203125, + "learning_rate": 3.072487672216262e-07, + "loss": 52.6999, + "step": 237430 + }, + { + "epoch": 0.959287644889038, + "grad_norm": 520.039794921875, + "learning_rate": 3.067613543181702e-07, + "loss": 76.7095, + "step": 237440 + }, + { + "epoch": 0.9593280461544056, + "grad_norm": 781.5200805664062, + "learning_rate": 3.0627432534453994e-07, + "loss": 65.4539, + "step": 237450 + }, + { + "epoch": 0.9593684474197732, + "grad_norm": 1116.1805419921875, + "learning_rate": 3.0578768031023e-07, + "loss": 47.9738, + "step": 237460 + }, + { + "epoch": 0.9594088486851409, + "grad_norm": 635.7155151367188, + "learning_rate": 3.0530141922473057e-07, + "loss": 79.9326, + "step": 237470 + }, + { + "epoch": 0.9594492499505084, + "grad_norm": 590.8184814453125, + "learning_rate": 3.048155420975163e-07, + "loss": 59.4266, + "step": 237480 + }, + { + "epoch": 0.959489651215876, + "grad_norm": 747.4346923828125, + "learning_rate": 3.0433004893806404e-07, + "loss": 71.8779, + "step": 237490 + }, + { + "epoch": 0.9595300524812437, + "grad_norm": 984.066650390625, + "learning_rate": 3.038449397558396e-07, + "loss": 75.6136, + "step": 237500 + }, + { + "epoch": 0.9595704537466113, + "grad_norm": 826.5072631835938, + 
"learning_rate": 3.0336021456029543e-07, + "loss": 76.4427, + "step": 237510 + }, + { + "epoch": 0.959610855011979, + "grad_norm": 576.12451171875, + "learning_rate": 3.028758733608861e-07, + "loss": 75.3455, + "step": 237520 + }, + { + "epoch": 0.9596512562773466, + "grad_norm": 563.9835815429688, + "learning_rate": 3.02391916167053e-07, + "loss": 55.7865, + "step": 237530 + }, + { + "epoch": 0.9596916575427142, + "grad_norm": 613.47021484375, + "learning_rate": 3.019083429882286e-07, + "loss": 41.8822, + "step": 237540 + }, + { + "epoch": 0.9597320588080819, + "grad_norm": 640.5046997070312, + "learning_rate": 3.0142515383384307e-07, + "loss": 55.9609, + "step": 237550 + }, + { + "epoch": 0.9597724600734495, + "grad_norm": 682.7437744140625, + "learning_rate": 3.009423487133156e-07, + "loss": 67.8603, + "step": 237560 + }, + { + "epoch": 0.9598128613388172, + "grad_norm": 635.9102783203125, + "learning_rate": 3.0045992763605867e-07, + "loss": 50.7217, + "step": 237570 + }, + { + "epoch": 0.9598532626041848, + "grad_norm": 355.4181213378906, + "learning_rate": 2.999778906114781e-07, + "loss": 53.4709, + "step": 237580 + }, + { + "epoch": 0.9598936638695524, + "grad_norm": 873.8099975585938, + "learning_rate": 2.994962376489707e-07, + "loss": 58.4338, + "step": 237590 + }, + { + "epoch": 0.9599340651349201, + "grad_norm": 578.554443359375, + "learning_rate": 2.990149687579247e-07, + "loss": 57.3361, + "step": 237600 + }, + { + "epoch": 0.9599744664002876, + "grad_norm": 604.4491577148438, + "learning_rate": 2.9853408394772796e-07, + "loss": 56.3572, + "step": 237610 + }, + { + "epoch": 0.9600148676656552, + "grad_norm": 491.0281982421875, + "learning_rate": 2.980535832277487e-07, + "loss": 55.7151, + "step": 237620 + }, + { + "epoch": 0.9600552689310229, + "grad_norm": 800.9895629882812, + "learning_rate": 2.9757346660736154e-07, + "loss": 74.1822, + "step": 237630 + }, + { + "epoch": 0.9600956701963905, + "grad_norm": 1879.914794921875, + "learning_rate": 2.9709373409592125e-07, + "loss": 86.5248, + "step": 237640 + }, + { + "epoch": 0.9601360714617582, + "grad_norm": 625.5106201171875, + "learning_rate": 2.966143857027826e-07, + "loss": 84.4623, + "step": 237650 + }, + { + "epoch": 0.9601764727271258, + "grad_norm": 858.6591186523438, + "learning_rate": 2.9613542143729136e-07, + "loss": 53.3546, + "step": 237660 + }, + { + "epoch": 0.9602168739924934, + "grad_norm": 553.5979614257812, + "learning_rate": 2.956568413087824e-07, + "loss": 74.5497, + "step": 237670 + }, + { + "epoch": 0.9602572752578611, + "grad_norm": 656.2960205078125, + "learning_rate": 2.9517864532659037e-07, + "loss": 42.0252, + "step": 237680 + }, + { + "epoch": 0.9602976765232287, + "grad_norm": 924.9300537109375, + "learning_rate": 2.947008335000323e-07, + "loss": 50.0276, + "step": 237690 + }, + { + "epoch": 0.9603380777885964, + "grad_norm": 529.0926513671875, + "learning_rate": 2.942234058384297e-07, + "loss": 53.1343, + "step": 237700 + }, + { + "epoch": 0.960378479053964, + "grad_norm": 854.9511108398438, + "learning_rate": 2.937463623510839e-07, + "loss": 62.2509, + "step": 237710 + }, + { + "epoch": 0.9604188803193316, + "grad_norm": 779.2728881835938, + "learning_rate": 2.9326970304729865e-07, + "loss": 77.1449, + "step": 237720 + }, + { + "epoch": 0.9604592815846993, + "grad_norm": 444.2777099609375, + "learning_rate": 2.9279342793636867e-07, + "loss": 59.5576, + "step": 237730 + }, + { + "epoch": 0.9604996828500668, + "grad_norm": 519.0897827148438, + "learning_rate": 2.923175370275755e-07, + "loss": 
58.8684, + "step": 237740 + }, + { + "epoch": 0.9605400841154345, + "grad_norm": 741.2409057617188, + "learning_rate": 2.9184203033019833e-07, + "loss": 66.8674, + "step": 237750 + }, + { + "epoch": 0.9605804853808021, + "grad_norm": 499.576904296875, + "learning_rate": 2.913669078535075e-07, + "loss": 60.4492, + "step": 237760 + }, + { + "epoch": 0.9606208866461697, + "grad_norm": 987.6194458007812, + "learning_rate": 2.908921696067646e-07, + "loss": 56.9259, + "step": 237770 + }, + { + "epoch": 0.9606612879115374, + "grad_norm": 1171.8226318359375, + "learning_rate": 2.9041781559922434e-07, + "loss": 47.6046, + "step": 237780 + }, + { + "epoch": 0.960701689176905, + "grad_norm": 1046.5909423828125, + "learning_rate": 2.8994384584013936e-07, + "loss": 72.8592, + "step": 237790 + }, + { + "epoch": 0.9607420904422727, + "grad_norm": 448.7000732421875, + "learning_rate": 2.8947026033874446e-07, + "loss": 64.3209, + "step": 237800 + }, + { + "epoch": 0.9607824917076403, + "grad_norm": 676.17236328125, + "learning_rate": 2.8899705910427455e-07, + "loss": 57.7401, + "step": 237810 + }, + { + "epoch": 0.9608228929730079, + "grad_norm": 864.2822265625, + "learning_rate": 2.8852424214595556e-07, + "loss": 64.8299, + "step": 237820 + }, + { + "epoch": 0.9608632942383756, + "grad_norm": 782.625244140625, + "learning_rate": 2.8805180947300447e-07, + "loss": 60.7103, + "step": 237830 + }, + { + "epoch": 0.9609036955037432, + "grad_norm": 586.2252807617188, + "learning_rate": 2.875797610946318e-07, + "loss": 53.0137, + "step": 237840 + }, + { + "epoch": 0.9609440967691109, + "grad_norm": 636.174560546875, + "learning_rate": 2.8710809702003907e-07, + "loss": 69.4985, + "step": 237850 + }, + { + "epoch": 0.9609844980344784, + "grad_norm": 621.7000122070312, + "learning_rate": 2.866368172584255e-07, + "loss": 47.4018, + "step": 237860 + }, + { + "epoch": 0.961024899299846, + "grad_norm": 1107.6776123046875, + "learning_rate": 2.8616592181897276e-07, + "loss": 71.1251, + "step": 237870 + }, + { + "epoch": 0.9610653005652137, + "grad_norm": 792.80615234375, + "learning_rate": 2.856954107108667e-07, + "loss": 79.6587, + "step": 237880 + }, + { + "epoch": 0.9611057018305813, + "grad_norm": 614.4718017578125, + "learning_rate": 2.8522528394327787e-07, + "loss": 58.6839, + "step": 237890 + }, + { + "epoch": 0.961146103095949, + "grad_norm": 734.6215209960938, + "learning_rate": 2.8475554152537445e-07, + "loss": 69.4679, + "step": 237900 + }, + { + "epoch": 0.9611865043613166, + "grad_norm": 460.4333801269531, + "learning_rate": 2.842861834663091e-07, + "loss": 52.1097, + "step": 237910 + }, + { + "epoch": 0.9612269056266842, + "grad_norm": 557.0604858398438, + "learning_rate": 2.838172097752345e-07, + "loss": 45.3927, + "step": 237920 + }, + { + "epoch": 0.9612673068920519, + "grad_norm": 462.8584289550781, + "learning_rate": 2.8334862046129677e-07, + "loss": 65.2551, + "step": 237930 + }, + { + "epoch": 0.9613077081574195, + "grad_norm": 395.9146423339844, + "learning_rate": 2.8288041553362623e-07, + "loss": 41.8047, + "step": 237940 + }, + { + "epoch": 0.9613481094227871, + "grad_norm": 306.8257141113281, + "learning_rate": 2.8241259500135344e-07, + "loss": 53.4526, + "step": 237950 + }, + { + "epoch": 0.9613885106881548, + "grad_norm": 663.0679931640625, + "learning_rate": 2.8194515887359777e-07, + "loss": 70.3689, + "step": 237960 + }, + { + "epoch": 0.9614289119535224, + "grad_norm": 810.52685546875, + "learning_rate": 2.814781071594741e-07, + "loss": 69.069, + "step": 237970 + }, + { + "epoch": 
0.9614693132188901, + "grad_norm": 894.155029296875, + "learning_rate": 2.810114398680863e-07, + "loss": 76.8452, + "step": 237980 + }, + { + "epoch": 0.9615097144842576, + "grad_norm": 441.2276611328125, + "learning_rate": 2.805451570085316e-07, + "loss": 67.0265, + "step": 237990 + }, + { + "epoch": 0.9615501157496252, + "grad_norm": 544.8783569335938, + "learning_rate": 2.800792585899026e-07, + "loss": 69.9917, + "step": 238000 + }, + { + "epoch": 0.9615905170149929, + "grad_norm": 748.579345703125, + "learning_rate": 2.79613744621281e-07, + "loss": 68.9065, + "step": 238010 + }, + { + "epoch": 0.9616309182803605, + "grad_norm": 619.5339965820312, + "learning_rate": 2.7914861511173954e-07, + "loss": 55.6961, + "step": 238020 + }, + { + "epoch": 0.9616713195457282, + "grad_norm": 643.2796630859375, + "learning_rate": 2.786838700703509e-07, + "loss": 56.1951, + "step": 238030 + }, + { + "epoch": 0.9617117208110958, + "grad_norm": 590.427734375, + "learning_rate": 2.7821950950617236e-07, + "loss": 56.3969, + "step": 238040 + }, + { + "epoch": 0.9617521220764634, + "grad_norm": 550.16162109375, + "learning_rate": 2.7775553342825887e-07, + "loss": 62.0961, + "step": 238050 + }, + { + "epoch": 0.9617925233418311, + "grad_norm": 523.8089599609375, + "learning_rate": 2.7729194184565654e-07, + "loss": 55.2767, + "step": 238060 + }, + { + "epoch": 0.9618329246071987, + "grad_norm": 798.9484252929688, + "learning_rate": 2.7682873476740035e-07, + "loss": 62.5404, + "step": 238070 + }, + { + "epoch": 0.9618733258725664, + "grad_norm": 1065.71435546875, + "learning_rate": 2.7636591220252084e-07, + "loss": 93.4984, + "step": 238080 + }, + { + "epoch": 0.961913727137934, + "grad_norm": 1111.328857421875, + "learning_rate": 2.7590347416004193e-07, + "loss": 62.581, + "step": 238090 + }, + { + "epoch": 0.9619541284033016, + "grad_norm": 335.28668212890625, + "learning_rate": 2.754414206489808e-07, + "loss": 70.0959, + "step": 238100 + }, + { + "epoch": 0.9619945296686693, + "grad_norm": 925.1477661132812, + "learning_rate": 2.749797516783459e-07, + "loss": 66.3337, + "step": 238110 + }, + { + "epoch": 0.9620349309340368, + "grad_norm": 809.6612548828125, + "learning_rate": 2.7451846725713215e-07, + "loss": 76.5141, + "step": 238120 + }, + { + "epoch": 0.9620753321994044, + "grad_norm": 625.2645263671875, + "learning_rate": 2.7405756739433907e-07, + "loss": 80.207, + "step": 238130 + }, + { + "epoch": 0.9621157334647721, + "grad_norm": 420.97979736328125, + "learning_rate": 2.735970520989484e-07, + "loss": 63.0843, + "step": 238140 + }, + { + "epoch": 0.9621561347301397, + "grad_norm": 3212.067626953125, + "learning_rate": 2.731369213799395e-07, + "loss": 68.2814, + "step": 238150 + }, + { + "epoch": 0.9621965359955074, + "grad_norm": 541.0936889648438, + "learning_rate": 2.726771752462809e-07, + "loss": 44.8941, + "step": 238160 + }, + { + "epoch": 0.962236937260875, + "grad_norm": 807.3759155273438, + "learning_rate": 2.7221781370693867e-07, + "loss": 71.6132, + "step": 238170 + }, + { + "epoch": 0.9622773385262426, + "grad_norm": 501.63494873046875, + "learning_rate": 2.717588367708657e-07, + "loss": 66.9234, + "step": 238180 + }, + { + "epoch": 0.9623177397916103, + "grad_norm": 761.35498046875, + "learning_rate": 2.713002444470125e-07, + "loss": 64.8864, + "step": 238190 + }, + { + "epoch": 0.9623581410569779, + "grad_norm": 166.34239196777344, + "learning_rate": 2.7084203674431653e-07, + "loss": 57.8662, + "step": 238200 + }, + { + "epoch": 0.9623985423223456, + "grad_norm": 266.5375061035156, + 
"learning_rate": 2.7038421367171274e-07, + "loss": 67.7053, + "step": 238210 + }, + { + "epoch": 0.9624389435877132, + "grad_norm": 792.2747192382812, + "learning_rate": 2.6992677523812736e-07, + "loss": 67.2225, + "step": 238220 + }, + { + "epoch": 0.9624793448530808, + "grad_norm": 751.8923950195312, + "learning_rate": 2.694697214524733e-07, + "loss": 83.7484, + "step": 238230 + }, + { + "epoch": 0.9625197461184485, + "grad_norm": 748.3713989257812, + "learning_rate": 2.6901305232367003e-07, + "loss": 81.4049, + "step": 238240 + }, + { + "epoch": 0.962560147383816, + "grad_norm": 1541.6109619140625, + "learning_rate": 2.685567678606127e-07, + "loss": 92.0971, + "step": 238250 + }, + { + "epoch": 0.9626005486491837, + "grad_norm": 631.953125, + "learning_rate": 2.681008680721986e-07, + "loss": 65.2741, + "step": 238260 + }, + { + "epoch": 0.9626409499145513, + "grad_norm": 1502.8875732421875, + "learning_rate": 2.6764535296732064e-07, + "loss": 83.4348, + "step": 238270 + }, + { + "epoch": 0.9626813511799189, + "grad_norm": 611.470703125, + "learning_rate": 2.671902225548517e-07, + "loss": 61.5426, + "step": 238280 + }, + { + "epoch": 0.9627217524452866, + "grad_norm": 561.8081665039062, + "learning_rate": 2.667354768436692e-07, + "loss": 72.7703, + "step": 238290 + }, + { + "epoch": 0.9627621537106542, + "grad_norm": 396.6075134277344, + "learning_rate": 2.662811158426393e-07, + "loss": 51.0336, + "step": 238300 + }, + { + "epoch": 0.9628025549760219, + "grad_norm": 722.416748046875, + "learning_rate": 2.658271395606171e-07, + "loss": 47.2492, + "step": 238310 + }, + { + "epoch": 0.9628429562413895, + "grad_norm": 809.1995849609375, + "learning_rate": 2.6537354800645567e-07, + "loss": 83.8917, + "step": 238320 + }, + { + "epoch": 0.9628833575067571, + "grad_norm": 583.2030639648438, + "learning_rate": 2.649203411889967e-07, + "loss": 48.954, + "step": 238330 + }, + { + "epoch": 0.9629237587721248, + "grad_norm": 471.8607177734375, + "learning_rate": 2.6446751911707757e-07, + "loss": 65.1218, + "step": 238340 + }, + { + "epoch": 0.9629641600374924, + "grad_norm": 672.5091552734375, + "learning_rate": 2.640150817995224e-07, + "loss": 74.0207, + "step": 238350 + }, + { + "epoch": 0.96300456130286, + "grad_norm": 861.0545043945312, + "learning_rate": 2.635630292451552e-07, + "loss": 62.9171, + "step": 238360 + }, + { + "epoch": 0.9630449625682277, + "grad_norm": 540.7075805664062, + "learning_rate": 2.631113614627867e-07, + "loss": 68.303, + "step": 238370 + }, + { + "epoch": 0.9630853638335952, + "grad_norm": 576.4608154296875, + "learning_rate": 2.6266007846122544e-07, + "loss": 52.6034, + "step": 238380 + }, + { + "epoch": 0.9631257650989629, + "grad_norm": 569.84716796875, + "learning_rate": 2.6220918024926433e-07, + "loss": 61.5136, + "step": 238390 + }, + { + "epoch": 0.9631661663643305, + "grad_norm": 332.0077819824219, + "learning_rate": 2.617586668356986e-07, + "loss": 63.4586, + "step": 238400 + }, + { + "epoch": 0.9632065676296981, + "grad_norm": 621.762939453125, + "learning_rate": 2.6130853822931013e-07, + "loss": 60.6265, + "step": 238410 + }, + { + "epoch": 0.9632469688950658, + "grad_norm": 594.49267578125, + "learning_rate": 2.6085879443887407e-07, + "loss": 52.0177, + "step": 238420 + }, + { + "epoch": 0.9632873701604334, + "grad_norm": 687.34228515625, + "learning_rate": 2.604094354731568e-07, + "loss": 69.9298, + "step": 238430 + }, + { + "epoch": 0.9633277714258011, + "grad_norm": 672.52685546875, + "learning_rate": 2.599604613409201e-07, + "loss": 88.6523, + "step": 
238440 + }, + { + "epoch": 0.9633681726911687, + "grad_norm": 431.5932922363281, + "learning_rate": 2.595118720509193e-07, + "loss": 64.7259, + "step": 238450 + }, + { + "epoch": 0.9634085739565363, + "grad_norm": 791.3543090820312, + "learning_rate": 2.590636676118963e-07, + "loss": 56.4782, + "step": 238460 + }, + { + "epoch": 0.963448975221904, + "grad_norm": 1201.814208984375, + "learning_rate": 2.5861584803259065e-07, + "loss": 67.6862, + "step": 238470 + }, + { + "epoch": 0.9634893764872716, + "grad_norm": 847.3176879882812, + "learning_rate": 2.58168413321731e-07, + "loss": 66.1271, + "step": 238480 + }, + { + "epoch": 0.9635297777526393, + "grad_norm": 1069.9444580078125, + "learning_rate": 2.577213634880438e-07, + "loss": 81.0704, + "step": 238490 + }, + { + "epoch": 0.9635701790180068, + "grad_norm": 312.40948486328125, + "learning_rate": 2.5727469854024187e-07, + "loss": 72.713, + "step": 238500 + }, + { + "epoch": 0.9636105802833744, + "grad_norm": 594.2342529296875, + "learning_rate": 2.56828418487034e-07, + "loss": 45.2706, + "step": 238510 + }, + { + "epoch": 0.9636509815487421, + "grad_norm": 1141.8321533203125, + "learning_rate": 2.56382523337122e-07, + "loss": 84.0039, + "step": 238520 + }, + { + "epoch": 0.9636913828141097, + "grad_norm": 309.2436828613281, + "learning_rate": 2.559370130991945e-07, + "loss": 45.463, + "step": 238530 + }, + { + "epoch": 0.9637317840794773, + "grad_norm": 2498.50439453125, + "learning_rate": 2.554918877819423e-07, + "loss": 70.2424, + "step": 238540 + }, + { + "epoch": 0.963772185344845, + "grad_norm": 323.5027160644531, + "learning_rate": 2.550471473940386e-07, + "loss": 62.5225, + "step": 238550 + }, + { + "epoch": 0.9638125866102126, + "grad_norm": 352.083251953125, + "learning_rate": 2.546027919441585e-07, + "loss": 96.1996, + "step": 238560 + }, + { + "epoch": 0.9638529878755803, + "grad_norm": 599.2030029296875, + "learning_rate": 2.541588214409618e-07, + "loss": 54.483, + "step": 238570 + }, + { + "epoch": 0.9638933891409479, + "grad_norm": 734.4657592773438, + "learning_rate": 2.5371523589310386e-07, + "loss": 78.1095, + "step": 238580 + }, + { + "epoch": 0.9639337904063155, + "grad_norm": 687.9297485351562, + "learning_rate": 2.532720353092355e-07, + "loss": 61.9822, + "step": 238590 + }, + { + "epoch": 0.9639741916716832, + "grad_norm": 895.3109741210938, + "learning_rate": 2.52829219697992e-07, + "loss": 54.0508, + "step": 238600 + }, + { + "epoch": 0.9640145929370508, + "grad_norm": 682.6631469726562, + "learning_rate": 2.523867890680132e-07, + "loss": 86.2901, + "step": 238610 + }, + { + "epoch": 0.9640549942024185, + "grad_norm": 786.751220703125, + "learning_rate": 2.5194474342791873e-07, + "loss": 56.4991, + "step": 238620 + }, + { + "epoch": 0.964095395467786, + "grad_norm": 834.8478393554688, + "learning_rate": 2.5150308278632854e-07, + "loss": 43.4635, + "step": 238630 + }, + { + "epoch": 0.9641357967331536, + "grad_norm": 324.77459716796875, + "learning_rate": 2.5106180715185333e-07, + "loss": 45.2342, + "step": 238640 + }, + { + "epoch": 0.9641761979985213, + "grad_norm": 459.7238464355469, + "learning_rate": 2.5062091653309304e-07, + "loss": 90.6589, + "step": 238650 + }, + { + "epoch": 0.9642165992638889, + "grad_norm": 845.9859008789062, + "learning_rate": 2.5018041093864743e-07, + "loss": 75.5609, + "step": 238660 + }, + { + "epoch": 0.9642570005292566, + "grad_norm": 798.792236328125, + "learning_rate": 2.497402903771051e-07, + "loss": 101.1565, + "step": 238670 + }, + { + "epoch": 0.9642974017946242, + 
"grad_norm": 552.3177490234375, + "learning_rate": 2.4930055485704153e-07, + "loss": 61.952, + "step": 238680 + }, + { + "epoch": 0.9643378030599918, + "grad_norm": 306.35467529296875, + "learning_rate": 2.48861204387032e-07, + "loss": 45.8648, + "step": 238690 + }, + { + "epoch": 0.9643782043253595, + "grad_norm": 986.8060913085938, + "learning_rate": 2.48422238975643e-07, + "loss": 80.8876, + "step": 238700 + }, + { + "epoch": 0.9644186055907271, + "grad_norm": 592.1340942382812, + "learning_rate": 2.4798365863142993e-07, + "loss": 50.4695, + "step": 238710 + }, + { + "epoch": 0.9644590068560948, + "grad_norm": 732.1460571289062, + "learning_rate": 2.475454633629459e-07, + "loss": 74.6082, + "step": 238720 + }, + { + "epoch": 0.9644994081214624, + "grad_norm": 678.3995361328125, + "learning_rate": 2.4710765317872867e-07, + "loss": 63.2745, + "step": 238730 + }, + { + "epoch": 0.96453980938683, + "grad_norm": 733.4623413085938, + "learning_rate": 2.466702280873223e-07, + "loss": 67.4096, + "step": 238740 + }, + { + "epoch": 0.9645802106521977, + "grad_norm": 814.8195190429688, + "learning_rate": 2.462331880972468e-07, + "loss": 71.4601, + "step": 238750 + }, + { + "epoch": 0.9646206119175652, + "grad_norm": 555.2440185546875, + "learning_rate": 2.4579653321702425e-07, + "loss": 46.0871, + "step": 238760 + }, + { + "epoch": 0.9646610131829328, + "grad_norm": 809.9255981445312, + "learning_rate": 2.4536026345516996e-07, + "loss": 67.407, + "step": 238770 + }, + { + "epoch": 0.9647014144483005, + "grad_norm": 592.1710815429688, + "learning_rate": 2.44924378820186e-07, + "loss": 71.8238, + "step": 238780 + }, + { + "epoch": 0.9647418157136681, + "grad_norm": 327.57379150390625, + "learning_rate": 2.4448887932057465e-07, + "loss": 48.1275, + "step": 238790 + }, + { + "epoch": 0.9647822169790358, + "grad_norm": 399.8037109375, + "learning_rate": 2.4405376496482226e-07, + "loss": 68.1027, + "step": 238800 + }, + { + "epoch": 0.9648226182444034, + "grad_norm": 804.4533081054688, + "learning_rate": 2.436190357614132e-07, + "loss": 92.935, + "step": 238810 + }, + { + "epoch": 0.964863019509771, + "grad_norm": 434.1552429199219, + "learning_rate": 2.4318469171882075e-07, + "loss": 54.5504, + "step": 238820 + }, + { + "epoch": 0.9649034207751387, + "grad_norm": 529.0602416992188, + "learning_rate": 2.427507328455159e-07, + "loss": 55.496, + "step": 238830 + }, + { + "epoch": 0.9649438220405063, + "grad_norm": 618.626220703125, + "learning_rate": 2.4231715914995626e-07, + "loss": 58.801, + "step": 238840 + }, + { + "epoch": 0.964984223305874, + "grad_norm": 486.9577941894531, + "learning_rate": 2.418839706405951e-07, + "loss": 73.5014, + "step": 238850 + }, + { + "epoch": 0.9650246245712416, + "grad_norm": 601.5189208984375, + "learning_rate": 2.41451167325879e-07, + "loss": 64.2021, + "step": 238860 + }, + { + "epoch": 0.9650650258366092, + "grad_norm": 473.1072692871094, + "learning_rate": 2.4101874921424575e-07, + "loss": 63.2012, + "step": 238870 + }, + { + "epoch": 0.9651054271019769, + "grad_norm": 586.61279296875, + "learning_rate": 2.40586716314124e-07, + "loss": 58.1892, + "step": 238880 + }, + { + "epoch": 0.9651458283673444, + "grad_norm": 1270.430419921875, + "learning_rate": 2.401550686339338e-07, + "loss": 56.0291, + "step": 238890 + }, + { + "epoch": 0.9651862296327121, + "grad_norm": 590.8409423828125, + "learning_rate": 2.3972380618209723e-07, + "loss": 49.8925, + "step": 238900 + }, + { + "epoch": 0.9652266308980797, + "grad_norm": 440.6155700683594, + "learning_rate": 
2.392929289670187e-07, + "loss": 62.2022, + "step": 238910 + }, + { + "epoch": 0.9652670321634473, + "grad_norm": 203.21389770507812, + "learning_rate": 2.38862436997096e-07, + "loss": 68.8632, + "step": 238920 + }, + { + "epoch": 0.965307433428815, + "grad_norm": 267.9991455078125, + "learning_rate": 2.3843233028072454e-07, + "loss": 63.8152, + "step": 238930 + }, + { + "epoch": 0.9653478346941826, + "grad_norm": 160.4390869140625, + "learning_rate": 2.3800260882628657e-07, + "loss": 44.8795, + "step": 238940 + }, + { + "epoch": 0.9653882359595503, + "grad_norm": 989.33251953125, + "learning_rate": 2.3757327264216645e-07, + "loss": 76.4432, + "step": 238950 + }, + { + "epoch": 0.9654286372249179, + "grad_norm": 501.728759765625, + "learning_rate": 2.371443217367264e-07, + "loss": 88.3314, + "step": 238960 + }, + { + "epoch": 0.9654690384902855, + "grad_norm": 754.2576904296875, + "learning_rate": 2.3671575611833307e-07, + "loss": 58.8281, + "step": 238970 + }, + { + "epoch": 0.9655094397556532, + "grad_norm": 875.2591552734375, + "learning_rate": 2.3628757579534202e-07, + "loss": 65.7978, + "step": 238980 + }, + { + "epoch": 0.9655498410210208, + "grad_norm": 801.0933227539062, + "learning_rate": 2.3585978077609983e-07, + "loss": 74.1733, + "step": 238990 + }, + { + "epoch": 0.9655902422863885, + "grad_norm": 1005.6893310546875, + "learning_rate": 2.3543237106894434e-07, + "loss": 84.6594, + "step": 239000 + }, + { + "epoch": 0.9656306435517561, + "grad_norm": 856.9053344726562, + "learning_rate": 2.350053466822111e-07, + "loss": 83.092, + "step": 239010 + }, + { + "epoch": 0.9656710448171236, + "grad_norm": 2000.392822265625, + "learning_rate": 2.3457870762422453e-07, + "loss": 85.5262, + "step": 239020 + }, + { + "epoch": 0.9657114460824913, + "grad_norm": 1089.0926513671875, + "learning_rate": 2.3415245390330242e-07, + "loss": 101.9799, + "step": 239030 + }, + { + "epoch": 0.9657518473478589, + "grad_norm": 654.3712768554688, + "learning_rate": 2.3372658552775596e-07, + "loss": 56.4085, + "step": 239040 + }, + { + "epoch": 0.9657922486132265, + "grad_norm": 498.87432861328125, + "learning_rate": 2.3330110250588067e-07, + "loss": 55.1913, + "step": 239050 + }, + { + "epoch": 0.9658326498785942, + "grad_norm": 872.3959350585938, + "learning_rate": 2.3287600484598328e-07, + "loss": 93.9546, + "step": 239060 + }, + { + "epoch": 0.9658730511439618, + "grad_norm": 695.7781982421875, + "learning_rate": 2.3245129255634158e-07, + "loss": 82.717, + "step": 239070 + }, + { + "epoch": 0.9659134524093295, + "grad_norm": 844.2447509765625, + "learning_rate": 2.3202696564523785e-07, + "loss": 75.8674, + "step": 239080 + }, + { + "epoch": 0.9659538536746971, + "grad_norm": 680.26904296875, + "learning_rate": 2.316030241209477e-07, + "loss": 66.6549, + "step": 239090 + }, + { + "epoch": 0.9659942549400647, + "grad_norm": 654.2516479492188, + "learning_rate": 2.3117946799173342e-07, + "loss": 63.7566, + "step": 239100 + }, + { + "epoch": 0.9660346562054324, + "grad_norm": 556.4915161132812, + "learning_rate": 2.307562972658528e-07, + "loss": 84.1947, + "step": 239110 + }, + { + "epoch": 0.9660750574708, + "grad_norm": 522.1917724609375, + "learning_rate": 2.3033351195155928e-07, + "loss": 87.9652, + "step": 239120 + }, + { + "epoch": 0.9661154587361677, + "grad_norm": 347.4254455566406, + "learning_rate": 2.299111120570885e-07, + "loss": 67.8703, + "step": 239130 + }, + { + "epoch": 0.9661558600015352, + "grad_norm": 1068.9002685546875, + "learning_rate": 2.2948909759067827e-07, + "loss": 71.9628, + 
"step": 239140 + }, + { + "epoch": 0.9661962612669028, + "grad_norm": 960.6701049804688, + "learning_rate": 2.2906746856055984e-07, + "loss": 58.607, + "step": 239150 + }, + { + "epoch": 0.9662366625322705, + "grad_norm": 753.7484130859375, + "learning_rate": 2.286462249749466e-07, + "loss": 74.8397, + "step": 239160 + }, + { + "epoch": 0.9662770637976381, + "grad_norm": 555.0780639648438, + "learning_rate": 2.2822536684205644e-07, + "loss": 59.8699, + "step": 239170 + }, + { + "epoch": 0.9663174650630058, + "grad_norm": 503.1720886230469, + "learning_rate": 2.2780489417009166e-07, + "loss": 98.4857, + "step": 239180 + }, + { + "epoch": 0.9663578663283734, + "grad_norm": 432.39019775390625, + "learning_rate": 2.273848069672502e-07, + "loss": 51.0702, + "step": 239190 + }, + { + "epoch": 0.966398267593741, + "grad_norm": 330.7643127441406, + "learning_rate": 2.26965105241721e-07, + "loss": 51.3259, + "step": 239200 + }, + { + "epoch": 0.9664386688591087, + "grad_norm": 616.0209350585938, + "learning_rate": 2.2654578900168644e-07, + "loss": 87.1091, + "step": 239210 + }, + { + "epoch": 0.9664790701244763, + "grad_norm": 1890.328125, + "learning_rate": 2.261268582553222e-07, + "loss": 100.6531, + "step": 239220 + }, + { + "epoch": 0.966519471389844, + "grad_norm": 1209.424072265625, + "learning_rate": 2.257083130107929e-07, + "loss": 58.7866, + "step": 239230 + }, + { + "epoch": 0.9665598726552116, + "grad_norm": 759.7603759765625, + "learning_rate": 2.2529015327626525e-07, + "loss": 69.3246, + "step": 239240 + }, + { + "epoch": 0.9666002739205792, + "grad_norm": 713.9703369140625, + "learning_rate": 2.248723790598839e-07, + "loss": 87.3181, + "step": 239250 + }, + { + "epoch": 0.9666406751859469, + "grad_norm": 713.9769287109375, + "learning_rate": 2.2445499036979568e-07, + "loss": 48.137, + "step": 239260 + }, + { + "epoch": 0.9666810764513144, + "grad_norm": 920.229736328125, + "learning_rate": 2.240379872141385e-07, + "loss": 90.1504, + "step": 239270 + }, + { + "epoch": 0.966721477716682, + "grad_norm": 5761.146484375, + "learning_rate": 2.2362136960104585e-07, + "loss": 134.7828, + "step": 239280 + }, + { + "epoch": 0.9667618789820497, + "grad_norm": 568.430419921875, + "learning_rate": 2.2320513753863348e-07, + "loss": 52.8695, + "step": 239290 + }, + { + "epoch": 0.9668022802474173, + "grad_norm": 538.9703979492188, + "learning_rate": 2.2278929103501713e-07, + "loss": 58.7141, + "step": 239300 + }, + { + "epoch": 0.966842681512785, + "grad_norm": 495.8058166503906, + "learning_rate": 2.2237383009830804e-07, + "loss": 82.1491, + "step": 239310 + }, + { + "epoch": 0.9668830827781526, + "grad_norm": 465.28997802734375, + "learning_rate": 2.21958754736602e-07, + "loss": 109.6617, + "step": 239320 + }, + { + "epoch": 0.9669234840435202, + "grad_norm": 427.30279541015625, + "learning_rate": 2.2154406495799475e-07, + "loss": 40.7681, + "step": 239330 + }, + { + "epoch": 0.9669638853088879, + "grad_norm": 601.2344970703125, + "learning_rate": 2.2112976077056424e-07, + "loss": 70.5033, + "step": 239340 + }, + { + "epoch": 0.9670042865742555, + "grad_norm": 757.7578125, + "learning_rate": 2.2071584218239516e-07, + "loss": 65.5229, + "step": 239350 + }, + { + "epoch": 0.9670446878396232, + "grad_norm": 1348.5633544921875, + "learning_rate": 2.2030230920155437e-07, + "loss": 59.1243, + "step": 239360 + }, + { + "epoch": 0.9670850891049908, + "grad_norm": 814.5205078125, + "learning_rate": 2.1988916183610209e-07, + "loss": 108.3471, + "step": 239370 + }, + { + "epoch": 0.9671254903703584, + 
"grad_norm": 548.885009765625, + "learning_rate": 2.194764000940941e-07, + "loss": 80.6733, + "step": 239380 + }, + { + "epoch": 0.9671658916357261, + "grad_norm": 611.09130859375, + "learning_rate": 2.1906402398357507e-07, + "loss": 67.9041, + "step": 239390 + }, + { + "epoch": 0.9672062929010936, + "grad_norm": 307.37640380859375, + "learning_rate": 2.186520335125919e-07, + "loss": 75.4169, + "step": 239400 + }, + { + "epoch": 0.9672466941664613, + "grad_norm": 758.9827880859375, + "learning_rate": 2.1824042868916707e-07, + "loss": 77.4944, + "step": 239410 + }, + { + "epoch": 0.9672870954318289, + "grad_norm": 855.2761840820312, + "learning_rate": 2.1782920952133192e-07, + "loss": 61.5973, + "step": 239420 + }, + { + "epoch": 0.9673274966971965, + "grad_norm": 448.11749267578125, + "learning_rate": 2.1741837601710004e-07, + "loss": 40.5736, + "step": 239430 + }, + { + "epoch": 0.9673678979625642, + "grad_norm": 396.2466125488281, + "learning_rate": 2.170079281844828e-07, + "loss": 53.2036, + "step": 239440 + }, + { + "epoch": 0.9674082992279318, + "grad_norm": 453.8417053222656, + "learning_rate": 2.1659786603147827e-07, + "loss": 51.221, + "step": 239450 + }, + { + "epoch": 0.9674487004932995, + "grad_norm": 342.3604736328125, + "learning_rate": 2.1618818956608445e-07, + "loss": 90.7734, + "step": 239460 + }, + { + "epoch": 0.9674891017586671, + "grad_norm": 969.844482421875, + "learning_rate": 2.1577889879628833e-07, + "loss": 86.6017, + "step": 239470 + }, + { + "epoch": 0.9675295030240347, + "grad_norm": 399.02996826171875, + "learning_rate": 2.1536999373006794e-07, + "loss": 47.4266, + "step": 239480 + }, + { + "epoch": 0.9675699042894024, + "grad_norm": 483.9787902832031, + "learning_rate": 2.1496147437539472e-07, + "loss": 51.007, + "step": 239490 + }, + { + "epoch": 0.96761030555477, + "grad_norm": 460.67633056640625, + "learning_rate": 2.1455334074023336e-07, + "loss": 35.8392, + "step": 239500 + }, + { + "epoch": 0.9676507068201377, + "grad_norm": 594.317626953125, + "learning_rate": 2.1414559283254198e-07, + "loss": 74.4474, + "step": 239510 + }, + { + "epoch": 0.9676911080855053, + "grad_norm": 356.72442626953125, + "learning_rate": 2.1373823066026755e-07, + "loss": 68.7439, + "step": 239520 + }, + { + "epoch": 0.9677315093508728, + "grad_norm": 612.7587890625, + "learning_rate": 2.133312542313548e-07, + "loss": 84.3841, + "step": 239530 + }, + { + "epoch": 0.9677719106162405, + "grad_norm": 404.39862060546875, + "learning_rate": 2.1292466355373297e-07, + "loss": 85.0833, + "step": 239540 + }, + { + "epoch": 0.9678123118816081, + "grad_norm": 774.3856201171875, + "learning_rate": 2.1251845863533571e-07, + "loss": 80.9824, + "step": 239550 + }, + { + "epoch": 0.9678527131469757, + "grad_norm": 669.4237060546875, + "learning_rate": 2.1211263948407668e-07, + "loss": 58.2485, + "step": 239560 + }, + { + "epoch": 0.9678931144123434, + "grad_norm": 690.1760864257812, + "learning_rate": 2.1170720610786955e-07, + "loss": 84.6492, + "step": 239570 + }, + { + "epoch": 0.967933515677711, + "grad_norm": 555.8327026367188, + "learning_rate": 2.113021585146169e-07, + "loss": 64.5679, + "step": 239580 + }, + { + "epoch": 0.9679739169430787, + "grad_norm": 1033.0609130859375, + "learning_rate": 2.108974967122168e-07, + "loss": 65.5828, + "step": 239590 + }, + { + "epoch": 0.9680143182084463, + "grad_norm": 481.54241943359375, + "learning_rate": 2.104932207085586e-07, + "loss": 57.794, + "step": 239600 + }, + { + "epoch": 0.9680547194738139, + "grad_norm": 722.1646118164062, + 
"learning_rate": 2.100893305115226e-07, + "loss": 77.6414, + "step": 239610 + }, + { + "epoch": 0.9680951207391816, + "grad_norm": 748.581298828125, + "learning_rate": 2.096858261289869e-07, + "loss": 59.0182, + "step": 239620 + }, + { + "epoch": 0.9681355220045492, + "grad_norm": 511.0944519042969, + "learning_rate": 2.0928270756881198e-07, + "loss": 53.7652, + "step": 239630 + }, + { + "epoch": 0.9681759232699169, + "grad_norm": 453.0155334472656, + "learning_rate": 2.0887997483885814e-07, + "loss": 44.4509, + "step": 239640 + }, + { + "epoch": 0.9682163245352845, + "grad_norm": 780.1581420898438, + "learning_rate": 2.0847762794698135e-07, + "loss": 58.4134, + "step": 239650 + }, + { + "epoch": 0.968256725800652, + "grad_norm": 831.3961791992188, + "learning_rate": 2.0807566690101978e-07, + "loss": 94.866, + "step": 239660 + }, + { + "epoch": 0.9682971270660197, + "grad_norm": 291.07122802734375, + "learning_rate": 2.0767409170881604e-07, + "loss": 80.962, + "step": 239670 + }, + { + "epoch": 0.9683375283313873, + "grad_norm": 888.34228515625, + "learning_rate": 2.0727290237819274e-07, + "loss": 54.4172, + "step": 239680 + }, + { + "epoch": 0.968377929596755, + "grad_norm": 600.847900390625, + "learning_rate": 2.068720989169748e-07, + "loss": 55.6006, + "step": 239690 + }, + { + "epoch": 0.9684183308621226, + "grad_norm": 126.30791473388672, + "learning_rate": 2.064716813329759e-07, + "loss": 56.3398, + "step": 239700 + }, + { + "epoch": 0.9684587321274902, + "grad_norm": 410.1413879394531, + "learning_rate": 2.0607164963400317e-07, + "loss": 57.5323, + "step": 239710 + }, + { + "epoch": 0.9684991333928579, + "grad_norm": 712.3145141601562, + "learning_rate": 2.056720038278548e-07, + "loss": 145.3677, + "step": 239720 + }, + { + "epoch": 0.9685395346582255, + "grad_norm": 601.2294921875, + "learning_rate": 2.0527274392231788e-07, + "loss": 67.3682, + "step": 239730 + }, + { + "epoch": 0.9685799359235931, + "grad_norm": 904.4271850585938, + "learning_rate": 2.0487386992518176e-07, + "loss": 77.6586, + "step": 239740 + }, + { + "epoch": 0.9686203371889608, + "grad_norm": 586.013916015625, + "learning_rate": 2.0447538184422023e-07, + "loss": 72.7095, + "step": 239750 + }, + { + "epoch": 0.9686607384543284, + "grad_norm": 604.9998779296875, + "learning_rate": 2.040772796872048e-07, + "loss": 62.2885, + "step": 239760 + }, + { + "epoch": 0.9687011397196961, + "grad_norm": 576.5552368164062, + "learning_rate": 2.0367956346189155e-07, + "loss": 62.0317, + "step": 239770 + }, + { + "epoch": 0.9687415409850636, + "grad_norm": 479.43218994140625, + "learning_rate": 2.0328223317603868e-07, + "loss": 57.0334, + "step": 239780 + }, + { + "epoch": 0.9687819422504312, + "grad_norm": 719.6140747070312, + "learning_rate": 2.028852888373889e-07, + "loss": 55.6562, + "step": 239790 + }, + { + "epoch": 0.9688223435157989, + "grad_norm": 841.5855102539062, + "learning_rate": 2.0248873045368488e-07, + "loss": 51.0707, + "step": 239800 + }, + { + "epoch": 0.9688627447811665, + "grad_norm": 661.1954956054688, + "learning_rate": 2.0209255803265605e-07, + "loss": 41.1183, + "step": 239810 + }, + { + "epoch": 0.9689031460465342, + "grad_norm": 987.5596313476562, + "learning_rate": 2.0169677158202283e-07, + "loss": 60.7972, + "step": 239820 + }, + { + "epoch": 0.9689435473119018, + "grad_norm": 730.4888916015625, + "learning_rate": 2.0130137110950575e-07, + "loss": 69.7586, + "step": 239830 + }, + { + "epoch": 0.9689839485772694, + "grad_norm": 993.7867431640625, + "learning_rate": 2.0090635662280978e-07, + "loss": 
52.6128, + "step": 239840 + }, + { + "epoch": 0.9690243498426371, + "grad_norm": 640.2056274414062, + "learning_rate": 2.005117281296398e-07, + "loss": 76.8328, + "step": 239850 + }, + { + "epoch": 0.9690647511080047, + "grad_norm": 560.0668334960938, + "learning_rate": 2.001174856376853e-07, + "loss": 53.7996, + "step": 239860 + }, + { + "epoch": 0.9691051523733724, + "grad_norm": 498.60650634765625, + "learning_rate": 1.997236291546356e-07, + "loss": 50.3899, + "step": 239870 + }, + { + "epoch": 0.96914555363874, + "grad_norm": 664.0135498046875, + "learning_rate": 1.9933015868816685e-07, + "loss": 77.521, + "step": 239880 + }, + { + "epoch": 0.9691859549041076, + "grad_norm": 584.017578125, + "learning_rate": 1.9893707424595066e-07, + "loss": 56.8237, + "step": 239890 + }, + { + "epoch": 0.9692263561694753, + "grad_norm": 504.1904296875, + "learning_rate": 1.98544375835652e-07, + "loss": 49.8187, + "step": 239900 + }, + { + "epoch": 0.9692667574348428, + "grad_norm": 866.2882080078125, + "learning_rate": 1.9815206346492254e-07, + "loss": 58.9513, + "step": 239910 + }, + { + "epoch": 0.9693071587002104, + "grad_norm": 587.17626953125, + "learning_rate": 1.9776013714141396e-07, + "loss": 63.2829, + "step": 239920 + }, + { + "epoch": 0.9693475599655781, + "grad_norm": 328.779541015625, + "learning_rate": 1.9736859687276678e-07, + "loss": 45.9413, + "step": 239930 + }, + { + "epoch": 0.9693879612309457, + "grad_norm": 4434.16943359375, + "learning_rate": 1.969774426666149e-07, + "loss": 70.5218, + "step": 239940 + }, + { + "epoch": 0.9694283624963134, + "grad_norm": 972.0810546875, + "learning_rate": 1.9658667453058111e-07, + "loss": 83.1756, + "step": 239950 + }, + { + "epoch": 0.969468763761681, + "grad_norm": 378.3077392578125, + "learning_rate": 1.9619629247228823e-07, + "loss": 77.8982, + "step": 239960 + }, + { + "epoch": 0.9695091650270486, + "grad_norm": 564.8917236328125, + "learning_rate": 1.9580629649934346e-07, + "loss": 69.5867, + "step": 239970 + }, + { + "epoch": 0.9695495662924163, + "grad_norm": 988.4263916015625, + "learning_rate": 1.9541668661934965e-07, + "loss": 88.8734, + "step": 239980 + }, + { + "epoch": 0.9695899675577839, + "grad_norm": 547.7122802734375, + "learning_rate": 1.9502746283990514e-07, + "loss": 65.3936, + "step": 239990 + }, + { + "epoch": 0.9696303688231516, + "grad_norm": 1081.6358642578125, + "learning_rate": 1.9463862516859277e-07, + "loss": 70.1006, + "step": 240000 + }, + { + "epoch": 0.9696707700885192, + "grad_norm": 1829.4698486328125, + "learning_rate": 1.9425017361300203e-07, + "loss": 78.1309, + "step": 240010 + }, + { + "epoch": 0.9697111713538868, + "grad_norm": 525.6143188476562, + "learning_rate": 1.9386210818069795e-07, + "loss": 59.6731, + "step": 240020 + }, + { + "epoch": 0.9697515726192545, + "grad_norm": 643.3572998046875, + "learning_rate": 1.9347442887924783e-07, + "loss": 49.6877, + "step": 240030 + }, + { + "epoch": 0.969791973884622, + "grad_norm": 1180.4326171875, + "learning_rate": 1.9308713571621228e-07, + "loss": 60.7889, + "step": 240040 + }, + { + "epoch": 0.9698323751499897, + "grad_norm": 404.8629150390625, + "learning_rate": 1.9270022869914306e-07, + "loss": 62.0819, + "step": 240050 + }, + { + "epoch": 0.9698727764153573, + "grad_norm": 795.3435668945312, + "learning_rate": 1.9231370783557636e-07, + "loss": 59.6359, + "step": 240060 + }, + { + "epoch": 0.9699131776807249, + "grad_norm": 818.3235473632812, + "learning_rate": 1.9192757313305277e-07, + "loss": 61.722, + "step": 240070 + }, + { + "epoch": 
0.9699535789460926, + "grad_norm": 415.1429443359375, + "learning_rate": 1.9154182459909963e-07, + "loss": 52.7958, + "step": 240080 + }, + { + "epoch": 0.9699939802114602, + "grad_norm": 622.2362060546875, + "learning_rate": 1.911564622412354e-07, + "loss": 60.1236, + "step": 240090 + }, + { + "epoch": 0.9700343814768279, + "grad_norm": 874.8026733398438, + "learning_rate": 1.9077148606697627e-07, + "loss": 86.4543, + "step": 240100 + }, + { + "epoch": 0.9700747827421955, + "grad_norm": 398.1099548339844, + "learning_rate": 1.9038689608382287e-07, + "loss": 66.6794, + "step": 240110 + }, + { + "epoch": 0.9701151840075631, + "grad_norm": 655.59033203125, + "learning_rate": 1.9000269229927816e-07, + "loss": 51.6716, + "step": 240120 + }, + { + "epoch": 0.9701555852729308, + "grad_norm": 665.3683471679688, + "learning_rate": 1.896188747208294e-07, + "loss": 54.9724, + "step": 240130 + }, + { + "epoch": 0.9701959865382984, + "grad_norm": 492.30731201171875, + "learning_rate": 1.8923544335595957e-07, + "loss": 96.7535, + "step": 240140 + }, + { + "epoch": 0.9702363878036661, + "grad_norm": 824.9630737304688, + "learning_rate": 1.8885239821214262e-07, + "loss": 56.9772, + "step": 240150 + }, + { + "epoch": 0.9702767890690337, + "grad_norm": 551.35693359375, + "learning_rate": 1.884697392968482e-07, + "loss": 78.8713, + "step": 240160 + }, + { + "epoch": 0.9703171903344012, + "grad_norm": 590.560302734375, + "learning_rate": 1.8808746661753697e-07, + "loss": 50.5464, + "step": 240170 + }, + { + "epoch": 0.9703575915997689, + "grad_norm": 304.7107849121094, + "learning_rate": 1.8770558018166073e-07, + "loss": 47.5975, + "step": 240180 + }, + { + "epoch": 0.9703979928651365, + "grad_norm": 583.9700317382812, + "learning_rate": 1.8732407999666247e-07, + "loss": 38.4376, + "step": 240190 + }, + { + "epoch": 0.9704383941305041, + "grad_norm": 859.7391967773438, + "learning_rate": 1.8694296606998286e-07, + "loss": 77.1135, + "step": 240200 + }, + { + "epoch": 0.9704787953958718, + "grad_norm": 1440.9993896484375, + "learning_rate": 1.865622384090493e-07, + "loss": 45.4382, + "step": 240210 + }, + { + "epoch": 0.9705191966612394, + "grad_norm": 820.0663452148438, + "learning_rate": 1.8618189702128476e-07, + "loss": 38.8416, + "step": 240220 + }, + { + "epoch": 0.9705595979266071, + "grad_norm": 827.3706665039062, + "learning_rate": 1.8580194191410772e-07, + "loss": 59.0525, + "step": 240230 + }, + { + "epoch": 0.9705999991919747, + "grad_norm": 788.7394409179688, + "learning_rate": 1.8542237309492117e-07, + "loss": 72.7045, + "step": 240240 + }, + { + "epoch": 0.9706404004573423, + "grad_norm": 284.4632568359375, + "learning_rate": 1.8504319057112808e-07, + "loss": 81.1323, + "step": 240250 + }, + { + "epoch": 0.97068080172271, + "grad_norm": 1059.7027587890625, + "learning_rate": 1.846643943501203e-07, + "loss": 80.9485, + "step": 240260 + }, + { + "epoch": 0.9707212029880776, + "grad_norm": 534.7786865234375, + "learning_rate": 1.8428598443927857e-07, + "loss": 64.1833, + "step": 240270 + }, + { + "epoch": 0.9707616042534453, + "grad_norm": 690.102783203125, + "learning_rate": 1.8390796084598596e-07, + "loss": 71.871, + "step": 240280 + }, + { + "epoch": 0.9708020055188128, + "grad_norm": 803.0875854492188, + "learning_rate": 1.8353032357760536e-07, + "loss": 47.0464, + "step": 240290 + }, + { + "epoch": 0.9708424067841804, + "grad_norm": 841.7188720703125, + "learning_rate": 1.831530726415087e-07, + "loss": 89.6536, + "step": 240300 + }, + { + "epoch": 0.9708828080495481, + "grad_norm": 
742.2428588867188, + "learning_rate": 1.8277620804504347e-07, + "loss": 73.4126, + "step": 240310 + }, + { + "epoch": 0.9709232093149157, + "grad_norm": 677.9320678710938, + "learning_rate": 1.823997297955571e-07, + "loss": 74.5443, + "step": 240320 + }, + { + "epoch": 0.9709636105802834, + "grad_norm": 547.9056396484375, + "learning_rate": 1.8202363790039258e-07, + "loss": 62.0698, + "step": 240330 + }, + { + "epoch": 0.971004011845651, + "grad_norm": 442.29180908203125, + "learning_rate": 1.8164793236687962e-07, + "loss": 45.0719, + "step": 240340 + }, + { + "epoch": 0.9710444131110186, + "grad_norm": 698.0214233398438, + "learning_rate": 1.8127261320234345e-07, + "loss": 83.0123, + "step": 240350 + }, + { + "epoch": 0.9710848143763863, + "grad_norm": 606.3641967773438, + "learning_rate": 1.808976804141005e-07, + "loss": 75.315, + "step": 240360 + }, + { + "epoch": 0.9711252156417539, + "grad_norm": 674.1475219726562, + "learning_rate": 1.8052313400946042e-07, + "loss": 62.0637, + "step": 240370 + }, + { + "epoch": 0.9711656169071216, + "grad_norm": 841.340087890625, + "learning_rate": 1.801489739957263e-07, + "loss": 107.4836, + "step": 240380 + }, + { + "epoch": 0.9712060181724892, + "grad_norm": 909.1239013671875, + "learning_rate": 1.7977520038019446e-07, + "loss": 60.769, + "step": 240390 + }, + { + "epoch": 0.9712464194378568, + "grad_norm": 451.76422119140625, + "learning_rate": 1.7940181317014583e-07, + "loss": 66.5611, + "step": 240400 + }, + { + "epoch": 0.9712868207032245, + "grad_norm": 434.9193115234375, + "learning_rate": 1.7902881237286341e-07, + "loss": 55.8109, + "step": 240410 + }, + { + "epoch": 0.971327221968592, + "grad_norm": 728.195068359375, + "learning_rate": 1.7865619799561918e-07, + "loss": 42.8571, + "step": 240420 + }, + { + "epoch": 0.9713676232339596, + "grad_norm": 701.6430053710938, + "learning_rate": 1.7828397004567843e-07, + "loss": 38.5429, + "step": 240430 + }, + { + "epoch": 0.9714080244993273, + "grad_norm": 918.7395629882812, + "learning_rate": 1.7791212853029538e-07, + "loss": 73.2264, + "step": 240440 + }, + { + "epoch": 0.9714484257646949, + "grad_norm": 639.4662475585938, + "learning_rate": 1.7754067345671976e-07, + "loss": 87.4399, + "step": 240450 + }, + { + "epoch": 0.9714888270300626, + "grad_norm": 768.2753295898438, + "learning_rate": 1.771696048321969e-07, + "loss": 79.4394, + "step": 240460 + }, + { + "epoch": 0.9715292282954302, + "grad_norm": 688.4873046875, + "learning_rate": 1.7679892266395437e-07, + "loss": 80.3364, + "step": 240470 + }, + { + "epoch": 0.9715696295607978, + "grad_norm": 576.0776977539062, + "learning_rate": 1.7642862695922415e-07, + "loss": 50.3719, + "step": 240480 + }, + { + "epoch": 0.9716100308261655, + "grad_norm": 1064.277587890625, + "learning_rate": 1.7605871772522488e-07, + "loss": 53.6109, + "step": 240490 + }, + { + "epoch": 0.9716504320915331, + "grad_norm": 379.2320861816406, + "learning_rate": 1.7568919496916636e-07, + "loss": 58.485, + "step": 240500 + }, + { + "epoch": 0.9716908333569008, + "grad_norm": 830.1602783203125, + "learning_rate": 1.7532005869825175e-07, + "loss": 80.1504, + "step": 240510 + }, + { + "epoch": 0.9717312346222684, + "grad_norm": 843.4537353515625, + "learning_rate": 1.749513089196797e-07, + "loss": 78.5198, + "step": 240520 + }, + { + "epoch": 0.971771635887636, + "grad_norm": 474.85552978515625, + "learning_rate": 1.7458294564064004e-07, + "loss": 57.016, + "step": 240530 + }, + { + "epoch": 0.9718120371530037, + "grad_norm": 741.2623901367188, + "learning_rate": 
1.7421496886831147e-07, + "loss": 68.0703, + "step": 240540 + }, + { + "epoch": 0.9718524384183712, + "grad_norm": 354.6985168457031, + "learning_rate": 1.7384737860987045e-07, + "loss": 41.8849, + "step": 240550 + }, + { + "epoch": 0.9718928396837389, + "grad_norm": 872.3302001953125, + "learning_rate": 1.7348017487247793e-07, + "loss": 50.4893, + "step": 240560 + }, + { + "epoch": 0.9719332409491065, + "grad_norm": 1181.014892578125, + "learning_rate": 1.7311335766330152e-07, + "loss": 71.0713, + "step": 240570 + }, + { + "epoch": 0.9719736422144741, + "grad_norm": 516.3545532226562, + "learning_rate": 1.7274692698948436e-07, + "loss": 76.1352, + "step": 240580 + }, + { + "epoch": 0.9720140434798418, + "grad_norm": 532.1878051757812, + "learning_rate": 1.7238088285817634e-07, + "loss": 43.2859, + "step": 240590 + }, + { + "epoch": 0.9720544447452094, + "grad_norm": 566.7020874023438, + "learning_rate": 1.720152252765095e-07, + "loss": 76.0906, + "step": 240600 + }, + { + "epoch": 0.972094846010577, + "grad_norm": 1053.009033203125, + "learning_rate": 1.7164995425161368e-07, + "loss": 63.7436, + "step": 240610 + }, + { + "epoch": 0.9721352472759447, + "grad_norm": 1027.7835693359375, + "learning_rate": 1.7128506979060989e-07, + "loss": 63.4823, + "step": 240620 + }, + { + "epoch": 0.9721756485413123, + "grad_norm": 537.8633422851562, + "learning_rate": 1.7092057190061461e-07, + "loss": 96.2466, + "step": 240630 + }, + { + "epoch": 0.97221604980668, + "grad_norm": 565.6464233398438, + "learning_rate": 1.7055646058872888e-07, + "loss": 55.3855, + "step": 240640 + }, + { + "epoch": 0.9722564510720476, + "grad_norm": 623.0969848632812, + "learning_rate": 1.7019273586205366e-07, + "loss": 81.4318, + "step": 240650 + }, + { + "epoch": 0.9722968523374153, + "grad_norm": 593.8271484375, + "learning_rate": 1.6982939772768324e-07, + "loss": 58.3769, + "step": 240660 + }, + { + "epoch": 0.9723372536027829, + "grad_norm": 355.52630615234375, + "learning_rate": 1.6946644619269647e-07, + "loss": 34.4476, + "step": 240670 + }, + { + "epoch": 0.9723776548681504, + "grad_norm": 489.40899658203125, + "learning_rate": 1.6910388126416988e-07, + "loss": 52.6867, + "step": 240680 + }, + { + "epoch": 0.9724180561335181, + "grad_norm": 746.28125, + "learning_rate": 1.6874170294917336e-07, + "loss": 85.6625, + "step": 240690 + }, + { + "epoch": 0.9724584573988857, + "grad_norm": 622.705810546875, + "learning_rate": 1.6837991125476572e-07, + "loss": 61.3725, + "step": 240700 + }, + { + "epoch": 0.9724988586642533, + "grad_norm": 533.9436645507812, + "learning_rate": 1.6801850618800354e-07, + "loss": 55.4334, + "step": 240710 + }, + { + "epoch": 0.972539259929621, + "grad_norm": 1190.5240478515625, + "learning_rate": 1.6765748775593005e-07, + "loss": 89.5726, + "step": 240720 + }, + { + "epoch": 0.9725796611949886, + "grad_norm": 184.86891174316406, + "learning_rate": 1.672968559655841e-07, + "loss": 94.4267, + "step": 240730 + }, + { + "epoch": 0.9726200624603563, + "grad_norm": 262.43310546875, + "learning_rate": 1.669366108239978e-07, + "loss": 60.6137, + "step": 240740 + }, + { + "epoch": 0.9726604637257239, + "grad_norm": 729.456298828125, + "learning_rate": 1.6657675233819225e-07, + "loss": 53.8576, + "step": 240750 + }, + { + "epoch": 0.9727008649910915, + "grad_norm": 255.04217529296875, + "learning_rate": 1.6621728051518182e-07, + "loss": 65.7865, + "step": 240760 + }, + { + "epoch": 0.9727412662564592, + "grad_norm": 945.2756958007812, + "learning_rate": 1.6585819536197868e-07, + "loss": 73.5392, + 
"step": 240770 + }, + { + "epoch": 0.9727816675218268, + "grad_norm": 1564.8101806640625, + "learning_rate": 1.6549949688558165e-07, + "loss": 127.7612, + "step": 240780 + }, + { + "epoch": 0.9728220687871945, + "grad_norm": 289.87060546875, + "learning_rate": 1.6514118509298293e-07, + "loss": 68.5775, + "step": 240790 + }, + { + "epoch": 0.9728624700525621, + "grad_norm": 747.6844482421875, + "learning_rate": 1.6478325999116806e-07, + "loss": 42.843, + "step": 240800 + }, + { + "epoch": 0.9729028713179296, + "grad_norm": 994.371337890625, + "learning_rate": 1.644257215871159e-07, + "loss": 83.1694, + "step": 240810 + }, + { + "epoch": 0.9729432725832973, + "grad_norm": 473.51031494140625, + "learning_rate": 1.640685698877964e-07, + "loss": 65.4437, + "step": 240820 + }, + { + "epoch": 0.9729836738486649, + "grad_norm": 411.5533752441406, + "learning_rate": 1.6371180490017292e-07, + "loss": 87.0783, + "step": 240830 + }, + { + "epoch": 0.9730240751140325, + "grad_norm": 430.9603271484375, + "learning_rate": 1.6335542663119985e-07, + "loss": 62.7952, + "step": 240840 + }, + { + "epoch": 0.9730644763794002, + "grad_norm": 151.275390625, + "learning_rate": 1.6299943508782501e-07, + "loss": 63.4214, + "step": 240850 + }, + { + "epoch": 0.9731048776447678, + "grad_norm": 1117.3607177734375, + "learning_rate": 1.6264383027698727e-07, + "loss": 95.7586, + "step": 240860 + }, + { + "epoch": 0.9731452789101355, + "grad_norm": 511.2049865722656, + "learning_rate": 1.6228861220562553e-07, + "loss": 58.6799, + "step": 240870 + }, + { + "epoch": 0.9731856801755031, + "grad_norm": 936.7846069335938, + "learning_rate": 1.6193378088065425e-07, + "loss": 78.9578, + "step": 240880 + }, + { + "epoch": 0.9732260814408707, + "grad_norm": 539.4894409179688, + "learning_rate": 1.6157933630900124e-07, + "loss": 58.7451, + "step": 240890 + }, + { + "epoch": 0.9732664827062384, + "grad_norm": 868.8643188476562, + "learning_rate": 1.6122527849757208e-07, + "loss": 76.6452, + "step": 240900 + }, + { + "epoch": 0.973306883971606, + "grad_norm": 496.6718444824219, + "learning_rate": 1.6087160745327014e-07, + "loss": 64.655, + "step": 240910 + }, + { + "epoch": 0.9733472852369737, + "grad_norm": 326.17803955078125, + "learning_rate": 1.605183231829899e-07, + "loss": 72.1982, + "step": 240920 + }, + { + "epoch": 0.9733876865023412, + "grad_norm": 527.7848510742188, + "learning_rate": 1.6016542569361692e-07, + "loss": 56.1801, + "step": 240930 + }, + { + "epoch": 0.9734280877677088, + "grad_norm": 295.9010314941406, + "learning_rate": 1.5981291499203687e-07, + "loss": 51.4536, + "step": 240940 + }, + { + "epoch": 0.9734684890330765, + "grad_norm": 456.5440368652344, + "learning_rate": 1.5946079108511536e-07, + "loss": 52.9763, + "step": 240950 + }, + { + "epoch": 0.9735088902984441, + "grad_norm": 564.4613037109375, + "learning_rate": 1.5910905397972244e-07, + "loss": 52.3563, + "step": 240960 + }, + { + "epoch": 0.9735492915638118, + "grad_norm": 836.3871459960938, + "learning_rate": 1.5875770368271258e-07, + "loss": 103.5377, + "step": 240970 + }, + { + "epoch": 0.9735896928291794, + "grad_norm": 1324.525390625, + "learning_rate": 1.5840674020093372e-07, + "loss": 103.3592, + "step": 240980 + }, + { + "epoch": 0.973630094094547, + "grad_norm": 1134.7835693359375, + "learning_rate": 1.5805616354123365e-07, + "loss": 69.9137, + "step": 240990 + }, + { + "epoch": 0.9736704953599147, + "grad_norm": 512.989501953125, + "learning_rate": 1.577059737104447e-07, + "loss": 66.0363, + "step": 241000 + }, + { + "epoch": 
0.9737108966252823, + "grad_norm": 1187.2669677734375, + "learning_rate": 1.5735617071539032e-07, + "loss": 64.6944, + "step": 241010 + }, + { + "epoch": 0.97375129789065, + "grad_norm": 287.91412353515625, + "learning_rate": 1.5700675456289392e-07, + "loss": 86.9744, + "step": 241020 + }, + { + "epoch": 0.9737916991560176, + "grad_norm": 666.0401000976562, + "learning_rate": 1.5665772525976786e-07, + "loss": 57.2132, + "step": 241030 + }, + { + "epoch": 0.9738321004213852, + "grad_norm": 1034.9267578125, + "learning_rate": 1.5630908281281555e-07, + "loss": 55.3928, + "step": 241040 + }, + { + "epoch": 0.9738725016867529, + "grad_norm": 518.3536987304688, + "learning_rate": 1.5596082722883377e-07, + "loss": 43.4488, + "step": 241050 + }, + { + "epoch": 0.9739129029521204, + "grad_norm": 556.803466796875, + "learning_rate": 1.5561295851461046e-07, + "loss": 39.0185, + "step": 241060 + }, + { + "epoch": 0.973953304217488, + "grad_norm": 845.3297729492188, + "learning_rate": 1.5526547667693125e-07, + "loss": 62.043, + "step": 241070 + }, + { + "epoch": 0.9739937054828557, + "grad_norm": 619.6710205078125, + "learning_rate": 1.5491838172256634e-07, + "loss": 68.0527, + "step": 241080 + }, + { + "epoch": 0.9740341067482233, + "grad_norm": 671.1410522460938, + "learning_rate": 1.5457167365828584e-07, + "loss": 82.5342, + "step": 241090 + }, + { + "epoch": 0.974074508013591, + "grad_norm": 1132.6358642578125, + "learning_rate": 1.542253524908466e-07, + "loss": 77.5392, + "step": 241100 + }, + { + "epoch": 0.9741149092789586, + "grad_norm": 615.5963745117188, + "learning_rate": 1.5387941822700314e-07, + "loss": 68.676, + "step": 241110 + }, + { + "epoch": 0.9741553105443262, + "grad_norm": 397.6839294433594, + "learning_rate": 1.535338708734968e-07, + "loss": 69.9304, + "step": 241120 + }, + { + "epoch": 0.9741957118096939, + "grad_norm": 628.4906616210938, + "learning_rate": 1.531887104370644e-07, + "loss": 53.565, + "step": 241130 + }, + { + "epoch": 0.9742361130750615, + "grad_norm": 1203.1715087890625, + "learning_rate": 1.5284393692443833e-07, + "loss": 55.0489, + "step": 241140 + }, + { + "epoch": 0.9742765143404292, + "grad_norm": 509.1058349609375, + "learning_rate": 1.5249955034233544e-07, + "loss": 76.818, + "step": 241150 + }, + { + "epoch": 0.9743169156057968, + "grad_norm": 891.6856689453125, + "learning_rate": 1.5215555069747479e-07, + "loss": 64.6543, + "step": 241160 + }, + { + "epoch": 0.9743573168711644, + "grad_norm": 1088.8846435546875, + "learning_rate": 1.5181193799655546e-07, + "loss": 66.1383, + "step": 241170 + }, + { + "epoch": 0.9743977181365321, + "grad_norm": 384.4014587402344, + "learning_rate": 1.5146871224628545e-07, + "loss": 60.8488, + "step": 241180 + }, + { + "epoch": 0.9744381194018996, + "grad_norm": 820.3665161132812, + "learning_rate": 1.5112587345334827e-07, + "loss": 72.7001, + "step": 241190 + }, + { + "epoch": 0.9744785206672673, + "grad_norm": 746.9617919921875, + "learning_rate": 1.5078342162443195e-07, + "loss": 67.3102, + "step": 241200 + }, + { + "epoch": 0.9745189219326349, + "grad_norm": 539.9096069335938, + "learning_rate": 1.504413567662133e-07, + "loss": 116.7941, + "step": 241210 + }, + { + "epoch": 0.9745593231980025, + "grad_norm": 923.7551879882812, + "learning_rate": 1.5009967888535814e-07, + "loss": 63.2722, + "step": 241220 + }, + { + "epoch": 0.9745997244633702, + "grad_norm": 1034.20068359375, + "learning_rate": 1.4975838798853005e-07, + "loss": 108.2853, + "step": 241230 + }, + { + "epoch": 0.9746401257287378, + "grad_norm": 
934.9916381835938, + "learning_rate": 1.4941748408238145e-07, + "loss": 59.4688, + "step": 241240 + }, + { + "epoch": 0.9746805269941055, + "grad_norm": 568.2880249023438, + "learning_rate": 1.4907696717356035e-07, + "loss": 63.5036, + "step": 241250 + }, + { + "epoch": 0.9747209282594731, + "grad_norm": 957.8430786132812, + "learning_rate": 1.4873683726870146e-07, + "loss": 102.3552, + "step": 241260 + }, + { + "epoch": 0.9747613295248407, + "grad_norm": 488.8724670410156, + "learning_rate": 1.4839709437443727e-07, + "loss": 56.8039, + "step": 241270 + }, + { + "epoch": 0.9748017307902084, + "grad_norm": 632.8944091796875, + "learning_rate": 1.480577384973958e-07, + "loss": 67.5738, + "step": 241280 + }, + { + "epoch": 0.974842132055576, + "grad_norm": 779.609375, + "learning_rate": 1.477187696441873e-07, + "loss": 103.5953, + "step": 241290 + }, + { + "epoch": 0.9748825333209437, + "grad_norm": 830.3026733398438, + "learning_rate": 1.4738018782141984e-07, + "loss": 62.9917, + "step": 241300 + }, + { + "epoch": 0.9749229345863113, + "grad_norm": 447.0550231933594, + "learning_rate": 1.470419930356992e-07, + "loss": 63.8094, + "step": 241310 + }, + { + "epoch": 0.9749633358516788, + "grad_norm": 1479.368896484375, + "learning_rate": 1.4670418529361574e-07, + "loss": 77.9393, + "step": 241320 + }, + { + "epoch": 0.9750037371170465, + "grad_norm": 321.82196044921875, + "learning_rate": 1.4636676460175303e-07, + "loss": 71.3396, + "step": 241330 + }, + { + "epoch": 0.9750441383824141, + "grad_norm": 586.1045532226562, + "learning_rate": 1.460297309666947e-07, + "loss": 63.108, + "step": 241340 + }, + { + "epoch": 0.9750845396477817, + "grad_norm": 626.8998413085938, + "learning_rate": 1.4569308439500664e-07, + "loss": 77.297, + "step": 241350 + }, + { + "epoch": 0.9751249409131494, + "grad_norm": 769.2272338867188, + "learning_rate": 1.4535682489325243e-07, + "loss": 79.631, + "step": 241360 + }, + { + "epoch": 0.975165342178517, + "grad_norm": 483.20745849609375, + "learning_rate": 1.450209524679891e-07, + "loss": 89.4462, + "step": 241370 + }, + { + "epoch": 0.9752057434438847, + "grad_norm": 896.4191284179688, + "learning_rate": 1.4468546712576692e-07, + "loss": 56.7267, + "step": 241380 + }, + { + "epoch": 0.9752461447092523, + "grad_norm": 943.5224609375, + "learning_rate": 1.4435036887312292e-07, + "loss": 96.5323, + "step": 241390 + }, + { + "epoch": 0.97528654597462, + "grad_norm": 847.03955078125, + "learning_rate": 1.4401565771658965e-07, + "loss": 69.0555, + "step": 241400 + }, + { + "epoch": 0.9753269472399876, + "grad_norm": 560.2784423828125, + "learning_rate": 1.43681333662693e-07, + "loss": 49.5203, + "step": 241410 + }, + { + "epoch": 0.9753673485053552, + "grad_norm": 723.2224731445312, + "learning_rate": 1.4334739671795217e-07, + "loss": 85.0161, + "step": 241420 + }, + { + "epoch": 0.9754077497707229, + "grad_norm": 690.346923828125, + "learning_rate": 1.4301384688887755e-07, + "loss": 72.8849, + "step": 241430 + }, + { + "epoch": 0.9754481510360905, + "grad_norm": 315.0877380371094, + "learning_rate": 1.4268068418197057e-07, + "loss": 40.6359, + "step": 241440 + }, + { + "epoch": 0.975488552301458, + "grad_norm": 546.4666748046875, + "learning_rate": 1.4234790860372827e-07, + "loss": 49.4746, + "step": 241450 + }, + { + "epoch": 0.9755289535668257, + "grad_norm": 833.85107421875, + "learning_rate": 1.4201552016063434e-07, + "loss": 64.3427, + "step": 241460 + }, + { + "epoch": 0.9755693548321933, + "grad_norm": 357.6621398925781, + "learning_rate": 1.4168351885917253e-07, 
+ "loss": 40.1998, + "step": 241470 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 430.1158142089844, + "learning_rate": 1.4135190470581538e-07, + "loss": 54.1172, + "step": 241480 + }, + { + "epoch": 0.9756501573629286, + "grad_norm": 841.9488525390625, + "learning_rate": 1.4102067770702443e-07, + "loss": 74.3591, + "step": 241490 + }, + { + "epoch": 0.9756905586282962, + "grad_norm": 577.3983154296875, + "learning_rate": 1.4068983786926117e-07, + "loss": 88.5444, + "step": 241500 + }, + { + "epoch": 0.9757309598936639, + "grad_norm": 860.0238037109375, + "learning_rate": 1.4035938519897152e-07, + "loss": 45.5844, + "step": 241510 + }, + { + "epoch": 0.9757713611590315, + "grad_norm": 1983.9759521484375, + "learning_rate": 1.400293197026037e-07, + "loss": 112.5443, + "step": 241520 + }, + { + "epoch": 0.9758117624243992, + "grad_norm": 879.3599243164062, + "learning_rate": 1.396996413865881e-07, + "loss": 45.5627, + "step": 241530 + }, + { + "epoch": 0.9758521636897668, + "grad_norm": 588.695068359375, + "learning_rate": 1.393703502573507e-07, + "loss": 53.1693, + "step": 241540 + }, + { + "epoch": 0.9758925649551344, + "grad_norm": 719.10791015625, + "learning_rate": 1.390414463213152e-07, + "loss": 47.0632, + "step": 241550 + }, + { + "epoch": 0.9759329662205021, + "grad_norm": 707.285888671875, + "learning_rate": 1.3871292958488992e-07, + "loss": 45.2965, + "step": 241560 + }, + { + "epoch": 0.9759733674858696, + "grad_norm": 784.40185546875, + "learning_rate": 1.3838480005448295e-07, + "loss": 71.5274, + "step": 241570 + }, + { + "epoch": 0.9760137687512372, + "grad_norm": 622.0474243164062, + "learning_rate": 1.3805705773648924e-07, + "loss": 54.9772, + "step": 241580 + }, + { + "epoch": 0.9760541700166049, + "grad_norm": 686.4695434570312, + "learning_rate": 1.3772970263729923e-07, + "loss": 81.2897, + "step": 241590 + }, + { + "epoch": 0.9760945712819725, + "grad_norm": 1589.612060546875, + "learning_rate": 1.3740273476329224e-07, + "loss": 81.0396, + "step": 241600 + }, + { + "epoch": 0.9761349725473402, + "grad_norm": 871.5650634765625, + "learning_rate": 1.3707615412084763e-07, + "loss": 67.6318, + "step": 241610 + }, + { + "epoch": 0.9761753738127078, + "grad_norm": 940.317138671875, + "learning_rate": 1.367499607163292e-07, + "loss": 61.876, + "step": 241620 + }, + { + "epoch": 0.9762157750780754, + "grad_norm": 587.5680541992188, + "learning_rate": 1.3642415455609625e-07, + "loss": 60.6986, + "step": 241630 + }, + { + "epoch": 0.9762561763434431, + "grad_norm": 687.8201904296875, + "learning_rate": 1.3609873564650155e-07, + "loss": 57.537, + "step": 241640 + }, + { + "epoch": 0.9762965776088107, + "grad_norm": 399.52972412109375, + "learning_rate": 1.3577370399388444e-07, + "loss": 43.4462, + "step": 241650 + }, + { + "epoch": 0.9763369788741784, + "grad_norm": 286.0831604003906, + "learning_rate": 1.3544905960459098e-07, + "loss": 76.7701, + "step": 241660 + }, + { + "epoch": 0.976377380139546, + "grad_norm": 365.4721984863281, + "learning_rate": 1.3512480248494276e-07, + "loss": 42.112, + "step": 241670 + }, + { + "epoch": 0.9764177814049136, + "grad_norm": 840.369140625, + "learning_rate": 1.3480093264126359e-07, + "loss": 69.3421, + "step": 241680 + }, + { + "epoch": 0.9764581826702813, + "grad_norm": 1049.4903564453125, + "learning_rate": 1.3447745007986844e-07, + "loss": 99.538, + "step": 241690 + }, + { + "epoch": 0.9764985839356488, + "grad_norm": 459.81390380859375, + "learning_rate": 1.3415435480706118e-07, + "loss": 49.4776, + "step": 241700 + }, + { + 
"epoch": 0.9765389852010165, + "grad_norm": 250.27896118164062, + "learning_rate": 1.338316468291456e-07, + "loss": 43.0766, + "step": 241710 + }, + { + "epoch": 0.9765793864663841, + "grad_norm": 778.7449340820312, + "learning_rate": 1.3350932615240563e-07, + "loss": 69.3938, + "step": 241720 + }, + { + "epoch": 0.9766197877317517, + "grad_norm": 1142.8193359375, + "learning_rate": 1.3318739278313176e-07, + "loss": 79.1882, + "step": 241730 + }, + { + "epoch": 0.9766601889971194, + "grad_norm": 1218.2408447265625, + "learning_rate": 1.328658467275967e-07, + "loss": 64.6144, + "step": 241740 + }, + { + "epoch": 0.976700590262487, + "grad_norm": 288.0774230957031, + "learning_rate": 1.325446879920711e-07, + "loss": 68.0282, + "step": 241750 + }, + { + "epoch": 0.9767409915278547, + "grad_norm": 618.6679077148438, + "learning_rate": 1.3222391658281652e-07, + "loss": 53.2939, + "step": 241760 + }, + { + "epoch": 0.9767813927932223, + "grad_norm": 1004.6763916015625, + "learning_rate": 1.3190353250608356e-07, + "loss": 68.4528, + "step": 241770 + }, + { + "epoch": 0.9768217940585899, + "grad_norm": 244.65829467773438, + "learning_rate": 1.3158353576812055e-07, + "loss": 44.4855, + "step": 241780 + }, + { + "epoch": 0.9768621953239576, + "grad_norm": 867.466796875, + "learning_rate": 1.312639263751647e-07, + "loss": 48.0175, + "step": 241790 + }, + { + "epoch": 0.9769025965893252, + "grad_norm": 501.9642028808594, + "learning_rate": 1.3094470433344886e-07, + "loss": 55.1853, + "step": 241800 + }, + { + "epoch": 0.9769429978546929, + "grad_norm": 667.4525146484375, + "learning_rate": 1.3062586964919466e-07, + "loss": 51.0024, + "step": 241810 + }, + { + "epoch": 0.9769833991200605, + "grad_norm": 655.6387939453125, + "learning_rate": 1.3030742232861937e-07, + "loss": 94.6192, + "step": 241820 + }, + { + "epoch": 0.977023800385428, + "grad_norm": 530.9959106445312, + "learning_rate": 1.2998936237792914e-07, + "loss": 35.7354, + "step": 241830 + }, + { + "epoch": 0.9770642016507957, + "grad_norm": 546.5045166015625, + "learning_rate": 1.2967168980332568e-07, + "loss": 43.0903, + "step": 241840 + }, + { + "epoch": 0.9771046029161633, + "grad_norm": 738.7405395507812, + "learning_rate": 1.29354404611004e-07, + "loss": 60.8502, + "step": 241850 + }, + { + "epoch": 0.9771450041815309, + "grad_norm": 673.092041015625, + "learning_rate": 1.290375068071481e-07, + "loss": 72.2773, + "step": 241860 + }, + { + "epoch": 0.9771854054468986, + "grad_norm": 815.2345581054688, + "learning_rate": 1.2872099639793522e-07, + "loss": 65.5121, + "step": 241870 + }, + { + "epoch": 0.9772258067122662, + "grad_norm": 667.307373046875, + "learning_rate": 1.2840487338953823e-07, + "loss": 75.511, + "step": 241880 + }, + { + "epoch": 0.9772662079776339, + "grad_norm": 443.1006164550781, + "learning_rate": 1.280891377881188e-07, + "loss": 62.4731, + "step": 241890 + }, + { + "epoch": 0.9773066092430015, + "grad_norm": 910.4033813476562, + "learning_rate": 1.2777378959983212e-07, + "loss": 88.5248, + "step": 241900 + }, + { + "epoch": 0.9773470105083691, + "grad_norm": 514.3375854492188, + "learning_rate": 1.274588288308265e-07, + "loss": 45.65, + "step": 241910 + }, + { + "epoch": 0.9773874117737368, + "grad_norm": 593.6806640625, + "learning_rate": 1.2714425548724153e-07, + "loss": 83.1789, + "step": 241920 + }, + { + "epoch": 0.9774278130391044, + "grad_norm": 518.370361328125, + "learning_rate": 1.2683006957521226e-07, + "loss": 36.2255, + "step": 241930 + }, + { + "epoch": 0.9774682143044721, + "grad_norm": 
1082.47119140625, + "learning_rate": 1.265162711008583e-07, + "loss": 67.2249, + "step": 241940 + }, + { + "epoch": 0.9775086155698397, + "grad_norm": 513.029052734375, + "learning_rate": 1.2620286007030358e-07, + "loss": 58.6939, + "step": 241950 + }, + { + "epoch": 0.9775490168352072, + "grad_norm": 927.8790893554688, + "learning_rate": 1.2588983648965658e-07, + "loss": 53.7938, + "step": 241960 + }, + { + "epoch": 0.9775894181005749, + "grad_norm": 648.1143188476562, + "learning_rate": 1.2557720036501908e-07, + "loss": 52.7858, + "step": 241970 + }, + { + "epoch": 0.9776298193659425, + "grad_norm": 724.3201904296875, + "learning_rate": 1.252649517024862e-07, + "loss": 81.7216, + "step": 241980 + }, + { + "epoch": 0.9776702206313101, + "grad_norm": 394.4438781738281, + "learning_rate": 1.2495309050814198e-07, + "loss": 62.8394, + "step": 241990 + }, + { + "epoch": 0.9777106218966778, + "grad_norm": 508.3453063964844, + "learning_rate": 1.2464161678807262e-07, + "loss": 89.4998, + "step": 242000 + }, + { + "epoch": 0.9777510231620454, + "grad_norm": 651.3312377929688, + "learning_rate": 1.2433053054834665e-07, + "loss": 63.9369, + "step": 242010 + }, + { + "epoch": 0.9777914244274131, + "grad_norm": 883.4607543945312, + "learning_rate": 1.240198317950303e-07, + "loss": 47.8017, + "step": 242020 + }, + { + "epoch": 0.9778318256927807, + "grad_norm": 909.6490478515625, + "learning_rate": 1.2370952053417873e-07, + "loss": 84.8552, + "step": 242030 + }, + { + "epoch": 0.9778722269581483, + "grad_norm": 857.0239868164062, + "learning_rate": 1.2339959677184487e-07, + "loss": 79.7921, + "step": 242040 + }, + { + "epoch": 0.977912628223516, + "grad_norm": 823.301513671875, + "learning_rate": 1.2309006051406835e-07, + "loss": 78.5335, + "step": 242050 + }, + { + "epoch": 0.9779530294888836, + "grad_norm": 887.4210815429688, + "learning_rate": 1.2278091176688435e-07, + "loss": 102.7393, + "step": 242060 + }, + { + "epoch": 0.9779934307542513, + "grad_norm": 827.0256958007812, + "learning_rate": 1.2247215053632133e-07, + "loss": 61.0346, + "step": 242070 + }, + { + "epoch": 0.9780338320196189, + "grad_norm": 740.2754516601562, + "learning_rate": 1.2216377682839453e-07, + "loss": 64.1728, + "step": 242080 + }, + { + "epoch": 0.9780742332849864, + "grad_norm": 409.75433349609375, + "learning_rate": 1.2185579064912135e-07, + "loss": 41.568, + "step": 242090 + }, + { + "epoch": 0.9781146345503541, + "grad_norm": 585.8453979492188, + "learning_rate": 1.215481920045014e-07, + "loss": 58.3996, + "step": 242100 + }, + { + "epoch": 0.9781550358157217, + "grad_norm": 921.2176513671875, + "learning_rate": 1.2124098090053438e-07, + "loss": 102.05, + "step": 242110 + }, + { + "epoch": 0.9781954370810894, + "grad_norm": 853.3135375976562, + "learning_rate": 1.2093415734320878e-07, + "loss": 79.619, + "step": 242120 + }, + { + "epoch": 0.978235838346457, + "grad_norm": 499.5993957519531, + "learning_rate": 1.2062772133850653e-07, + "loss": 48.2903, + "step": 242130 + }, + { + "epoch": 0.9782762396118246, + "grad_norm": 846.5863037109375, + "learning_rate": 1.203216728924006e-07, + "loss": 49.6178, + "step": 242140 + }, + { + "epoch": 0.9783166408771923, + "grad_norm": 719.7883911132812, + "learning_rate": 1.2001601201085954e-07, + "loss": 111.8276, + "step": 242150 + }, + { + "epoch": 0.9783570421425599, + "grad_norm": 2354.08642578125, + "learning_rate": 1.1971073869984084e-07, + "loss": 61.9249, + "step": 242160 + }, + { + "epoch": 0.9783974434079276, + "grad_norm": 538.0729370117188, + "learning_rate": 
1.194058529652975e-07, + "loss": 96.9726, + "step": 242170 + }, + { + "epoch": 0.9784378446732952, + "grad_norm": 865.9317016601562, + "learning_rate": 1.191013548131692e-07, + "loss": 64.2433, + "step": 242180 + }, + { + "epoch": 0.9784782459386628, + "grad_norm": 767.6415405273438, + "learning_rate": 1.1879724424939787e-07, + "loss": 65.4977, + "step": 242190 + }, + { + "epoch": 0.9785186472040305, + "grad_norm": 283.1065673828125, + "learning_rate": 1.1849352127990765e-07, + "loss": 53.3819, + "step": 242200 + }, + { + "epoch": 0.978559048469398, + "grad_norm": 343.9933776855469, + "learning_rate": 1.1819018591062492e-07, + "loss": 52.4081, + "step": 242210 + }, + { + "epoch": 0.9785994497347656, + "grad_norm": 1095.9114990234375, + "learning_rate": 1.1788723814745828e-07, + "loss": 76.0992, + "step": 242220 + }, + { + "epoch": 0.9786398510001333, + "grad_norm": 542.9095458984375, + "learning_rate": 1.1758467799631635e-07, + "loss": 65.0493, + "step": 242230 + }, + { + "epoch": 0.9786802522655009, + "grad_norm": 548.6416625976562, + "learning_rate": 1.1728250546309661e-07, + "loss": 78.7896, + "step": 242240 + }, + { + "epoch": 0.9787206535308686, + "grad_norm": 666.7401733398438, + "learning_rate": 1.1698072055369213e-07, + "loss": 85.5203, + "step": 242250 + }, + { + "epoch": 0.9787610547962362, + "grad_norm": 869.5223388671875, + "learning_rate": 1.166793232739849e-07, + "loss": 67.8039, + "step": 242260 + }, + { + "epoch": 0.9788014560616038, + "grad_norm": 618.5437622070312, + "learning_rate": 1.1637831362985019e-07, + "loss": 54.5165, + "step": 242270 + }, + { + "epoch": 0.9788418573269715, + "grad_norm": 688.8766479492188, + "learning_rate": 1.1607769162715665e-07, + "loss": 38.4375, + "step": 242280 + }, + { + "epoch": 0.9788822585923391, + "grad_norm": 280.96270751953125, + "learning_rate": 1.1577745727176625e-07, + "loss": 65.3113, + "step": 242290 + }, + { + "epoch": 0.9789226598577068, + "grad_norm": 624.2942504882812, + "learning_rate": 1.1547761056952989e-07, + "loss": 75.3675, + "step": 242300 + }, + { + "epoch": 0.9789630611230744, + "grad_norm": 493.1570739746094, + "learning_rate": 1.1517815152629619e-07, + "loss": 59.1935, + "step": 242310 + }, + { + "epoch": 0.979003462388442, + "grad_norm": 1030.673828125, + "learning_rate": 1.1487908014790272e-07, + "loss": 82.4092, + "step": 242320 + }, + { + "epoch": 0.9790438636538097, + "grad_norm": 530.0272216796875, + "learning_rate": 1.1458039644017816e-07, + "loss": 84.7378, + "step": 242330 + }, + { + "epoch": 0.9790842649191772, + "grad_norm": 1225.375, + "learning_rate": 1.1428210040894672e-07, + "loss": 64.7312, + "step": 242340 + }, + { + "epoch": 0.9791246661845449, + "grad_norm": 1083.844970703125, + "learning_rate": 1.1398419206002154e-07, + "loss": 64.9034, + "step": 242350 + }, + { + "epoch": 0.9791650674499125, + "grad_norm": 824.5458374023438, + "learning_rate": 1.1368667139921575e-07, + "loss": 62.0974, + "step": 242360 + }, + { + "epoch": 0.9792054687152801, + "grad_norm": 1552.5010986328125, + "learning_rate": 1.1338953843232469e-07, + "loss": 72.9025, + "step": 242370 + }, + { + "epoch": 0.9792458699806478, + "grad_norm": 258.69476318359375, + "learning_rate": 1.1309279316514376e-07, + "loss": 61.3686, + "step": 242380 + }, + { + "epoch": 0.9792862712460154, + "grad_norm": 861.3203735351562, + "learning_rate": 1.12796435603455e-07, + "loss": 78.4393, + "step": 242390 + }, + { + "epoch": 0.9793266725113831, + "grad_norm": 918.0736083984375, + "learning_rate": 1.1250046575304042e-07, + "loss": 54.3567, + 
"step": 242400 + }, + { + "epoch": 0.9793670737767507, + "grad_norm": 721.08935546875, + "learning_rate": 1.1220488361966875e-07, + "loss": 69.69, + "step": 242410 + }, + { + "epoch": 0.9794074750421183, + "grad_norm": 482.4394836425781, + "learning_rate": 1.1190968920909983e-07, + "loss": 61.0083, + "step": 242420 + }, + { + "epoch": 0.979447876307486, + "grad_norm": 880.8029174804688, + "learning_rate": 1.116148825270913e-07, + "loss": 63.6573, + "step": 242430 + }, + { + "epoch": 0.9794882775728536, + "grad_norm": 1077.541748046875, + "learning_rate": 1.1132046357938964e-07, + "loss": 84.227, + "step": 242440 + }, + { + "epoch": 0.9795286788382213, + "grad_norm": 690.1657104492188, + "learning_rate": 1.1102643237173471e-07, + "loss": 90.8331, + "step": 242450 + }, + { + "epoch": 0.9795690801035889, + "grad_norm": 455.1344299316406, + "learning_rate": 1.1073278890985973e-07, + "loss": 42.3968, + "step": 242460 + }, + { + "epoch": 0.9796094813689564, + "grad_norm": 886.1163940429688, + "learning_rate": 1.1043953319948897e-07, + "loss": 50.71, + "step": 242470 + }, + { + "epoch": 0.9796498826343241, + "grad_norm": 648.0940551757812, + "learning_rate": 1.1014666524633788e-07, + "loss": 86.5985, + "step": 242480 + }, + { + "epoch": 0.9796902838996917, + "grad_norm": 511.8902282714844, + "learning_rate": 1.0985418505611966e-07, + "loss": 40.5249, + "step": 242490 + }, + { + "epoch": 0.9797306851650593, + "grad_norm": 791.1116333007812, + "learning_rate": 1.0956209263453421e-07, + "loss": 66.8648, + "step": 242500 + }, + { + "epoch": 0.979771086430427, + "grad_norm": 882.7883911132812, + "learning_rate": 1.0927038798727474e-07, + "loss": 77.4512, + "step": 242510 + }, + { + "epoch": 0.9798114876957946, + "grad_norm": 822.940185546875, + "learning_rate": 1.0897907112003003e-07, + "loss": 107.5495, + "step": 242520 + }, + { + "epoch": 0.9798518889611623, + "grad_norm": 649.629150390625, + "learning_rate": 1.0868814203847777e-07, + "loss": 97.3643, + "step": 242530 + }, + { + "epoch": 0.9798922902265299, + "grad_norm": 906.0595092773438, + "learning_rate": 1.0839760074829342e-07, + "loss": 91.9293, + "step": 242540 + }, + { + "epoch": 0.9799326914918975, + "grad_norm": 898.42138671875, + "learning_rate": 1.0810744725513467e-07, + "loss": 81.1443, + "step": 242550 + }, + { + "epoch": 0.9799730927572652, + "grad_norm": 1379.10546875, + "learning_rate": 1.0781768156466587e-07, + "loss": 69.7558, + "step": 242560 + }, + { + "epoch": 0.9800134940226328, + "grad_norm": 689.7366333007812, + "learning_rate": 1.0752830368253142e-07, + "loss": 85.3837, + "step": 242570 + }, + { + "epoch": 0.9800538952880005, + "grad_norm": 800.4497680664062, + "learning_rate": 1.0723931361437345e-07, + "loss": 59.6612, + "step": 242580 + }, + { + "epoch": 0.9800942965533681, + "grad_norm": 1398.5048828125, + "learning_rate": 1.0695071136582747e-07, + "loss": 67.893, + "step": 242590 + }, + { + "epoch": 0.9801346978187356, + "grad_norm": 672.5675659179688, + "learning_rate": 1.0666249694251785e-07, + "loss": 63.0337, + "step": 242600 + }, + { + "epoch": 0.9801750990841033, + "grad_norm": 727.41650390625, + "learning_rate": 1.0637467035006677e-07, + "loss": 60.7368, + "step": 242610 + }, + { + "epoch": 0.9802155003494709, + "grad_norm": 494.944580078125, + "learning_rate": 1.0608723159408086e-07, + "loss": 58.3254, + "step": 242620 + }, + { + "epoch": 0.9802559016148386, + "grad_norm": 419.864013671875, + "learning_rate": 1.0580018068016895e-07, + "loss": 58.1856, + "step": 242630 + }, + { + "epoch": 0.9802963028802062, + 
"grad_norm": 2405.710693359375, + "learning_rate": 1.0551351761392214e-07, + "loss": 99.0624, + "step": 242640 + }, + { + "epoch": 0.9803367041455738, + "grad_norm": 625.212646484375, + "learning_rate": 1.0522724240093374e-07, + "loss": 52.1995, + "step": 242650 + }, + { + "epoch": 0.9803771054109415, + "grad_norm": 813.3497314453125, + "learning_rate": 1.0494135504678149e-07, + "loss": 71.0707, + "step": 242660 + }, + { + "epoch": 0.9804175066763091, + "grad_norm": 958.5562133789062, + "learning_rate": 1.0465585555704095e-07, + "loss": 67.2548, + "step": 242670 + }, + { + "epoch": 0.9804579079416768, + "grad_norm": 859.7366943359375, + "learning_rate": 1.0437074393727654e-07, + "loss": 62.3575, + "step": 242680 + }, + { + "epoch": 0.9804983092070444, + "grad_norm": 1028.4019775390625, + "learning_rate": 1.0408602019304825e-07, + "loss": 89.929, + "step": 242690 + }, + { + "epoch": 0.980538710472412, + "grad_norm": 613.0112915039062, + "learning_rate": 1.0380168432990722e-07, + "loss": 54.5782, + "step": 242700 + }, + { + "epoch": 0.9805791117377797, + "grad_norm": 425.4281005859375, + "learning_rate": 1.0351773635339346e-07, + "loss": 62.3901, + "step": 242710 + }, + { + "epoch": 0.9806195130031473, + "grad_norm": 516.1365966796875, + "learning_rate": 1.0323417626904697e-07, + "loss": 79.7791, + "step": 242720 + }, + { + "epoch": 0.9806599142685148, + "grad_norm": 649.6248779296875, + "learning_rate": 1.0295100408239223e-07, + "loss": 41.8165, + "step": 242730 + }, + { + "epoch": 0.9807003155338825, + "grad_norm": 725.009765625, + "learning_rate": 1.0266821979894926e-07, + "loss": 37.7587, + "step": 242740 + }, + { + "epoch": 0.9807407167992501, + "grad_norm": 720.6854858398438, + "learning_rate": 1.0238582342423586e-07, + "loss": 67.5808, + "step": 242750 + }, + { + "epoch": 0.9807811180646178, + "grad_norm": 894.71484375, + "learning_rate": 1.0210381496375432e-07, + "loss": 60.6433, + "step": 242760 + }, + { + "epoch": 0.9808215193299854, + "grad_norm": 838.4900512695312, + "learning_rate": 1.0182219442300467e-07, + "loss": 71.3421, + "step": 242770 + }, + { + "epoch": 0.980861920595353, + "grad_norm": 682.9637451171875, + "learning_rate": 1.0154096180747141e-07, + "loss": 77.4956, + "step": 242780 + }, + { + "epoch": 0.9809023218607207, + "grad_norm": 591.2731323242188, + "learning_rate": 1.0126011712264572e-07, + "loss": 52.899, + "step": 242790 + }, + { + "epoch": 0.9809427231260883, + "grad_norm": 476.27301025390625, + "learning_rate": 1.0097966037399654e-07, + "loss": 84.791, + "step": 242800 + }, + { + "epoch": 0.980983124391456, + "grad_norm": 659.4987182617188, + "learning_rate": 1.0069959156699505e-07, + "loss": 57.6696, + "step": 242810 + }, + { + "epoch": 0.9810235256568236, + "grad_norm": 401.3479309082031, + "learning_rate": 1.0041991070709911e-07, + "loss": 64.1531, + "step": 242820 + }, + { + "epoch": 0.9810639269221912, + "grad_norm": 643.1160278320312, + "learning_rate": 1.0014061779976214e-07, + "loss": 114.3007, + "step": 242830 + }, + { + "epoch": 0.9811043281875589, + "grad_norm": 459.0188293457031, + "learning_rate": 9.986171285042867e-08, + "loss": 49.277, + "step": 242840 + }, + { + "epoch": 0.9811447294529264, + "grad_norm": 866.7553100585938, + "learning_rate": 9.958319586453436e-08, + "loss": 92.8686, + "step": 242850 + }, + { + "epoch": 0.981185130718294, + "grad_norm": 439.1510925292969, + "learning_rate": 9.930506684751485e-08, + "loss": 36.5999, + "step": 242860 + }, + { + "epoch": 0.9812255319836617, + "grad_norm": 940.337646484375, + "learning_rate": 
9.902732580478581e-08, + "loss": 87.6294, + "step": 242870 + }, + { + "epoch": 0.9812659332490293, + "grad_norm": 642.0379638671875, + "learning_rate": 9.874997274176734e-08, + "loss": 73.3465, + "step": 242880 + }, + { + "epoch": 0.981306334514397, + "grad_norm": 419.84979248046875, + "learning_rate": 9.84730076638596e-08, + "loss": 62.6954, + "step": 242890 + }, + { + "epoch": 0.9813467357797646, + "grad_norm": 942.747314453125, + "learning_rate": 9.819643057647155e-08, + "loss": 63.008, + "step": 242900 + }, + { + "epoch": 0.9813871370451323, + "grad_norm": 998.9188232421875, + "learning_rate": 9.79202414849878e-08, + "loss": 69.1018, + "step": 242910 + }, + { + "epoch": 0.9814275383104999, + "grad_norm": 901.4616088867188, + "learning_rate": 9.764444039479515e-08, + "loss": 77.2129, + "step": 242920 + }, + { + "epoch": 0.9814679395758675, + "grad_norm": 1165.6771240234375, + "learning_rate": 9.736902731127151e-08, + "loss": 68.2475, + "step": 242930 + }, + { + "epoch": 0.9815083408412352, + "grad_norm": 568.0184326171875, + "learning_rate": 9.709400223978371e-08, + "loss": 110.0545, + "step": 242940 + }, + { + "epoch": 0.9815487421066028, + "grad_norm": 676.884033203125, + "learning_rate": 9.681936518569635e-08, + "loss": 48.984, + "step": 242950 + }, + { + "epoch": 0.9815891433719705, + "grad_norm": 1078.5535888671875, + "learning_rate": 9.65451161543629e-08, + "loss": 59.8154, + "step": 242960 + }, + { + "epoch": 0.9816295446373381, + "grad_norm": 759.2507934570312, + "learning_rate": 9.6271255151128e-08, + "loss": 62.6671, + "step": 242970 + }, + { + "epoch": 0.9816699459027056, + "grad_norm": 389.92205810546875, + "learning_rate": 9.599778218133182e-08, + "loss": 53.117, + "step": 242980 + }, + { + "epoch": 0.9817103471680733, + "grad_norm": 2118.018798828125, + "learning_rate": 9.572469725030786e-08, + "loss": 50.8676, + "step": 242990 + }, + { + "epoch": 0.9817507484334409, + "grad_norm": 727.9429321289062, + "learning_rate": 9.545200036337631e-08, + "loss": 62.7592, + "step": 243000 + }, + { + "epoch": 0.9817911496988085, + "grad_norm": 600.7180786132812, + "learning_rate": 9.517969152585738e-08, + "loss": 62.7623, + "step": 243010 + }, + { + "epoch": 0.9818315509641762, + "grad_norm": 676.1437377929688, + "learning_rate": 9.49077707430579e-08, + "loss": 72.0344, + "step": 243020 + }, + { + "epoch": 0.9818719522295438, + "grad_norm": 1271.2333984375, + "learning_rate": 9.46362380202781e-08, + "loss": 103.3745, + "step": 243030 + }, + { + "epoch": 0.9819123534949115, + "grad_norm": 705.3414916992188, + "learning_rate": 9.436509336281374e-08, + "loss": 48.8186, + "step": 243040 + }, + { + "epoch": 0.9819527547602791, + "grad_norm": 1205.256591796875, + "learning_rate": 9.409433677594727e-08, + "loss": 74.455, + "step": 243050 + }, + { + "epoch": 0.9819931560256467, + "grad_norm": 467.0628662109375, + "learning_rate": 9.382396826496331e-08, + "loss": 85.3956, + "step": 243060 + }, + { + "epoch": 0.9820335572910144, + "grad_norm": 584.787841796875, + "learning_rate": 9.355398783512881e-08, + "loss": 88.4715, + "step": 243070 + }, + { + "epoch": 0.982073958556382, + "grad_norm": 863.6755981445312, + "learning_rate": 9.328439549170843e-08, + "loss": 87.6742, + "step": 243080 + }, + { + "epoch": 0.9821143598217497, + "grad_norm": 684.5046997070312, + "learning_rate": 9.301519123995573e-08, + "loss": 61.5067, + "step": 243090 + }, + { + "epoch": 0.9821547610871173, + "grad_norm": 686.7987060546875, + "learning_rate": 9.274637508512207e-08, + "loss": 57.5942, + "step": 243100 + }, + { + 
"epoch": 0.9821951623524848, + "grad_norm": 797.9330444335938, + "learning_rate": 9.24779470324455e-08, + "loss": 49.8196, + "step": 243110 + }, + { + "epoch": 0.9822355636178525, + "grad_norm": 842.2488403320312, + "learning_rate": 9.220990708716182e-08, + "loss": 63.9506, + "step": 243120 + }, + { + "epoch": 0.9822759648832201, + "grad_norm": 654.0978393554688, + "learning_rate": 9.194225525449351e-08, + "loss": 52.0281, + "step": 243130 + }, + { + "epoch": 0.9823163661485877, + "grad_norm": 919.1511840820312, + "learning_rate": 9.167499153966086e-08, + "loss": 52.8014, + "step": 243140 + }, + { + "epoch": 0.9823567674139554, + "grad_norm": 1400.91552734375, + "learning_rate": 9.140811594787524e-08, + "loss": 73.651, + "step": 243150 + }, + { + "epoch": 0.982397168679323, + "grad_norm": 795.4043579101562, + "learning_rate": 9.114162848433694e-08, + "loss": 71.1419, + "step": 243160 + }, + { + "epoch": 0.9824375699446907, + "grad_norm": 666.7384643554688, + "learning_rate": 9.08755291542418e-08, + "loss": 76.2871, + "step": 243170 + }, + { + "epoch": 0.9824779712100583, + "grad_norm": 803.645263671875, + "learning_rate": 9.060981796277901e-08, + "loss": 96.4452, + "step": 243180 + }, + { + "epoch": 0.982518372475426, + "grad_norm": 1427.246826171875, + "learning_rate": 9.034449491512665e-08, + "loss": 63.2939, + "step": 243190 + }, + { + "epoch": 0.9825587737407936, + "grad_norm": 385.2389831542969, + "learning_rate": 9.007956001646057e-08, + "loss": 67.5535, + "step": 243200 + }, + { + "epoch": 0.9825991750061612, + "grad_norm": 763.7426147460938, + "learning_rate": 8.981501327194109e-08, + "loss": 45.8859, + "step": 243210 + }, + { + "epoch": 0.9826395762715289, + "grad_norm": 1280.779296875, + "learning_rate": 8.955085468673075e-08, + "loss": 100.7838, + "step": 243220 + }, + { + "epoch": 0.9826799775368965, + "grad_norm": 443.2326354980469, + "learning_rate": 8.928708426597655e-08, + "loss": 67.6591, + "step": 243230 + }, + { + "epoch": 0.982720378802264, + "grad_norm": 389.4408264160156, + "learning_rate": 8.902370201482102e-08, + "loss": 60.1805, + "step": 243240 + }, + { + "epoch": 0.9827607800676317, + "grad_norm": 383.8136901855469, + "learning_rate": 8.876070793840008e-08, + "loss": 58.9468, + "step": 243250 + }, + { + "epoch": 0.9828011813329993, + "grad_norm": 458.1418762207031, + "learning_rate": 8.84981020418385e-08, + "loss": 58.7392, + "step": 243260 + }, + { + "epoch": 0.982841582598367, + "grad_norm": 400.0176086425781, + "learning_rate": 8.823588433025887e-08, + "loss": 58.368, + "step": 243270 + }, + { + "epoch": 0.9828819838637346, + "grad_norm": 479.36785888671875, + "learning_rate": 8.797405480877263e-08, + "loss": 56.4461, + "step": 243280 + }, + { + "epoch": 0.9829223851291022, + "grad_norm": 497.1054992675781, + "learning_rate": 8.771261348248239e-08, + "loss": 60.5835, + "step": 243290 + }, + { + "epoch": 0.9829627863944699, + "grad_norm": 925.6166381835938, + "learning_rate": 8.745156035648627e-08, + "loss": 90.6087, + "step": 243300 + }, + { + "epoch": 0.9830031876598375, + "grad_norm": 178.6003875732422, + "learning_rate": 8.719089543587355e-08, + "loss": 37.2681, + "step": 243310 + }, + { + "epoch": 0.9830435889252052, + "grad_norm": 1146.905029296875, + "learning_rate": 8.693061872572683e-08, + "loss": 84.9958, + "step": 243320 + }, + { + "epoch": 0.9830839901905728, + "grad_norm": 595.4630737304688, + "learning_rate": 8.667073023111983e-08, + "loss": 55.6841, + "step": 243330 + }, + { + "epoch": 0.9831243914559404, + "grad_norm": 864.3109741210938, + 
"learning_rate": 8.641122995711737e-08, + "loss": 39.4846, + "step": 243340 + }, + { + "epoch": 0.9831647927213081, + "grad_norm": 516.200927734375, + "learning_rate": 8.615211790878209e-08, + "loss": 62.8645, + "step": 243350 + }, + { + "epoch": 0.9832051939866756, + "grad_norm": 637.9439086914062, + "learning_rate": 8.589339409116105e-08, + "loss": 49.8689, + "step": 243360 + }, + { + "epoch": 0.9832455952520432, + "grad_norm": 631.1906127929688, + "learning_rate": 8.563505850930353e-08, + "loss": 66.464, + "step": 243370 + }, + { + "epoch": 0.9832859965174109, + "grad_norm": 393.0920715332031, + "learning_rate": 8.53771111682411e-08, + "loss": 48.3089, + "step": 243380 + }, + { + "epoch": 0.9833263977827785, + "grad_norm": 548.7828369140625, + "learning_rate": 8.511955207300527e-08, + "loss": 73.801, + "step": 243390 + }, + { + "epoch": 0.9833667990481462, + "grad_norm": 512.3258056640625, + "learning_rate": 8.486238122861867e-08, + "loss": 66.8518, + "step": 243400 + }, + { + "epoch": 0.9834072003135138, + "grad_norm": 1109.8782958984375, + "learning_rate": 8.460559864009066e-08, + "loss": 57.6219, + "step": 243410 + }, + { + "epoch": 0.9834476015788814, + "grad_norm": 450.7321472167969, + "learning_rate": 8.434920431243055e-08, + "loss": 81.1311, + "step": 243420 + }, + { + "epoch": 0.9834880028442491, + "grad_norm": 716.6806640625, + "learning_rate": 8.409319825063434e-08, + "loss": 61.5604, + "step": 243430 + }, + { + "epoch": 0.9835284041096167, + "grad_norm": 692.5364379882812, + "learning_rate": 8.383758045969582e-08, + "loss": 64.0649, + "step": 243440 + }, + { + "epoch": 0.9835688053749844, + "grad_norm": 543.8341064453125, + "learning_rate": 8.358235094459766e-08, + "loss": 57.1335, + "step": 243450 + }, + { + "epoch": 0.983609206640352, + "grad_norm": 758.9132080078125, + "learning_rate": 8.33275097103159e-08, + "loss": 56.6338, + "step": 243460 + }, + { + "epoch": 0.9836496079057196, + "grad_norm": 2274.101806640625, + "learning_rate": 8.307305676181543e-08, + "loss": 69.841, + "step": 243470 + }, + { + "epoch": 0.9836900091710873, + "grad_norm": 499.8296813964844, + "learning_rate": 8.281899210406119e-08, + "loss": 44.5148, + "step": 243480 + }, + { + "epoch": 0.9837304104364548, + "grad_norm": 1135.8594970703125, + "learning_rate": 8.256531574200699e-08, + "loss": 94.7456, + "step": 243490 + }, + { + "epoch": 0.9837708117018225, + "grad_norm": 911.710693359375, + "learning_rate": 8.231202768059332e-08, + "loss": 75.2026, + "step": 243500 + }, + { + "epoch": 0.9838112129671901, + "grad_norm": 1513.3843994140625, + "learning_rate": 8.205912792476068e-08, + "loss": 84.2203, + "step": 243510 + }, + { + "epoch": 0.9838516142325577, + "grad_norm": 476.7558898925781, + "learning_rate": 8.180661647944066e-08, + "loss": 62.9492, + "step": 243520 + }, + { + "epoch": 0.9838920154979254, + "grad_norm": 852.5686645507812, + "learning_rate": 8.155449334955601e-08, + "loss": 69.8845, + "step": 243530 + }, + { + "epoch": 0.983932416763293, + "grad_norm": 1797.7587890625, + "learning_rate": 8.130275854002057e-08, + "loss": 67.4081, + "step": 243540 + }, + { + "epoch": 0.9839728180286607, + "grad_norm": 456.5500183105469, + "learning_rate": 8.105141205574152e-08, + "loss": 48.7388, + "step": 243550 + }, + { + "epoch": 0.9840132192940283, + "grad_norm": 569.3279418945312, + "learning_rate": 8.080045390162383e-08, + "loss": 48.9876, + "step": 243560 + }, + { + "epoch": 0.9840536205593959, + "grad_norm": 618.7938842773438, + "learning_rate": 8.054988408255249e-08, + "loss": 70.7278, + "step": 
243570 + }, + { + "epoch": 0.9840940218247636, + "grad_norm": 538.7880859375, + "learning_rate": 8.02997026034169e-08, + "loss": 67.8635, + "step": 243580 + }, + { + "epoch": 0.9841344230901312, + "grad_norm": 313.9493408203125, + "learning_rate": 8.004990946909542e-08, + "loss": 52.4206, + "step": 243590 + }, + { + "epoch": 0.9841748243554989, + "grad_norm": 555.1470336914062, + "learning_rate": 7.980050468445744e-08, + "loss": 58.6381, + "step": 243600 + }, + { + "epoch": 0.9842152256208665, + "grad_norm": 599.9049682617188, + "learning_rate": 7.955148825436133e-08, + "loss": 71.1181, + "step": 243610 + }, + { + "epoch": 0.984255626886234, + "grad_norm": 258.2479553222656, + "learning_rate": 7.930286018366762e-08, + "loss": 63.8481, + "step": 243620 + }, + { + "epoch": 0.9842960281516017, + "grad_norm": 685.8587646484375, + "learning_rate": 7.905462047721691e-08, + "loss": 58.6644, + "step": 243630 + }, + { + "epoch": 0.9843364294169693, + "grad_norm": 1264.7772216796875, + "learning_rate": 7.880676913985419e-08, + "loss": 90.2695, + "step": 243640 + }, + { + "epoch": 0.984376830682337, + "grad_norm": 83.77970123291016, + "learning_rate": 7.855930617641116e-08, + "loss": 66.6998, + "step": 243650 + }, + { + "epoch": 0.9844172319477046, + "grad_norm": 785.5528564453125, + "learning_rate": 7.831223159170842e-08, + "loss": 57.8648, + "step": 243660 + }, + { + "epoch": 0.9844576332130722, + "grad_norm": 453.8236999511719, + "learning_rate": 7.806554539056654e-08, + "loss": 70.9954, + "step": 243670 + }, + { + "epoch": 0.9844980344784399, + "grad_norm": 383.247314453125, + "learning_rate": 7.78192475777928e-08, + "loss": 58.2965, + "step": 243680 + }, + { + "epoch": 0.9845384357438075, + "grad_norm": 884.933837890625, + "learning_rate": 7.757333815819002e-08, + "loss": 70.9535, + "step": 243690 + }, + { + "epoch": 0.9845788370091751, + "grad_norm": 495.12286376953125, + "learning_rate": 7.73278171365499e-08, + "loss": 63.3251, + "step": 243700 + }, + { + "epoch": 0.9846192382745428, + "grad_norm": 772.5108642578125, + "learning_rate": 7.708268451766198e-08, + "loss": 52.3488, + "step": 243710 + }, + { + "epoch": 0.9846596395399104, + "grad_norm": 594.0120239257812, + "learning_rate": 7.683794030630687e-08, + "loss": 56.1041, + "step": 243720 + }, + { + "epoch": 0.9847000408052781, + "grad_norm": 1154.523681640625, + "learning_rate": 7.659358450724963e-08, + "loss": 75.9384, + "step": 243730 + }, + { + "epoch": 0.9847404420706457, + "grad_norm": 361.6215515136719, + "learning_rate": 7.63496171252598e-08, + "loss": 44.1774, + "step": 243740 + }, + { + "epoch": 0.9847808433360132, + "grad_norm": 789.0961303710938, + "learning_rate": 7.61060381650891e-08, + "loss": 49.8357, + "step": 243750 + }, + { + "epoch": 0.9848212446013809, + "grad_norm": 905.8184204101562, + "learning_rate": 7.586284763149154e-08, + "loss": 53.7246, + "step": 243760 + }, + { + "epoch": 0.9848616458667485, + "grad_norm": 1172.7508544921875, + "learning_rate": 7.562004552920332e-08, + "loss": 97.4768, + "step": 243770 + }, + { + "epoch": 0.9849020471321162, + "grad_norm": 1009.7659301757812, + "learning_rate": 7.537763186296066e-08, + "loss": 59.2314, + "step": 243780 + }, + { + "epoch": 0.9849424483974838, + "grad_norm": 722.6148681640625, + "learning_rate": 7.513560663748865e-08, + "loss": 82.0689, + "step": 243790 + }, + { + "epoch": 0.9849828496628514, + "grad_norm": 1352.7979736328125, + "learning_rate": 7.489396985750797e-08, + "loss": 59.404, + "step": 243800 + }, + { + "epoch": 0.9850232509282191, + "grad_norm": 
556.8558959960938, + "learning_rate": 7.465272152772595e-08, + "loss": 78.4345, + "step": 243810 + }, + { + "epoch": 0.9850636521935867, + "grad_norm": 821.261474609375, + "learning_rate": 7.441186165284553e-08, + "loss": 54.2153, + "step": 243820 + }, + { + "epoch": 0.9851040534589544, + "grad_norm": 1048.9874267578125, + "learning_rate": 7.417139023756737e-08, + "loss": 68.843, + "step": 243830 + }, + { + "epoch": 0.985144454724322, + "grad_norm": 1082.3206787109375, + "learning_rate": 7.393130728657438e-08, + "loss": 108.0025, + "step": 243840 + }, + { + "epoch": 0.9851848559896896, + "grad_norm": 373.7929382324219, + "learning_rate": 7.369161280455172e-08, + "loss": 50.8421, + "step": 243850 + }, + { + "epoch": 0.9852252572550573, + "grad_norm": 450.90106201171875, + "learning_rate": 7.345230679616678e-08, + "loss": 67.6588, + "step": 243860 + }, + { + "epoch": 0.9852656585204249, + "grad_norm": 1049.5577392578125, + "learning_rate": 7.321338926608912e-08, + "loss": 74.0674, + "step": 243870 + }, + { + "epoch": 0.9853060597857924, + "grad_norm": 685.7279663085938, + "learning_rate": 7.297486021897726e-08, + "loss": 55.1783, + "step": 243880 + }, + { + "epoch": 0.9853464610511601, + "grad_norm": 360.4382019042969, + "learning_rate": 7.273671965947637e-08, + "loss": 43.5608, + "step": 243890 + }, + { + "epoch": 0.9853868623165277, + "grad_norm": 577.0673828125, + "learning_rate": 7.249896759223385e-08, + "loss": 47.1105, + "step": 243900 + }, + { + "epoch": 0.9854272635818954, + "grad_norm": 590.2833251953125, + "learning_rate": 7.226160402188375e-08, + "loss": 56.0085, + "step": 243910 + }, + { + "epoch": 0.985467664847263, + "grad_norm": 811.0994262695312, + "learning_rate": 7.20246289530535e-08, + "loss": 71.1872, + "step": 243920 + }, + { + "epoch": 0.9855080661126306, + "grad_norm": 657.19482421875, + "learning_rate": 7.178804239036163e-08, + "loss": 42.1817, + "step": 243930 + }, + { + "epoch": 0.9855484673779983, + "grad_norm": 2494.02001953125, + "learning_rate": 7.155184433842221e-08, + "loss": 95.3707, + "step": 243940 + }, + { + "epoch": 0.9855888686433659, + "grad_norm": 974.5989379882812, + "learning_rate": 7.131603480184046e-08, + "loss": 77.3227, + "step": 243950 + }, + { + "epoch": 0.9856292699087336, + "grad_norm": 337.721435546875, + "learning_rate": 7.108061378521047e-08, + "loss": 66.9099, + "step": 243960 + }, + { + "epoch": 0.9856696711741012, + "grad_norm": 679.7593383789062, + "learning_rate": 7.084558129312635e-08, + "loss": 54.3733, + "step": 243970 + }, + { + "epoch": 0.9857100724394688, + "grad_norm": 683.6921997070312, + "learning_rate": 7.061093733016444e-08, + "loss": 38.8053, + "step": 243980 + }, + { + "epoch": 0.9857504737048365, + "grad_norm": 1368.9061279296875, + "learning_rate": 7.037668190090552e-08, + "loss": 92.6351, + "step": 243990 + }, + { + "epoch": 0.985790874970204, + "grad_norm": 548.3333129882812, + "learning_rate": 7.01428150099126e-08, + "loss": 74.8858, + "step": 244000 + }, + { + "epoch": 0.9858312762355717, + "grad_norm": 534.1749877929688, + "learning_rate": 6.990933666174648e-08, + "loss": 75.2679, + "step": 244010 + }, + { + "epoch": 0.9858716775009393, + "grad_norm": 891.8532104492188, + "learning_rate": 6.967624686095908e-08, + "loss": 73.7884, + "step": 244020 + }, + { + "epoch": 0.9859120787663069, + "grad_norm": 88.4382095336914, + "learning_rate": 6.944354561209343e-08, + "loss": 37.3789, + "step": 244030 + }, + { + "epoch": 0.9859524800316746, + "grad_norm": 373.4239501953125, + "learning_rate": 6.92112329196859e-08, + 
"loss": 44.212, + "step": 244040 + }, + { + "epoch": 0.9859928812970422, + "grad_norm": 506.9051818847656, + "learning_rate": 6.897930878826841e-08, + "loss": 47.5511, + "step": 244050 + }, + { + "epoch": 0.9860332825624099, + "grad_norm": 719.2451782226562, + "learning_rate": 6.87477732223596e-08, + "loss": 84.1193, + "step": 244060 + }, + { + "epoch": 0.9860736838277775, + "grad_norm": 622.3131103515625, + "learning_rate": 6.851662622647359e-08, + "loss": 61.6768, + "step": 244070 + }, + { + "epoch": 0.9861140850931451, + "grad_norm": 1445.61669921875, + "learning_rate": 6.828586780511792e-08, + "loss": 65.1435, + "step": 244080 + }, + { + "epoch": 0.9861544863585128, + "grad_norm": 733.7633666992188, + "learning_rate": 6.805549796279121e-08, + "loss": 105.7263, + "step": 244090 + }, + { + "epoch": 0.9861948876238804, + "grad_norm": 304.1141052246094, + "learning_rate": 6.782551670398318e-08, + "loss": 44.5668, + "step": 244100 + }, + { + "epoch": 0.986235288889248, + "grad_norm": 1439.838623046875, + "learning_rate": 6.759592403317916e-08, + "loss": 57.4498, + "step": 244110 + }, + { + "epoch": 0.9862756901546157, + "grad_norm": 444.5619812011719, + "learning_rate": 6.736671995485333e-08, + "loss": 73.7206, + "step": 244120 + }, + { + "epoch": 0.9863160914199832, + "grad_norm": 773.2749633789062, + "learning_rate": 6.713790447347768e-08, + "loss": 81.6021, + "step": 244130 + }, + { + "epoch": 0.9863564926853509, + "grad_norm": 579.6310424804688, + "learning_rate": 6.690947759350863e-08, + "loss": 53.1077, + "step": 244140 + }, + { + "epoch": 0.9863968939507185, + "grad_norm": 733.1024780273438, + "learning_rate": 6.668143931940263e-08, + "loss": 89.6397, + "step": 244150 + }, + { + "epoch": 0.9864372952160861, + "grad_norm": 565.4549560546875, + "learning_rate": 6.645378965560278e-08, + "loss": 48.0025, + "step": 244160 + }, + { + "epoch": 0.9864776964814538, + "grad_norm": 498.0528259277344, + "learning_rate": 6.62265286065522e-08, + "loss": 52.4168, + "step": 244170 + }, + { + "epoch": 0.9865180977468214, + "grad_norm": 442.1097717285156, + "learning_rate": 6.599965617667403e-08, + "loss": 52.7895, + "step": 244180 + }, + { + "epoch": 0.9865584990121891, + "grad_norm": 1165.5400390625, + "learning_rate": 6.577317237039804e-08, + "loss": 63.3795, + "step": 244190 + }, + { + "epoch": 0.9865989002775567, + "grad_norm": 1350.6068115234375, + "learning_rate": 6.554707719213627e-08, + "loss": 70.7648, + "step": 244200 + }, + { + "epoch": 0.9866393015429243, + "grad_norm": 445.1037902832031, + "learning_rate": 6.532137064629629e-08, + "loss": 33.9495, + "step": 244210 + }, + { + "epoch": 0.986679702808292, + "grad_norm": 1126.8193359375, + "learning_rate": 6.509605273727903e-08, + "loss": 71.1502, + "step": 244220 + }, + { + "epoch": 0.9867201040736596, + "grad_norm": 830.3601684570312, + "learning_rate": 6.487112346947877e-08, + "loss": 47.5324, + "step": 244230 + }, + { + "epoch": 0.9867605053390273, + "grad_norm": 3858.345703125, + "learning_rate": 6.464658284727866e-08, + "loss": 99.3528, + "step": 244240 + }, + { + "epoch": 0.9868009066043949, + "grad_norm": 1037.9718017578125, + "learning_rate": 6.44224308750574e-08, + "loss": 89.1554, + "step": 244250 + }, + { + "epoch": 0.9868413078697624, + "grad_norm": 603.5880737304688, + "learning_rate": 6.419866755718263e-08, + "loss": 59.092, + "step": 244260 + }, + { + "epoch": 0.9868817091351301, + "grad_norm": 786.616943359375, + "learning_rate": 6.397529289801974e-08, + "loss": 83.1273, + "step": 244270 + }, + { + "epoch": 
0.9869221104004977, + "grad_norm": 448.66424560546875, + "learning_rate": 6.375230690192302e-08, + "loss": 62.6173, + "step": 244280 + }, + { + "epoch": 0.9869625116658653, + "grad_norm": 524.8383178710938, + "learning_rate": 6.35297095732379e-08, + "loss": 42.9701, + "step": 244290 + }, + { + "epoch": 0.987002912931233, + "grad_norm": 1696.2509765625, + "learning_rate": 6.330750091630533e-08, + "loss": 53.6779, + "step": 244300 + }, + { + "epoch": 0.9870433141966006, + "grad_norm": 709.8682861328125, + "learning_rate": 6.308568093545742e-08, + "loss": 72.9671, + "step": 244310 + }, + { + "epoch": 0.9870837154619683, + "grad_norm": 553.858154296875, + "learning_rate": 6.286424963501736e-08, + "loss": 62.381, + "step": 244320 + }, + { + "epoch": 0.9871241167273359, + "grad_norm": 952.5428466796875, + "learning_rate": 6.264320701930393e-08, + "loss": 40.5266, + "step": 244330 + }, + { + "epoch": 0.9871645179927035, + "grad_norm": 792.9031372070312, + "learning_rate": 6.24225530926248e-08, + "loss": 71.8348, + "step": 244340 + }, + { + "epoch": 0.9872049192580712, + "grad_norm": 486.25311279296875, + "learning_rate": 6.220228785928317e-08, + "loss": 76.8299, + "step": 244350 + }, + { + "epoch": 0.9872453205234388, + "grad_norm": 317.6905212402344, + "learning_rate": 6.198241132357342e-08, + "loss": 56.8437, + "step": 244360 + }, + { + "epoch": 0.9872857217888065, + "grad_norm": 490.3849792480469, + "learning_rate": 6.176292348978097e-08, + "loss": 114.248, + "step": 244370 + }, + { + "epoch": 0.9873261230541741, + "grad_norm": 1924.96435546875, + "learning_rate": 6.154382436218464e-08, + "loss": 75.2473, + "step": 244380 + }, + { + "epoch": 0.9873665243195416, + "grad_norm": 417.4039306640625, + "learning_rate": 6.132511394505658e-08, + "loss": 44.6136, + "step": 244390 + }, + { + "epoch": 0.9874069255849093, + "grad_norm": 706.2890014648438, + "learning_rate": 6.110679224266003e-08, + "loss": 70.9342, + "step": 244400 + }, + { + "epoch": 0.9874473268502769, + "grad_norm": 718.73583984375, + "learning_rate": 6.088885925925159e-08, + "loss": 65.3186, + "step": 244410 + }, + { + "epoch": 0.9874877281156446, + "grad_norm": 472.24932861328125, + "learning_rate": 6.067131499908341e-08, + "loss": 61.8294, + "step": 244420 + }, + { + "epoch": 0.9875281293810122, + "grad_norm": 465.06866455078125, + "learning_rate": 6.045415946638988e-08, + "loss": 103.9113, + "step": 244430 + }, + { + "epoch": 0.9875685306463798, + "grad_norm": 421.2804870605469, + "learning_rate": 6.023739266540984e-08, + "loss": 57.386, + "step": 244440 + }, + { + "epoch": 0.9876089319117475, + "grad_norm": 874.5800170898438, + "learning_rate": 6.002101460036436e-08, + "loss": 68.0332, + "step": 244450 + }, + { + "epoch": 0.9876493331771151, + "grad_norm": 724.2532958984375, + "learning_rate": 5.980502527547893e-08, + "loss": 53.6597, + "step": 244460 + }, + { + "epoch": 0.9876897344424828, + "grad_norm": 366.66082763671875, + "learning_rate": 5.9589424694959095e-08, + "loss": 55.7274, + "step": 244470 + }, + { + "epoch": 0.9877301357078504, + "grad_norm": 489.0762634277344, + "learning_rate": 5.937421286300815e-08, + "loss": 36.7016, + "step": 244480 + }, + { + "epoch": 0.987770536973218, + "grad_norm": 414.38580322265625, + "learning_rate": 5.915938978382496e-08, + "loss": 77.5991, + "step": 244490 + }, + { + "epoch": 0.9878109382385857, + "grad_norm": 862.5131225585938, + "learning_rate": 5.894495546159506e-08, + "loss": 69.9306, + "step": 244500 + }, + { + "epoch": 0.9878513395039533, + "grad_norm": 476.9122619628906, + 
"learning_rate": 5.8730909900499565e-08, + "loss": 60.0749, + "step": 244510 + }, + { + "epoch": 0.9878917407693208, + "grad_norm": 1011.7698364257812, + "learning_rate": 5.851725310471068e-08, + "loss": 61.0872, + "step": 244520 + }, + { + "epoch": 0.9879321420346885, + "grad_norm": 889.07470703125, + "learning_rate": 5.8303985078396184e-08, + "loss": 64.9482, + "step": 244530 + }, + { + "epoch": 0.9879725433000561, + "grad_norm": 547.5029907226562, + "learning_rate": 5.809110582571054e-08, + "loss": 57.1066, + "step": 244540 + }, + { + "epoch": 0.9880129445654238, + "grad_norm": 529.1973876953125, + "learning_rate": 5.7878615350805966e-08, + "loss": 52.0022, + "step": 244550 + }, + { + "epoch": 0.9880533458307914, + "grad_norm": 251.06863403320312, + "learning_rate": 5.766651365782583e-08, + "loss": 42.1516, + "step": 244560 + }, + { + "epoch": 0.988093747096159, + "grad_norm": 194.66372680664062, + "learning_rate": 5.7454800750902374e-08, + "loss": 33.3899, + "step": 244570 + }, + { + "epoch": 0.9881341483615267, + "grad_norm": 661.5155029296875, + "learning_rate": 5.724347663416563e-08, + "loss": 50.6372, + "step": 244580 + }, + { + "epoch": 0.9881745496268943, + "grad_norm": 802.8948974609375, + "learning_rate": 5.703254131173452e-08, + "loss": 55.3289, + "step": 244590 + }, + { + "epoch": 0.988214950892262, + "grad_norm": 870.50244140625, + "learning_rate": 5.682199478772133e-08, + "loss": 56.9634, + "step": 244600 + }, + { + "epoch": 0.9882553521576296, + "grad_norm": 367.17572021484375, + "learning_rate": 5.6611837066229415e-08, + "loss": 47.5168, + "step": 244610 + }, + { + "epoch": 0.9882957534229972, + "grad_norm": 675.9530639648438, + "learning_rate": 5.6402068151359957e-08, + "loss": 59.4061, + "step": 244620 + }, + { + "epoch": 0.9883361546883649, + "grad_norm": 873.79150390625, + "learning_rate": 5.6192688047198574e-08, + "loss": 57.2921, + "step": 244630 + }, + { + "epoch": 0.9883765559537324, + "grad_norm": 636.3483276367188, + "learning_rate": 5.598369675782867e-08, + "loss": 58.1717, + "step": 244640 + }, + { + "epoch": 0.9884169572191001, + "grad_norm": 254.75625610351562, + "learning_rate": 5.577509428732475e-08, + "loss": 38.6115, + "step": 244650 + }, + { + "epoch": 0.9884573584844677, + "grad_norm": 747.763427734375, + "learning_rate": 5.556688063975246e-08, + "loss": 84.7266, + "step": 244660 + }, + { + "epoch": 0.9884977597498353, + "grad_norm": 1073.231201171875, + "learning_rate": 5.535905581917078e-08, + "loss": 73.8483, + "step": 244670 + }, + { + "epoch": 0.988538161015203, + "grad_norm": 1134.3597412109375, + "learning_rate": 5.515161982963424e-08, + "loss": 68.3608, + "step": 244680 + }, + { + "epoch": 0.9885785622805706, + "grad_norm": 270.23089599609375, + "learning_rate": 5.494457267518405e-08, + "loss": 50.5166, + "step": 244690 + }, + { + "epoch": 0.9886189635459383, + "grad_norm": 752.1763305664062, + "learning_rate": 5.4737914359859204e-08, + "loss": 60.441, + "step": 244700 + }, + { + "epoch": 0.9886593648113059, + "grad_norm": 601.8889770507812, + "learning_rate": 5.4531644887687584e-08, + "loss": 78.6238, + "step": 244710 + }, + { + "epoch": 0.9886997660766735, + "grad_norm": 743.849853515625, + "learning_rate": 5.432576426268821e-08, + "loss": 60.3527, + "step": 244720 + }, + { + "epoch": 0.9887401673420412, + "grad_norm": 4505.931640625, + "learning_rate": 5.4120272488877855e-08, + "loss": 89.462, + "step": 244730 + }, + { + "epoch": 0.9887805686074088, + "grad_norm": 715.7152709960938, + "learning_rate": 5.391516957026221e-08, + "loss": 
55.1674, + "step": 244740 + }, + { + "epoch": 0.9888209698727765, + "grad_norm": 490.54498291015625, + "learning_rate": 5.371045551083809e-08, + "loss": 69.942, + "step": 244750 + }, + { + "epoch": 0.9888613711381441, + "grad_norm": 635.512939453125, + "learning_rate": 5.3506130314597835e-08, + "loss": 55.3682, + "step": 244760 + }, + { + "epoch": 0.9889017724035116, + "grad_norm": 340.0739440917969, + "learning_rate": 5.330219398552494e-08, + "loss": 53.0115, + "step": 244770 + }, + { + "epoch": 0.9889421736688793, + "grad_norm": 502.4493713378906, + "learning_rate": 5.309864652759622e-08, + "loss": 79.1881, + "step": 244780 + }, + { + "epoch": 0.9889825749342469, + "grad_norm": 392.6134033203125, + "learning_rate": 5.289548794477739e-08, + "loss": 58.1892, + "step": 244790 + }, + { + "epoch": 0.9890229761996145, + "grad_norm": 1040.2161865234375, + "learning_rate": 5.269271824102973e-08, + "loss": 81.7927, + "step": 244800 + }, + { + "epoch": 0.9890633774649822, + "grad_norm": 402.0889587402344, + "learning_rate": 5.249033742030785e-08, + "loss": 27.414, + "step": 244810 + }, + { + "epoch": 0.9891037787303498, + "grad_norm": 786.041015625, + "learning_rate": 5.228834548655748e-08, + "loss": 80.7714, + "step": 244820 + }, + { + "epoch": 0.9891441799957175, + "grad_norm": 1116.53564453125, + "learning_rate": 5.2086742443715475e-08, + "loss": 62.6505, + "step": 244830 + }, + { + "epoch": 0.9891845812610851, + "grad_norm": 878.5287475585938, + "learning_rate": 5.188552829571203e-08, + "loss": 75.5625, + "step": 244840 + }, + { + "epoch": 0.9892249825264527, + "grad_norm": 584.36181640625, + "learning_rate": 5.168470304646844e-08, + "loss": 81.2633, + "step": 244850 + }, + { + "epoch": 0.9892653837918204, + "grad_norm": 495.9171142578125, + "learning_rate": 5.1484266699903806e-08, + "loss": 41.9747, + "step": 244860 + }, + { + "epoch": 0.989305785057188, + "grad_norm": 484.5915222167969, + "learning_rate": 5.1284219259923883e-08, + "loss": 51.1297, + "step": 244870 + }, + { + "epoch": 0.9893461863225557, + "grad_norm": 885.5384521484375, + "learning_rate": 5.108456073042556e-08, + "loss": 81.2902, + "step": 244880 + }, + { + "epoch": 0.9893865875879233, + "grad_norm": 614.9737548828125, + "learning_rate": 5.088529111530793e-08, + "loss": 33.0703, + "step": 244890 + }, + { + "epoch": 0.9894269888532908, + "grad_norm": 680.9472045898438, + "learning_rate": 5.0686410418450124e-08, + "loss": 78.2345, + "step": 244900 + }, + { + "epoch": 0.9894673901186585, + "grad_norm": 961.8391723632812, + "learning_rate": 5.048791864373126e-08, + "loss": 95.7389, + "step": 244910 + }, + { + "epoch": 0.9895077913840261, + "grad_norm": 932.1373901367188, + "learning_rate": 5.028981579502157e-08, + "loss": 55.8102, + "step": 244920 + }, + { + "epoch": 0.9895481926493938, + "grad_norm": 643.6388549804688, + "learning_rate": 5.009210187618019e-08, + "loss": 55.2788, + "step": 244930 + }, + { + "epoch": 0.9895885939147614, + "grad_norm": 313.97479248046875, + "learning_rate": 4.989477689106625e-08, + "loss": 53.1409, + "step": 244940 + }, + { + "epoch": 0.989628995180129, + "grad_norm": 1713.712158203125, + "learning_rate": 4.969784084352558e-08, + "loss": 68.8483, + "step": 244950 + }, + { + "epoch": 0.9896693964454967, + "grad_norm": 358.81756591796875, + "learning_rate": 4.950129373739509e-08, + "loss": 76.6323, + "step": 244960 + }, + { + "epoch": 0.9897097977108643, + "grad_norm": 657.3624877929688, + "learning_rate": 4.9305135576509514e-08, + "loss": 97.1115, + "step": 244970 + }, + { + "epoch": 
0.989750198976232, + "grad_norm": 534.5327758789062, + "learning_rate": 4.910936636469021e-08, + "loss": 81.8089, + "step": 244980 + }, + { + "epoch": 0.9897906002415996, + "grad_norm": 483.4454040527344, + "learning_rate": 4.891398610575637e-08, + "loss": 51.0072, + "step": 244990 + }, + { + "epoch": 0.9898310015069672, + "grad_norm": 492.21002197265625, + "learning_rate": 4.871899480351605e-08, + "loss": 43.9044, + "step": 245000 + }, + { + "epoch": 0.9898714027723349, + "grad_norm": 560.1443481445312, + "learning_rate": 4.852439246176843e-08, + "loss": 69.62, + "step": 245010 + }, + { + "epoch": 0.9899118040377025, + "grad_norm": 416.1751708984375, + "learning_rate": 4.833017908431048e-08, + "loss": 62.47, + "step": 245020 + }, + { + "epoch": 0.98995220530307, + "grad_norm": 895.3497314453125, + "learning_rate": 4.8136354674930274e-08, + "loss": 71.5198, + "step": 245030 + }, + { + "epoch": 0.9899926065684377, + "grad_norm": 426.020751953125, + "learning_rate": 4.794291923740035e-08, + "loss": 44.0449, + "step": 245040 + }, + { + "epoch": 0.9900330078338053, + "grad_norm": 505.74407958984375, + "learning_rate": 4.7749872775495476e-08, + "loss": 53.4401, + "step": 245050 + }, + { + "epoch": 0.990073409099173, + "grad_norm": 481.3490905761719, + "learning_rate": 4.75572152929793e-08, + "loss": 66.196, + "step": 245060 + }, + { + "epoch": 0.9901138103645406, + "grad_norm": 1285.4169921875, + "learning_rate": 4.736494679360881e-08, + "loss": 81.7554, + "step": 245070 + }, + { + "epoch": 0.9901542116299082, + "grad_norm": 567.5860595703125, + "learning_rate": 4.717306728112991e-08, + "loss": 61.873, + "step": 245080 + }, + { + "epoch": 0.9901946128952759, + "grad_norm": 309.3211669921875, + "learning_rate": 4.6981576759284054e-08, + "loss": 46.4515, + "step": 245090 + }, + { + "epoch": 0.9902350141606435, + "grad_norm": 404.72308349609375, + "learning_rate": 4.679047523180602e-08, + "loss": 38.5506, + "step": 245100 + }, + { + "epoch": 0.9902754154260112, + "grad_norm": 730.2078857421875, + "learning_rate": 4.6599762702419506e-08, + "loss": 58.5018, + "step": 245110 + }, + { + "epoch": 0.9903158166913788, + "grad_norm": 1148.2498779296875, + "learning_rate": 4.6409439174843753e-08, + "loss": 68.424, + "step": 245120 + }, + { + "epoch": 0.9903562179567464, + "grad_norm": 688.6669921875, + "learning_rate": 4.621950465278913e-08, + "loss": 57.7279, + "step": 245130 + }, + { + "epoch": 0.9903966192221141, + "grad_norm": 977.3458862304688, + "learning_rate": 4.602995913995711e-08, + "loss": 56.0227, + "step": 245140 + }, + { + "epoch": 0.9904370204874817, + "grad_norm": 944.2918701171875, + "learning_rate": 4.584080264004476e-08, + "loss": 48.3156, + "step": 245150 + }, + { + "epoch": 0.9904774217528493, + "grad_norm": 772.4479370117188, + "learning_rate": 4.565203515673799e-08, + "loss": 66.692, + "step": 245160 + }, + { + "epoch": 0.9905178230182169, + "grad_norm": 479.3548583984375, + "learning_rate": 4.546365669371833e-08, + "loss": 43.9013, + "step": 245170 + }, + { + "epoch": 0.9905582242835845, + "grad_norm": 1461.677490234375, + "learning_rate": 4.5275667254658375e-08, + "loss": 71.8242, + "step": 245180 + }, + { + "epoch": 0.9905986255489522, + "grad_norm": 929.8504638671875, + "learning_rate": 4.5088066843224086e-08, + "loss": 103.7692, + "step": 245190 + }, + { + "epoch": 0.9906390268143198, + "grad_norm": 2244.10400390625, + "learning_rate": 4.4900855463068104e-08, + "loss": 149.5595, + "step": 245200 + }, + { + "epoch": 0.9906794280796875, + "grad_norm": 529.055419921875, + 
"learning_rate": 4.47140331178475e-08, + "loss": 79.3554, + "step": 245210 + }, + { + "epoch": 0.9907198293450551, + "grad_norm": 1160.5233154296875, + "learning_rate": 4.452759981119714e-08, + "loss": 87.1227, + "step": 245220 + }, + { + "epoch": 0.9907602306104227, + "grad_norm": 2008.9127197265625, + "learning_rate": 4.434155554675634e-08, + "loss": 81.6234, + "step": 245230 + }, + { + "epoch": 0.9908006318757904, + "grad_norm": 371.3612365722656, + "learning_rate": 4.415590032815109e-08, + "loss": 53.8295, + "step": 245240 + }, + { + "epoch": 0.990841033141158, + "grad_norm": 1145.267578125, + "learning_rate": 4.3970634159000716e-08, + "loss": 73.2216, + "step": 245250 + }, + { + "epoch": 0.9908814344065257, + "grad_norm": 727.67138671875, + "learning_rate": 4.3785757042915657e-08, + "loss": 67.9677, + "step": 245260 + }, + { + "epoch": 0.9909218356718933, + "grad_norm": 277.6853942871094, + "learning_rate": 4.360126898350192e-08, + "loss": 43.7698, + "step": 245270 + }, + { + "epoch": 0.9909622369372608, + "grad_norm": 1086.3302001953125, + "learning_rate": 4.341716998435663e-08, + "loss": 54.215, + "step": 245280 + }, + { + "epoch": 0.9910026382026285, + "grad_norm": 687.7263793945312, + "learning_rate": 4.3233460049065815e-08, + "loss": 44.9248, + "step": 245290 + }, + { + "epoch": 0.9910430394679961, + "grad_norm": 471.13323974609375, + "learning_rate": 4.305013918121548e-08, + "loss": 56.6649, + "step": 245300 + }, + { + "epoch": 0.9910834407333637, + "grad_norm": 1082.625732421875, + "learning_rate": 4.2867207384376106e-08, + "loss": 74.9581, + "step": 245310 + }, + { + "epoch": 0.9911238419987314, + "grad_norm": 717.2423095703125, + "learning_rate": 4.268466466211596e-08, + "loss": 74.9209, + "step": 245320 + }, + { + "epoch": 0.991164243264099, + "grad_norm": 824.5343627929688, + "learning_rate": 4.250251101799219e-08, + "loss": 47.8365, + "step": 245330 + }, + { + "epoch": 0.9912046445294667, + "grad_norm": 596.40966796875, + "learning_rate": 4.232074645555529e-08, + "loss": 51.383, + "step": 245340 + }, + { + "epoch": 0.9912450457948343, + "grad_norm": 376.2271728515625, + "learning_rate": 4.2139370978353524e-08, + "loss": 95.9688, + "step": 245350 + }, + { + "epoch": 0.9912854470602019, + "grad_norm": 536.1099243164062, + "learning_rate": 4.1958384589917414e-08, + "loss": 75.9876, + "step": 245360 + }, + { + "epoch": 0.9913258483255696, + "grad_norm": 722.1608276367188, + "learning_rate": 4.177778729377968e-08, + "loss": 53.4284, + "step": 245370 + }, + { + "epoch": 0.9913662495909372, + "grad_norm": 403.77392578125, + "learning_rate": 4.159757909345752e-08, + "loss": 85.8546, + "step": 245380 + }, + { + "epoch": 0.9914066508563049, + "grad_norm": 801.222412109375, + "learning_rate": 4.141775999246589e-08, + "loss": 91.5247, + "step": 245390 + }, + { + "epoch": 0.9914470521216725, + "grad_norm": 490.95806884765625, + "learning_rate": 4.123832999431088e-08, + "loss": 79.4978, + "step": 245400 + }, + { + "epoch": 0.99148745338704, + "grad_norm": 566.7061157226562, + "learning_rate": 4.105928910248968e-08, + "loss": 61.3814, + "step": 245410 + }, + { + "epoch": 0.9915278546524077, + "grad_norm": 633.5494384765625, + "learning_rate": 4.0880637320492856e-08, + "loss": 80.2104, + "step": 245420 + }, + { + "epoch": 0.9915682559177753, + "grad_norm": 1150.8203125, + "learning_rate": 4.0702374651804263e-08, + "loss": 62.1925, + "step": 245430 + }, + { + "epoch": 0.991608657183143, + "grad_norm": 2473.39501953125, + "learning_rate": 4.052450109989892e-08, + "loss": 72.2005, + "step": 
245440 + }, + { + "epoch": 0.9916490584485106, + "grad_norm": 765.2828979492188, + "learning_rate": 4.034701666824292e-08, + "loss": 52.7494, + "step": 245450 + }, + { + "epoch": 0.9916894597138782, + "grad_norm": 1667.071533203125, + "learning_rate": 4.016992136030018e-08, + "loss": 86.0946, + "step": 245460 + }, + { + "epoch": 0.9917298609792459, + "grad_norm": 700.4152221679688, + "learning_rate": 3.999321517951904e-08, + "loss": 85.5045, + "step": 245470 + }, + { + "epoch": 0.9917702622446135, + "grad_norm": 418.1116027832031, + "learning_rate": 3.981689812934786e-08, + "loss": 69.984, + "step": 245480 + }, + { + "epoch": 0.9918106635099811, + "grad_norm": 437.9290466308594, + "learning_rate": 3.964097021321944e-08, + "loss": 57.8075, + "step": 245490 + }, + { + "epoch": 0.9918510647753488, + "grad_norm": 717.4144287109375, + "learning_rate": 3.946543143456882e-08, + "loss": 47.4886, + "step": 245500 + }, + { + "epoch": 0.9918914660407164, + "grad_norm": 1026.0960693359375, + "learning_rate": 3.929028179681549e-08, + "loss": 60.5795, + "step": 245510 + }, + { + "epoch": 0.9919318673060841, + "grad_norm": 363.8679504394531, + "learning_rate": 3.911552130337448e-08, + "loss": 56.5288, + "step": 245520 + }, + { + "epoch": 0.9919722685714517, + "grad_norm": 524.9384155273438, + "learning_rate": 3.894114995765419e-08, + "loss": 42.0971, + "step": 245530 + }, + { + "epoch": 0.9920126698368192, + "grad_norm": 481.4562683105469, + "learning_rate": 3.876716776304967e-08, + "loss": 67.4155, + "step": 245540 + }, + { + "epoch": 0.9920530711021869, + "grad_norm": 1049.0361328125, + "learning_rate": 3.859357472296044e-08, + "loss": 84.2187, + "step": 245550 + }, + { + "epoch": 0.9920934723675545, + "grad_norm": 517.434814453125, + "learning_rate": 3.842037084076377e-08, + "loss": 56.4051, + "step": 245560 + }, + { + "epoch": 0.9921338736329222, + "grad_norm": 498.0035400390625, + "learning_rate": 3.8247556119836994e-08, + "loss": 48.6871, + "step": 245570 + }, + { + "epoch": 0.9921742748982898, + "grad_norm": 470.9654235839844, + "learning_rate": 3.807513056355294e-08, + "loss": 71.3102, + "step": 245580 + }, + { + "epoch": 0.9922146761636574, + "grad_norm": 875.8306274414062, + "learning_rate": 3.790309417527338e-08, + "loss": 71.2208, + "step": 245590 + }, + { + "epoch": 0.9922550774290251, + "grad_norm": 760.8909912109375, + "learning_rate": 3.773144695834674e-08, + "loss": 68.2483, + "step": 245600 + }, + { + "epoch": 0.9922954786943927, + "grad_norm": 752.4901733398438, + "learning_rate": 3.7560188916125895e-08, + "loss": 71.7483, + "step": 245610 + }, + { + "epoch": 0.9923358799597604, + "grad_norm": 502.5072937011719, + "learning_rate": 3.7389320051943734e-08, + "loss": 80.898, + "step": 245620 + }, + { + "epoch": 0.992376281225128, + "grad_norm": 1313.6217041015625, + "learning_rate": 3.721884036913537e-08, + "loss": 81.5008, + "step": 245630 + }, + { + "epoch": 0.9924166824904956, + "grad_norm": 352.4242858886719, + "learning_rate": 3.7048749871024804e-08, + "loss": 44.3478, + "step": 245640 + }, + { + "epoch": 0.9924570837558633, + "grad_norm": 865.5243530273438, + "learning_rate": 3.687904856092495e-08, + "loss": 69.2282, + "step": 245650 + }, + { + "epoch": 0.9924974850212309, + "grad_norm": 494.22021484375, + "learning_rate": 3.670973644214426e-08, + "loss": 55.4247, + "step": 245660 + }, + { + "epoch": 0.9925378862865984, + "grad_norm": 624.8347778320312, + "learning_rate": 3.654081351798677e-08, + "loss": 47.3491, + "step": 245670 + }, + { + "epoch": 0.9925782875519661, + 
"grad_norm": 1026.3302001953125, + "learning_rate": 3.63722797917454e-08, + "loss": 72.737, + "step": 245680 + }, + { + "epoch": 0.9926186888173337, + "grad_norm": 634.2025146484375, + "learning_rate": 3.6204135266701966e-08, + "loss": 71.7019, + "step": 245690 + }, + { + "epoch": 0.9926590900827014, + "grad_norm": 557.3114624023438, + "learning_rate": 3.603637994613829e-08, + "loss": 56.7464, + "step": 245700 + }, + { + "epoch": 0.992699491348069, + "grad_norm": 527.4268798828125, + "learning_rate": 3.586901383332286e-08, + "loss": 61.7486, + "step": 245710 + }, + { + "epoch": 0.9927398926134366, + "grad_norm": 630.2237548828125, + "learning_rate": 3.570203693151975e-08, + "loss": 59.9647, + "step": 245720 + }, + { + "epoch": 0.9927802938788043, + "grad_norm": 495.5207214355469, + "learning_rate": 3.553544924398189e-08, + "loss": 49.6535, + "step": 245730 + }, + { + "epoch": 0.9928206951441719, + "grad_norm": 629.6219482421875, + "learning_rate": 3.536925077396225e-08, + "loss": 53.0621, + "step": 245740 + }, + { + "epoch": 0.9928610964095396, + "grad_norm": 795.8760986328125, + "learning_rate": 3.520344152469379e-08, + "loss": 79.4053, + "step": 245750 + }, + { + "epoch": 0.9929014976749072, + "grad_norm": 424.3914794921875, + "learning_rate": 3.5038021499416154e-08, + "loss": 49.1679, + "step": 245760 + }, + { + "epoch": 0.9929418989402748, + "grad_norm": 642.6776123046875, + "learning_rate": 3.487299070134897e-08, + "loss": 94.5289, + "step": 245770 + }, + { + "epoch": 0.9929823002056425, + "grad_norm": 513.3138427734375, + "learning_rate": 3.4708349133711904e-08, + "loss": 69.5716, + "step": 245780 + }, + { + "epoch": 0.9930227014710101, + "grad_norm": 513.6527709960938, + "learning_rate": 3.454409679971571e-08, + "loss": 79.1926, + "step": 245790 + }, + { + "epoch": 0.9930631027363777, + "grad_norm": 454.0513610839844, + "learning_rate": 3.438023370256005e-08, + "loss": 72.3835, + "step": 245800 + }, + { + "epoch": 0.9931035040017453, + "grad_norm": 1255.083251953125, + "learning_rate": 3.421675984544015e-08, + "loss": 73.0137, + "step": 245810 + }, + { + "epoch": 0.9931439052671129, + "grad_norm": 875.16650390625, + "learning_rate": 3.405367523154457e-08, + "loss": 64.2918, + "step": 245820 + }, + { + "epoch": 0.9931843065324806, + "grad_norm": 1599.042724609375, + "learning_rate": 3.3890979864050764e-08, + "loss": 61.3414, + "step": 245830 + }, + { + "epoch": 0.9932247077978482, + "grad_norm": 1058.0859375, + "learning_rate": 3.372867374613176e-08, + "loss": 55.903, + "step": 245840 + }, + { + "epoch": 0.9932651090632159, + "grad_norm": 681.10205078125, + "learning_rate": 3.35667568809539e-08, + "loss": 65.0381, + "step": 245850 + }, + { + "epoch": 0.9933055103285835, + "grad_norm": 733.2611694335938, + "learning_rate": 3.340522927166801e-08, + "loss": 76.9129, + "step": 245860 + }, + { + "epoch": 0.9933459115939511, + "grad_norm": 927.6220092773438, + "learning_rate": 3.3244090921429325e-08, + "loss": 90.917, + "step": 245870 + }, + { + "epoch": 0.9933863128593188, + "grad_norm": 534.97900390625, + "learning_rate": 3.308334183337536e-08, + "loss": 79.7148, + "step": 245880 + }, + { + "epoch": 0.9934267141246864, + "grad_norm": 472.1578674316406, + "learning_rate": 3.292298201064137e-08, + "loss": 62.6103, + "step": 245890 + }, + { + "epoch": 0.9934671153900541, + "grad_norm": 827.1378173828125, + "learning_rate": 3.2763011456353744e-08, + "loss": 65.2287, + "step": 245900 + }, + { + "epoch": 0.9935075166554217, + "grad_norm": 496.7746887207031, + "learning_rate": 
3.2603430173632214e-08, + "loss": 85.1574, + "step": 245910 + }, + { + "epoch": 0.9935479179207892, + "grad_norm": 767.6474609375, + "learning_rate": 3.24442381655854e-08, + "loss": 65.1521, + "step": 245920 + }, + { + "epoch": 0.9935883191861569, + "grad_norm": 494.93463134765625, + "learning_rate": 3.228543543531748e-08, + "loss": 62.9498, + "step": 245930 + }, + { + "epoch": 0.9936287204515245, + "grad_norm": 707.6901245117188, + "learning_rate": 3.2127021985925986e-08, + "loss": 51.276, + "step": 245940 + }, + { + "epoch": 0.9936691217168921, + "grad_norm": 568.8327026367188, + "learning_rate": 3.1968997820499556e-08, + "loss": 42.5167, + "step": 245950 + }, + { + "epoch": 0.9937095229822598, + "grad_norm": 524.4324340820312, + "learning_rate": 3.181136294211573e-08, + "loss": 50.2851, + "step": 245960 + }, + { + "epoch": 0.9937499242476274, + "grad_norm": 225.7127227783203, + "learning_rate": 3.1654117353852045e-08, + "loss": 55.0212, + "step": 245970 + }, + { + "epoch": 0.9937903255129951, + "grad_norm": 502.35174560546875, + "learning_rate": 3.149726105877049e-08, + "loss": 70.1332, + "step": 245980 + }, + { + "epoch": 0.9938307267783627, + "grad_norm": 822.3461303710938, + "learning_rate": 3.134079405992862e-08, + "loss": 67.1115, + "step": 245990 + }, + { + "epoch": 0.9938711280437303, + "grad_norm": 793.0733642578125, + "learning_rate": 3.1184716360379564e-08, + "loss": 67.0892, + "step": 246000 + }, + { + "epoch": 0.993911529309098, + "grad_norm": 422.6313781738281, + "learning_rate": 3.102902796316531e-08, + "loss": 57.4665, + "step": 246010 + }, + { + "epoch": 0.9939519305744656, + "grad_norm": 476.46405029296875, + "learning_rate": 3.087372887132123e-08, + "loss": 63.1614, + "step": 246020 + }, + { + "epoch": 0.9939923318398333, + "grad_norm": 427.029296875, + "learning_rate": 3.071881908787599e-08, + "loss": 60.9906, + "step": 246030 + }, + { + "epoch": 0.9940327331052009, + "grad_norm": 626.3902587890625, + "learning_rate": 3.0564298615844976e-08, + "loss": 50.1448, + "step": 246040 + }, + { + "epoch": 0.9940731343705684, + "grad_norm": 707.5762939453125, + "learning_rate": 3.041016745824577e-08, + "loss": 50.9822, + "step": 246050 + }, + { + "epoch": 0.9941135356359361, + "grad_norm": 704.7064208984375, + "learning_rate": 3.025642561808262e-08, + "loss": 107.3421, + "step": 246060 + }, + { + "epoch": 0.9941539369013037, + "grad_norm": 299.4898376464844, + "learning_rate": 3.01030730983487e-08, + "loss": 64.4418, + "step": 246070 + }, + { + "epoch": 0.9941943381666714, + "grad_norm": 676.4454345703125, + "learning_rate": 2.995010990203939e-08, + "loss": 54.6544, + "step": 246080 + }, + { + "epoch": 0.994234739432039, + "grad_norm": 465.7747802734375, + "learning_rate": 2.979753603213231e-08, + "loss": 86.7989, + "step": 246090 + }, + { + "epoch": 0.9942751406974066, + "grad_norm": 1124.3768310546875, + "learning_rate": 2.964535149160286e-08, + "loss": 73.1189, + "step": 246100 + }, + { + "epoch": 0.9943155419627743, + "grad_norm": 1212.193603515625, + "learning_rate": 2.9493556283419765e-08, + "loss": 61.1368, + "step": 246110 + }, + { + "epoch": 0.9943559432281419, + "grad_norm": 1511.6201171875, + "learning_rate": 2.934215041054289e-08, + "loss": 82.0976, + "step": 246120 + }, + { + "epoch": 0.9943963444935096, + "grad_norm": 999.6705932617188, + "learning_rate": 2.9191133875920985e-08, + "loss": 62.6416, + "step": 246130 + }, + { + "epoch": 0.9944367457588772, + "grad_norm": 719.7281494140625, + "learning_rate": 2.904050668249836e-08, + "loss": 44.9201, + "step": 246140 
+ }, + { + "epoch": 0.9944771470242448, + "grad_norm": 588.3489990234375, + "learning_rate": 2.8890268833214884e-08, + "loss": 82.1718, + "step": 246150 + }, + { + "epoch": 0.9945175482896125, + "grad_norm": 328.4591369628906, + "learning_rate": 2.874042033099489e-08, + "loss": 103.9053, + "step": 246160 + }, + { + "epoch": 0.9945579495549801, + "grad_norm": 474.165283203125, + "learning_rate": 2.859096117876492e-08, + "loss": 74.7182, + "step": 246170 + }, + { + "epoch": 0.9945983508203476, + "grad_norm": 922.8868408203125, + "learning_rate": 2.844189137943376e-08, + "loss": 67.6252, + "step": 246180 + }, + { + "epoch": 0.9946387520857153, + "grad_norm": 486.7824401855469, + "learning_rate": 2.8293210935912417e-08, + "loss": 73.3193, + "step": 246190 + }, + { + "epoch": 0.9946791533510829, + "grad_norm": 666.9881591796875, + "learning_rate": 2.8144919851096352e-08, + "loss": 46.7169, + "step": 246200 + }, + { + "epoch": 0.9947195546164506, + "grad_norm": 1134.45068359375, + "learning_rate": 2.7997018127876585e-08, + "loss": 77.393, + "step": 246210 + }, + { + "epoch": 0.9947599558818182, + "grad_norm": 791.5308837890625, + "learning_rate": 2.7849505769137474e-08, + "loss": 104.9714, + "step": 246220 + }, + { + "epoch": 0.9948003571471858, + "grad_norm": 1089.5355224609375, + "learning_rate": 2.77023827777545e-08, + "loss": 82.6711, + "step": 246230 + }, + { + "epoch": 0.9948407584125535, + "grad_norm": 2470.469970703125, + "learning_rate": 2.7555649156596476e-08, + "loss": 84.9853, + "step": 246240 + }, + { + "epoch": 0.9948811596779211, + "grad_norm": 845.45703125, + "learning_rate": 2.740930490852334e-08, + "loss": 67.5918, + "step": 246250 + }, + { + "epoch": 0.9949215609432888, + "grad_norm": 333.334228515625, + "learning_rate": 2.726335003638836e-08, + "loss": 56.0542, + "step": 246260 + }, + { + "epoch": 0.9949619622086564, + "grad_norm": 1119.0545654296875, + "learning_rate": 2.711778454303593e-08, + "loss": 68.3364, + "step": 246270 + }, + { + "epoch": 0.995002363474024, + "grad_norm": 1080.394775390625, + "learning_rate": 2.6972608431306e-08, + "loss": 96.3131, + "step": 246280 + }, + { + "epoch": 0.9950427647393917, + "grad_norm": 1152.6812744140625, + "learning_rate": 2.6827821704027425e-08, + "loss": 63.0397, + "step": 246290 + }, + { + "epoch": 0.9950831660047593, + "grad_norm": 616.3544311523438, + "learning_rate": 2.6683424364024603e-08, + "loss": 52.4763, + "step": 246300 + }, + { + "epoch": 0.9951235672701269, + "grad_norm": 1146.6976318359375, + "learning_rate": 2.653941641411084e-08, + "loss": 74.3114, + "step": 246310 + }, + { + "epoch": 0.9951639685354945, + "grad_norm": 704.0271606445312, + "learning_rate": 2.6395797857092785e-08, + "loss": 46.4209, + "step": 246320 + }, + { + "epoch": 0.9952043698008621, + "grad_norm": 874.688720703125, + "learning_rate": 2.625256869577486e-08, + "loss": 79.9039, + "step": 246330 + }, + { + "epoch": 0.9952447710662298, + "grad_norm": 615.0228881835938, + "learning_rate": 2.6109728932943723e-08, + "loss": 70.0368, + "step": 246340 + }, + { + "epoch": 0.9952851723315974, + "grad_norm": 371.4426574707031, + "learning_rate": 2.5967278571386036e-08, + "loss": 86.2411, + "step": 246350 + }, + { + "epoch": 0.995325573596965, + "grad_norm": 558.6928100585938, + "learning_rate": 2.5825217613881793e-08, + "loss": 37.7069, + "step": 246360 + }, + { + "epoch": 0.9953659748623327, + "grad_norm": 537.0572509765625, + "learning_rate": 2.5683546063197672e-08, + "loss": 64.9856, + "step": 246370 + }, + { + "epoch": 0.9954063761277003, + "grad_norm": 
313.0009460449219, + "learning_rate": 2.554226392209591e-08, + "loss": 57.0275, + "step": 246380 + }, + { + "epoch": 0.995446777393068, + "grad_norm": 708.5098266601562, + "learning_rate": 2.540137119332986e-08, + "loss": 57.0459, + "step": 246390 + }, + { + "epoch": 0.9954871786584356, + "grad_norm": 609.0169677734375, + "learning_rate": 2.5260867879650652e-08, + "loss": 74.8419, + "step": 246400 + }, + { + "epoch": 0.9955275799238033, + "grad_norm": 705.3273315429688, + "learning_rate": 2.5120753983791655e-08, + "loss": 88.1848, + "step": 246410 + }, + { + "epoch": 0.9955679811891709, + "grad_norm": 552.0274658203125, + "learning_rate": 2.4981029508488463e-08, + "loss": 78.1259, + "step": 246420 + }, + { + "epoch": 0.9956083824545384, + "grad_norm": 612.467041015625, + "learning_rate": 2.484169445646556e-08, + "loss": 47.3379, + "step": 246430 + }, + { + "epoch": 0.9956487837199061, + "grad_norm": 697.12451171875, + "learning_rate": 2.4702748830436328e-08, + "loss": 54.0609, + "step": 246440 + }, + { + "epoch": 0.9956891849852737, + "grad_norm": 508.4583740234375, + "learning_rate": 2.4564192633109717e-08, + "loss": 72.4029, + "step": 246450 + }, + { + "epoch": 0.9957295862506413, + "grad_norm": 553.5885620117188, + "learning_rate": 2.4426025867190228e-08, + "loss": 46.5991, + "step": 246460 + }, + { + "epoch": 0.995769987516009, + "grad_norm": 558.6953125, + "learning_rate": 2.4288248535369042e-08, + "loss": 69.7685, + "step": 246470 + }, + { + "epoch": 0.9958103887813766, + "grad_norm": 588.5094604492188, + "learning_rate": 2.4150860640332897e-08, + "loss": 43.9059, + "step": 246480 + }, + { + "epoch": 0.9958507900467443, + "grad_norm": 1699.56982421875, + "learning_rate": 2.4013862184757432e-08, + "loss": 87.2433, + "step": 246490 + }, + { + "epoch": 0.9958911913121119, + "grad_norm": 503.01873779296875, + "learning_rate": 2.3877253171320504e-08, + "loss": 60.5497, + "step": 246500 + }, + { + "epoch": 0.9959315925774795, + "grad_norm": 841.1090698242188, + "learning_rate": 2.3741033602677767e-08, + "loss": 43.7916, + "step": 246510 + }, + { + "epoch": 0.9959719938428472, + "grad_norm": 736.8029174804688, + "learning_rate": 2.3605203481489315e-08, + "loss": 83.1938, + "step": 246520 + }, + { + "epoch": 0.9960123951082148, + "grad_norm": 1105.7611083984375, + "learning_rate": 2.34697628103997e-08, + "loss": 47.2408, + "step": 246530 + }, + { + "epoch": 0.9960527963735825, + "grad_norm": 647.1959838867188, + "learning_rate": 2.3334711592053472e-08, + "loss": 95.841, + "step": 246540 + }, + { + "epoch": 0.9960931976389501, + "grad_norm": 939.3787231445312, + "learning_rate": 2.3200049829081862e-08, + "loss": 71.7151, + "step": 246550 + }, + { + "epoch": 0.9961335989043176, + "grad_norm": 800.9390869140625, + "learning_rate": 2.3065777524109433e-08, + "loss": 49.6888, + "step": 246560 + }, + { + "epoch": 0.9961740001696853, + "grad_norm": 182.66232299804688, + "learning_rate": 2.2931894679756317e-08, + "loss": 58.2825, + "step": 246570 + }, + { + "epoch": 0.9962144014350529, + "grad_norm": 490.4267883300781, + "learning_rate": 2.2798401298629313e-08, + "loss": 64.6816, + "step": 246580 + }, + { + "epoch": 0.9962548027004205, + "grad_norm": 604.3055419921875, + "learning_rate": 2.2665297383333006e-08, + "loss": 56.7492, + "step": 246590 + }, + { + "epoch": 0.9962952039657882, + "grad_norm": 539.9654541015625, + "learning_rate": 2.253258293645866e-08, + "loss": 87.3909, + "step": 246600 + }, + { + "epoch": 0.9963356052311558, + "grad_norm": 748.7692260742188, + "learning_rate": 
2.2400257960599748e-08, + "loss": 61.2478, + "step": 246610 + }, + { + "epoch": 0.9963760064965235, + "grad_norm": 477.85089111328125, + "learning_rate": 2.2268322458334214e-08, + "loss": 85.2617, + "step": 246620 + }, + { + "epoch": 0.9964164077618911, + "grad_norm": 594.9008178710938, + "learning_rate": 2.2136776432231112e-08, + "loss": 52.733, + "step": 246630 + }, + { + "epoch": 0.9964568090272587, + "grad_norm": 612.5916137695312, + "learning_rate": 2.2005619884857276e-08, + "loss": 52.6187, + "step": 246640 + }, + { + "epoch": 0.9964972102926264, + "grad_norm": 341.560302734375, + "learning_rate": 2.1874852818768445e-08, + "loss": 42.6931, + "step": 246650 + }, + { + "epoch": 0.996537611557994, + "grad_norm": 800.2680053710938, + "learning_rate": 2.1744475236513683e-08, + "loss": 57.4986, + "step": 246660 + }, + { + "epoch": 0.9965780128233617, + "grad_norm": 732.1220092773438, + "learning_rate": 2.161448714063763e-08, + "loss": 59.2533, + "step": 246670 + }, + { + "epoch": 0.9966184140887293, + "grad_norm": 879.8084106445312, + "learning_rate": 2.1484888533673807e-08, + "loss": 56.1472, + "step": 246680 + }, + { + "epoch": 0.9966588153540968, + "grad_norm": 920.48974609375, + "learning_rate": 2.1355679418144647e-08, + "loss": 63.4963, + "step": 246690 + }, + { + "epoch": 0.9966992166194645, + "grad_norm": 676.2283935546875, + "learning_rate": 2.1226859796574794e-08, + "loss": 61.6036, + "step": 246700 + }, + { + "epoch": 0.9967396178848321, + "grad_norm": 736.2852783203125, + "learning_rate": 2.1098429671473354e-08, + "loss": 71.895, + "step": 246710 + }, + { + "epoch": 0.9967800191501998, + "grad_norm": 752.240478515625, + "learning_rate": 2.0970389045342766e-08, + "loss": 86.0384, + "step": 246720 + }, + { + "epoch": 0.9968204204155674, + "grad_norm": 1176.173095703125, + "learning_rate": 2.084273792067881e-08, + "loss": 97.8257, + "step": 246730 + }, + { + "epoch": 0.996860821680935, + "grad_norm": 898.9769897460938, + "learning_rate": 2.0715476299972837e-08, + "loss": 62.5789, + "step": 246740 + }, + { + "epoch": 0.9969012229463027, + "grad_norm": 619.3209228515625, + "learning_rate": 2.0588604185707295e-08, + "loss": 60.086, + "step": 246750 + }, + { + "epoch": 0.9969416242116703, + "grad_norm": 1800.61962890625, + "learning_rate": 2.0462121580349104e-08, + "loss": 62.5061, + "step": 246760 + }, + { + "epoch": 0.996982025477038, + "grad_norm": 335.31427001953125, + "learning_rate": 2.033602848636962e-08, + "loss": 58.383, + "step": 246770 + }, + { + "epoch": 0.9970224267424056, + "grad_norm": 657.8942260742188, + "learning_rate": 2.0210324906226876e-08, + "loss": 54.0897, + "step": 246780 + }, + { + "epoch": 0.9970628280077732, + "grad_norm": 881.69677734375, + "learning_rate": 2.00850108423678e-08, + "loss": 99.3379, + "step": 246790 + }, + { + "epoch": 0.9971032292731409, + "grad_norm": 1130.1905517578125, + "learning_rate": 1.9960086297239335e-08, + "loss": 69.8414, + "step": 246800 + }, + { + "epoch": 0.9971436305385085, + "grad_norm": 1389.18212890625, + "learning_rate": 1.983555127327508e-08, + "loss": 61.3453, + "step": 246810 + }, + { + "epoch": 0.997184031803876, + "grad_norm": 796.7022705078125, + "learning_rate": 1.971140577290198e-08, + "loss": 46.4247, + "step": 246820 + }, + { + "epoch": 0.9972244330692437, + "grad_norm": 1127.28369140625, + "learning_rate": 1.958764979854255e-08, + "loss": 70.0903, + "step": 246830 + }, + { + "epoch": 0.9972648343346113, + "grad_norm": 532.8534545898438, + "learning_rate": 1.9464283352608195e-08, + "loss": 68.058, + "step": 
246840 + }, + { + "epoch": 0.997305235599979, + "grad_norm": 2313.693359375, + "learning_rate": 1.934130643750587e-08, + "loss": 72.9816, + "step": 246850 + }, + { + "epoch": 0.9973456368653466, + "grad_norm": 573.3366088867188, + "learning_rate": 1.921871905562922e-08, + "loss": 69.424, + "step": 246860 + }, + { + "epoch": 0.9973860381307142, + "grad_norm": 542.7574462890625, + "learning_rate": 1.909652120937189e-08, + "loss": 64.9796, + "step": 246870 + }, + { + "epoch": 0.9974264393960819, + "grad_norm": 1876.592529296875, + "learning_rate": 1.8974712901114188e-08, + "loss": 69.0193, + "step": 246880 + }, + { + "epoch": 0.9974668406614495, + "grad_norm": 1846.334228515625, + "learning_rate": 1.8853294133232004e-08, + "loss": 73.0309, + "step": 246890 + }, + { + "epoch": 0.9975072419268172, + "grad_norm": 711.7422485351562, + "learning_rate": 1.8732264908092323e-08, + "loss": 89.349, + "step": 246900 + }, + { + "epoch": 0.9975476431921848, + "grad_norm": 708.5359497070312, + "learning_rate": 1.8611625228053266e-08, + "loss": 43.2723, + "step": 246910 + }, + { + "epoch": 0.9975880444575524, + "grad_norm": 852.0037231445312, + "learning_rate": 1.8491375095466278e-08, + "loss": 49.2954, + "step": 246920 + }, + { + "epoch": 0.9976284457229201, + "grad_norm": 682.1520385742188, + "learning_rate": 1.837151451267838e-08, + "loss": 80.5949, + "step": 246930 + }, + { + "epoch": 0.9976688469882877, + "grad_norm": 1637.173095703125, + "learning_rate": 1.8252043482025474e-08, + "loss": 73.5092, + "step": 246940 + }, + { + "epoch": 0.9977092482536553, + "grad_norm": 1478.87255859375, + "learning_rate": 1.8132962005836807e-08, + "loss": 64.4434, + "step": 246950 + }, + { + "epoch": 0.9977496495190229, + "grad_norm": 877.1627807617188, + "learning_rate": 1.801427008643275e-08, + "loss": 53.4055, + "step": 246960 + }, + { + "epoch": 0.9977900507843905, + "grad_norm": 728.1354370117188, + "learning_rate": 1.7895967726127007e-08, + "loss": 93.248, + "step": 246970 + }, + { + "epoch": 0.9978304520497582, + "grad_norm": 358.18438720703125, + "learning_rate": 1.7778054927228838e-08, + "loss": 89.7632, + "step": 246980 + }, + { + "epoch": 0.9978708533151258, + "grad_norm": 681.3452758789062, + "learning_rate": 1.7660531692031967e-08, + "loss": 41.9654, + "step": 246990 + }, + { + "epoch": 0.9979112545804935, + "grad_norm": 3944.366455078125, + "learning_rate": 1.7543398022832337e-08, + "loss": 80.078, + "step": 247000 + }, + { + "epoch": 0.9979516558458611, + "grad_norm": 400.3491516113281, + "learning_rate": 1.7426653921912562e-08, + "loss": 55.9031, + "step": 247010 + }, + { + "epoch": 0.9979920571112287, + "grad_norm": 389.7321472167969, + "learning_rate": 1.731029939154638e-08, + "loss": 74.2283, + "step": 247020 + }, + { + "epoch": 0.9980324583765964, + "grad_norm": 892.865966796875, + "learning_rate": 1.719433443400531e-08, + "loss": 75.2334, + "step": 247030 + }, + { + "epoch": 0.998072859641964, + "grad_norm": 314.8853454589844, + "learning_rate": 1.7078759051547543e-08, + "loss": 91.8047, + "step": 247040 + }, + { + "epoch": 0.9981132609073317, + "grad_norm": 1051.8057861328125, + "learning_rate": 1.6963573246429054e-08, + "loss": 82.6677, + "step": 247050 + }, + { + "epoch": 0.9981536621726993, + "grad_norm": 304.2667541503906, + "learning_rate": 1.6848777020890274e-08, + "loss": 51.886, + "step": 247060 + }, + { + "epoch": 0.9981940634380668, + "grad_norm": 764.0817260742188, + "learning_rate": 1.6734370377176067e-08, + "loss": 76.392, + "step": 247070 + }, + { + "epoch": 0.9982344647034345, + 
"grad_norm": 840.25634765625, + "learning_rate": 1.662035331751133e-08, + "loss": 91.3798, + "step": 247080 + }, + { + "epoch": 0.9982748659688021, + "grad_norm": 650.6286010742188, + "learning_rate": 1.6506725844120942e-08, + "loss": 55.671, + "step": 247090 + }, + { + "epoch": 0.9983152672341697, + "grad_norm": 735.1884765625, + "learning_rate": 1.639348795922091e-08, + "loss": 93.6046, + "step": 247100 + }, + { + "epoch": 0.9983556684995374, + "grad_norm": 980.9933471679688, + "learning_rate": 1.628063966501836e-08, + "loss": 71.8791, + "step": 247110 + }, + { + "epoch": 0.998396069764905, + "grad_norm": 606.859619140625, + "learning_rate": 1.616818096371153e-08, + "loss": 82.7781, + "step": 247120 + }, + { + "epoch": 0.9984364710302727, + "grad_norm": 538.3990478515625, + "learning_rate": 1.6056111857494228e-08, + "loss": 50.7735, + "step": 247130 + }, + { + "epoch": 0.9984768722956403, + "grad_norm": 875.1250610351562, + "learning_rate": 1.5944432348553584e-08, + "loss": 61.0174, + "step": 247140 + }, + { + "epoch": 0.998517273561008, + "grad_norm": 1419.672607421875, + "learning_rate": 1.5833142439063422e-08, + "loss": 64.5997, + "step": 247150 + }, + { + "epoch": 0.9985576748263756, + "grad_norm": 1172.145751953125, + "learning_rate": 1.572224213119533e-08, + "loss": 79.6937, + "step": 247160 + }, + { + "epoch": 0.9985980760917432, + "grad_norm": 428.78973388671875, + "learning_rate": 1.561173142710981e-08, + "loss": 44.6349, + "step": 247170 + }, + { + "epoch": 0.9986384773571109, + "grad_norm": 900.9158325195312, + "learning_rate": 1.5501610328962914e-08, + "loss": 89.4821, + "step": 247180 + }, + { + "epoch": 0.9986788786224785, + "grad_norm": 807.5845336914062, + "learning_rate": 1.539187883890181e-08, + "loss": 59.0328, + "step": 247190 + }, + { + "epoch": 0.998719279887846, + "grad_norm": 645.8209228515625, + "learning_rate": 1.5282536959062567e-08, + "loss": 77.7649, + "step": 247200 + }, + { + "epoch": 0.9987596811532137, + "grad_norm": 1021.3309936523438, + "learning_rate": 1.5173584691581256e-08, + "loss": 58.1548, + "step": 247210 + }, + { + "epoch": 0.9988000824185813, + "grad_norm": 356.52020263671875, + "learning_rate": 1.5065022038578403e-08, + "loss": 47.4731, + "step": 247220 + }, + { + "epoch": 0.998840483683949, + "grad_norm": 1126.925048828125, + "learning_rate": 1.4956849002172313e-08, + "loss": 94.2163, + "step": 247230 + }, + { + "epoch": 0.9988808849493166, + "grad_norm": 921.4385986328125, + "learning_rate": 1.484906558447019e-08, + "loss": 83.6366, + "step": 247240 + }, + { + "epoch": 0.9989212862146842, + "grad_norm": 184.76283264160156, + "learning_rate": 1.4741671787577016e-08, + "loss": 64.0525, + "step": 247250 + }, + { + "epoch": 0.9989616874800519, + "grad_norm": 380.9809265136719, + "learning_rate": 1.4634667613582231e-08, + "loss": 40.9251, + "step": 247260 + }, + { + "epoch": 0.9990020887454195, + "grad_norm": 336.2428894042969, + "learning_rate": 1.4528053064573055e-08, + "loss": 66.1481, + "step": 247270 + }, + { + "epoch": 0.9990424900107872, + "grad_norm": 539.5074462890625, + "learning_rate": 1.4421828142630045e-08, + "loss": 55.3878, + "step": 247280 + }, + { + "epoch": 0.9990828912761548, + "grad_norm": 2589.85009765625, + "learning_rate": 1.4315992849822658e-08, + "loss": 89.3596, + "step": 247290 + }, + { + "epoch": 0.9991232925415224, + "grad_norm": 513.9119262695312, + "learning_rate": 1.4210547188215906e-08, + "loss": 54.1202, + "step": 247300 + }, + { + "epoch": 0.9991636938068901, + "grad_norm": 915.6912231445312, + "learning_rate": 
1.4105491159861483e-08, + "loss": 106.7107, + "step": 247310 + }, + { + "epoch": 0.9992040950722577, + "grad_norm": 764.6376342773438, + "learning_rate": 1.4000824766811083e-08, + "loss": 53.7929, + "step": 247320 + }, + { + "epoch": 0.9992444963376252, + "grad_norm": 732.2562255859375, + "learning_rate": 1.3896548011103072e-08, + "loss": 59.3251, + "step": 247330 + }, + { + "epoch": 0.9992848976029929, + "grad_norm": 510.8457336425781, + "learning_rate": 1.3792660894773602e-08, + "loss": 61.2146, + "step": 247340 + }, + { + "epoch": 0.9993252988683605, + "grad_norm": 871.4805908203125, + "learning_rate": 1.3689163419843277e-08, + "loss": 68.9993, + "step": 247350 + }, + { + "epoch": 0.9993657001337282, + "grad_norm": 662.73046875, + "learning_rate": 1.3586055588332703e-08, + "loss": 95.0368, + "step": 247360 + }, + { + "epoch": 0.9994061013990958, + "grad_norm": 550.651123046875, + "learning_rate": 1.3483337402253605e-08, + "loss": 53.8348, + "step": 247370 + }, + { + "epoch": 0.9994465026644634, + "grad_norm": 421.8531188964844, + "learning_rate": 1.3381008863604383e-08, + "loss": 54.8719, + "step": 247380 + }, + { + "epoch": 0.9994869039298311, + "grad_norm": 1114.98779296875, + "learning_rate": 1.327906997438344e-08, + "loss": 70.2479, + "step": 247390 + }, + { + "epoch": 0.9995273051951987, + "grad_norm": 618.6839599609375, + "learning_rate": 1.3177520736575855e-08, + "loss": 50.813, + "step": 247400 + }, + { + "epoch": 0.9995677064605664, + "grad_norm": 1047.6392822265625, + "learning_rate": 1.3076361152164484e-08, + "loss": 70.5558, + "step": 247410 + }, + { + "epoch": 0.999608107725934, + "grad_norm": 689.8629760742188, + "learning_rate": 1.2975591223116646e-08, + "loss": 81.1684, + "step": 247420 + }, + { + "epoch": 0.9996485089913016, + "grad_norm": 322.34161376953125, + "learning_rate": 1.2875210951401872e-08, + "loss": 53.7038, + "step": 247430 + }, + { + "epoch": 0.9996889102566693, + "grad_norm": 498.062744140625, + "learning_rate": 1.2775220338974159e-08, + "loss": 36.1415, + "step": 247440 + }, + { + "epoch": 0.9997293115220369, + "grad_norm": 1072.8707275390625, + "learning_rate": 1.2675619387783055e-08, + "loss": 61.9057, + "step": 247450 + }, + { + "epoch": 0.9997697127874045, + "grad_norm": 466.61956787109375, + "learning_rate": 1.2576408099771454e-08, + "loss": 77.1693, + "step": 247460 + }, + { + "epoch": 0.9998101140527721, + "grad_norm": 623.4584350585938, + "learning_rate": 1.247758647687336e-08, + "loss": 40.9465, + "step": 247470 + }, + { + "epoch": 0.9998505153181397, + "grad_norm": 604.044677734375, + "learning_rate": 1.2379154521016124e-08, + "loss": 68.9561, + "step": 247480 + }, + { + "epoch": 0.9998909165835074, + "grad_norm": 991.634765625, + "learning_rate": 1.2281112234115988e-08, + "loss": 80.5863, + "step": 247490 + }, + { + "epoch": 0.999931317848875, + "grad_norm": 1190.2669677734375, + "learning_rate": 1.2183459618084759e-08, + "loss": 69.7541, + "step": 247500 + }, + { + "epoch": 0.9999717191142427, + "grad_norm": 658.0904541015625, + "learning_rate": 1.2086196674829797e-08, + "loss": 71.9346, + "step": 247510 + }, + { + "epoch": 1.0, + "eval_loss": 61.85871505737305, + "eval_runtime": 464.9355, + "eval_samples_per_second": 21.511, + "eval_steps_per_second": 5.379, + "step": 247517 + }, + { + "epoch": 1.0000121203796102, + "grad_norm": 590.2011108398438, + "learning_rate": 1.1989323406242925e-08, + "loss": 82.8564, + "step": 247520 + }, + { + "epoch": 1.0000525216449778, + "grad_norm": 531.0361328125, + "learning_rate": 1.1892839814215962e-08, + 
"loss": 65.404, + "step": 247530 + }, + { + "epoch": 1.0000929229103455, + "grad_norm": 884.9467163085938, + "learning_rate": 1.1796745900629625e-08, + "loss": 74.4416, + "step": 247540 + }, + { + "epoch": 1.000133324175713, + "grad_norm": 416.27923583984375, + "learning_rate": 1.170104166735353e-08, + "loss": 56.9064, + "step": 247550 + }, + { + "epoch": 1.0001737254410807, + "grad_norm": 465.0064697265625, + "learning_rate": 1.1605727116257292e-08, + "loss": 83.9302, + "step": 247560 + }, + { + "epoch": 1.0002141267064484, + "grad_norm": 1883.000244140625, + "learning_rate": 1.1510802249199427e-08, + "loss": 68.139, + "step": 247570 + }, + { + "epoch": 1.000254527971816, + "grad_norm": 861.8699951171875, + "learning_rate": 1.1416267068029562e-08, + "loss": 38.9482, + "step": 247580 + }, + { + "epoch": 1.0002949292371837, + "grad_norm": 785.831787109375, + "learning_rate": 1.1322121574588452e-08, + "loss": 71.108, + "step": 247590 + }, + { + "epoch": 1.0003353305025513, + "grad_norm": 845.5962524414062, + "learning_rate": 1.1228365770714622e-08, + "loss": 90.9453, + "step": 247600 + }, + { + "epoch": 1.000375731767919, + "grad_norm": 400.4775390625, + "learning_rate": 1.113499965823328e-08, + "loss": 94.7443, + "step": 247610 + }, + { + "epoch": 1.0004161330332866, + "grad_norm": 387.5064392089844, + "learning_rate": 1.1042023238967415e-08, + "loss": 49.6058, + "step": 247620 + }, + { + "epoch": 1.0004565342986542, + "grad_norm": 506.70709228515625, + "learning_rate": 1.0949436514728907e-08, + "loss": 46.463, + "step": 247630 + }, + { + "epoch": 1.0004969355640219, + "grad_norm": 811.4459228515625, + "learning_rate": 1.0857239487320758e-08, + "loss": 101.0327, + "step": 247640 + }, + { + "epoch": 1.0005373368293895, + "grad_norm": 568.8379516601562, + "learning_rate": 1.0765432158543754e-08, + "loss": 58.7048, + "step": 247650 + }, + { + "epoch": 1.0005777380947571, + "grad_norm": 1062.4122314453125, + "learning_rate": 1.067401453018313e-08, + "loss": 87.9873, + "step": 247660 + }, + { + "epoch": 1.0006181393601248, + "grad_norm": 619.7223510742188, + "learning_rate": 1.0582986604026347e-08, + "loss": 69.6916, + "step": 247670 + }, + { + "epoch": 1.0006585406254924, + "grad_norm": 944.2957763671875, + "learning_rate": 1.0492348381843098e-08, + "loss": 82.0855, + "step": 247680 + }, + { + "epoch": 1.00069894189086, + "grad_norm": 305.23724365234375, + "learning_rate": 1.0402099865405302e-08, + "loss": 63.2735, + "step": 247690 + }, + { + "epoch": 1.0007393431562277, + "grad_norm": 768.7704467773438, + "learning_rate": 1.031224105646711e-08, + "loss": 76.5508, + "step": 247700 + }, + { + "epoch": 1.0007797444215953, + "grad_norm": 991.5065307617188, + "learning_rate": 1.0222771956784894e-08, + "loss": 76.6663, + "step": 247710 + }, + { + "epoch": 1.000820145686963, + "grad_norm": 651.7576293945312, + "learning_rate": 1.0133692568101705e-08, + "loss": 53.9201, + "step": 247720 + }, + { + "epoch": 1.0008605469523306, + "grad_norm": 2352.241943359375, + "learning_rate": 1.0045002892151711e-08, + "loss": 73.9465, + "step": 247730 + }, + { + "epoch": 1.0009009482176983, + "grad_norm": 276.79168701171875, + "learning_rate": 9.956702930666862e-09, + "loss": 99.8684, + "step": 247740 + }, + { + "epoch": 1.000941349483066, + "grad_norm": 855.5513305664062, + "learning_rate": 9.868792685368001e-09, + "loss": 125.7065, + "step": 247750 + }, + { + "epoch": 1.0009817507484335, + "grad_norm": 725.9985961914062, + "learning_rate": 9.781272157969313e-09, + "loss": 47.856, + "step": 247760 + }, + { + 
"epoch": 1.0010221520138012, + "grad_norm": 919.3187866210938, + "learning_rate": 9.6941413501761e-09, + "loss": 63.6191, + "step": 247770 + }, + { + "epoch": 1.0010625532791686, + "grad_norm": 908.6085815429688, + "learning_rate": 9.607400263687006e-09, + "loss": 77.2247, + "step": 247780 + }, + { + "epoch": 1.0011029545445362, + "grad_norm": 498.5030212402344, + "learning_rate": 9.521048900191787e-09, + "loss": 45.0316, + "step": 247790 + }, + { + "epoch": 1.0011433558099039, + "grad_norm": 362.3508605957031, + "learning_rate": 9.435087261377984e-09, + "loss": 52.2125, + "step": 247800 + }, + { + "epoch": 1.0011837570752715, + "grad_norm": 562.1640014648438, + "learning_rate": 9.349515348917592e-09, + "loss": 58.7031, + "step": 247810 + }, + { + "epoch": 1.0012241583406392, + "grad_norm": 587.8255004882812, + "learning_rate": 9.264333164480388e-09, + "loss": 44.7145, + "step": 247820 + }, + { + "epoch": 1.0012645596060068, + "grad_norm": 720.3592529296875, + "learning_rate": 9.179540709727264e-09, + "loss": 60.0216, + "step": 247830 + }, + { + "epoch": 1.0013049608713744, + "grad_norm": 1516.165283203125, + "learning_rate": 9.095137986310231e-09, + "loss": 68.2838, + "step": 247840 + }, + { + "epoch": 1.001345362136742, + "grad_norm": 818.8074340820312, + "learning_rate": 9.011124995876863e-09, + "loss": 75.5746, + "step": 247850 + }, + { + "epoch": 1.0013857634021097, + "grad_norm": 547.4713134765625, + "learning_rate": 8.927501740063627e-09, + "loss": 59.9123, + "step": 247860 + }, + { + "epoch": 1.0014261646674774, + "grad_norm": 439.8132019042969, + "learning_rate": 8.844268220498109e-09, + "loss": 50.0097, + "step": 247870 + }, + { + "epoch": 1.001466565932845, + "grad_norm": 846.3936767578125, + "learning_rate": 8.761424438807898e-09, + "loss": 38.718, + "step": 247880 + }, + { + "epoch": 1.0015069671982126, + "grad_norm": 959.5693969726562, + "learning_rate": 8.678970396607256e-09, + "loss": 103.6881, + "step": 247890 + }, + { + "epoch": 1.0015473684635803, + "grad_norm": 489.3706970214844, + "learning_rate": 8.596906095499347e-09, + "loss": 60.4012, + "step": 247900 + }, + { + "epoch": 1.001587769728948, + "grad_norm": 931.6546020507812, + "learning_rate": 8.515231537089553e-09, + "loss": 78.1174, + "step": 247910 + }, + { + "epoch": 1.0016281709943156, + "grad_norm": 1046.0059814453125, + "learning_rate": 8.433946722965491e-09, + "loss": 81.1064, + "step": 247920 + }, + { + "epoch": 1.0016685722596832, + "grad_norm": 454.8185119628906, + "learning_rate": 8.353051654717004e-09, + "loss": 37.5365, + "step": 247930 + }, + { + "epoch": 1.0017089735250508, + "grad_norm": 912.1582641601562, + "learning_rate": 8.272546333916165e-09, + "loss": 60.6166, + "step": 247940 + }, + { + "epoch": 1.0017493747904185, + "grad_norm": 457.503173828125, + "learning_rate": 8.19243076213283e-09, + "loss": 65.6211, + "step": 247950 + }, + { + "epoch": 1.0017897760557861, + "grad_norm": 790.7493286132812, + "learning_rate": 8.112704940932414e-09, + "loss": 58.887, + "step": 247960 + }, + { + "epoch": 1.0018301773211538, + "grad_norm": 257.21759033203125, + "learning_rate": 8.033368871867008e-09, + "loss": 51.1348, + "step": 247970 + }, + { + "epoch": 1.0018705785865214, + "grad_norm": 400.2229919433594, + "learning_rate": 7.954422556484265e-09, + "loss": 69.9081, + "step": 247980 + }, + { + "epoch": 1.001910979851889, + "grad_norm": 354.52508544921875, + "learning_rate": 7.875865996322951e-09, + "loss": 45.9348, + "step": 247990 + }, + { + "epoch": 1.0019513811172567, + "grad_norm": 664.3076782226562, + 
"learning_rate": 7.797699192912955e-09, + "loss": 98.4398, + "step": 248000 + }, + { + "epoch": 1.0019917823826243, + "grad_norm": 375.5564270019531, + "learning_rate": 7.719922147779723e-09, + "loss": 62.2112, + "step": 248010 + }, + { + "epoch": 1.002032183647992, + "grad_norm": 783.7318725585938, + "learning_rate": 7.642534862439821e-09, + "loss": 60.1914, + "step": 248020 + }, + { + "epoch": 1.0020725849133594, + "grad_norm": 666.0960693359375, + "learning_rate": 7.56553733840093e-09, + "loss": 72.5856, + "step": 248030 + }, + { + "epoch": 1.002112986178727, + "grad_norm": 825.030517578125, + "learning_rate": 7.488929577164072e-09, + "loss": 60.5071, + "step": 248040 + }, + { + "epoch": 1.0021533874440947, + "grad_norm": 892.0487060546875, + "learning_rate": 7.412711580225829e-09, + "loss": 78.1599, + "step": 248050 + }, + { + "epoch": 1.0021937887094623, + "grad_norm": 610.88330078125, + "learning_rate": 7.336883349067236e-09, + "loss": 81.9374, + "step": 248060 + }, + { + "epoch": 1.00223418997483, + "grad_norm": 289.4344787597656, + "learning_rate": 7.261444885169333e-09, + "loss": 56.0395, + "step": 248070 + }, + { + "epoch": 1.0022745912401976, + "grad_norm": 751.8075561523438, + "learning_rate": 7.1863961900042746e-09, + "loss": 76.6276, + "step": 248080 + }, + { + "epoch": 1.0023149925055652, + "grad_norm": 472.09954833984375, + "learning_rate": 7.111737265030893e-09, + "loss": 47.1432, + "step": 248090 + }, + { + "epoch": 1.0023553937709329, + "grad_norm": 536.2998657226562, + "learning_rate": 7.037468111710244e-09, + "loss": 63.2109, + "step": 248100 + }, + { + "epoch": 1.0023957950363005, + "grad_norm": 521.3856201171875, + "learning_rate": 6.963588731485616e-09, + "loss": 47.3244, + "step": 248110 + }, + { + "epoch": 1.0024361963016681, + "grad_norm": 557.798583984375, + "learning_rate": 6.890099125798078e-09, + "loss": 50.9857, + "step": 248120 + }, + { + "epoch": 1.0024765975670358, + "grad_norm": 437.3095703125, + "learning_rate": 6.816999296082039e-09, + "loss": 64.5692, + "step": 248130 + }, + { + "epoch": 1.0025169988324034, + "grad_norm": 338.9017639160156, + "learning_rate": 6.744289243760804e-09, + "loss": 31.7949, + "step": 248140 + }, + { + "epoch": 1.002557400097771, + "grad_norm": 621.0401611328125, + "learning_rate": 6.6719689702554605e-09, + "loss": 75.7591, + "step": 248150 + }, + { + "epoch": 1.0025978013631387, + "grad_norm": 769.9017944335938, + "learning_rate": 6.600038476971548e-09, + "loss": 56.7265, + "step": 248160 + }, + { + "epoch": 1.0026382026285063, + "grad_norm": 756.1639404296875, + "learning_rate": 6.528497765312392e-09, + "loss": 47.5924, + "step": 248170 + }, + { + "epoch": 1.002678603893874, + "grad_norm": 583.2656860351562, + "learning_rate": 6.45734683667465e-09, + "loss": 77.5252, + "step": 248180 + }, + { + "epoch": 1.0027190051592416, + "grad_norm": 800.96337890625, + "learning_rate": 6.38658569244388e-09, + "loss": 75.3352, + "step": 248190 + }, + { + "epoch": 1.0027594064246093, + "grad_norm": 373.50006103515625, + "learning_rate": 6.316214334001203e-09, + "loss": 42.8643, + "step": 248200 + }, + { + "epoch": 1.002799807689977, + "grad_norm": 362.3075256347656, + "learning_rate": 6.2462327627166304e-09, + "loss": 45.8071, + "step": 248210 + }, + { + "epoch": 1.0028402089553445, + "grad_norm": 957.6506958007812, + "learning_rate": 6.176640979953519e-09, + "loss": 70.7984, + "step": 248220 + }, + { + "epoch": 1.0028806102207122, + "grad_norm": 591.532470703125, + "learning_rate": 6.107438987073e-09, + "loss": 69.2733, + "step": 248230 
+ }, + { + "epoch": 1.0029210114860798, + "grad_norm": 2639.35009765625, + "learning_rate": 6.038626785418444e-09, + "loss": 87.1864, + "step": 248240 + }, + { + "epoch": 1.0029614127514475, + "grad_norm": 594.7188110351562, + "learning_rate": 5.970204376337663e-09, + "loss": 69.4996, + "step": 248250 + }, + { + "epoch": 1.003001814016815, + "grad_norm": 331.9031066894531, + "learning_rate": 5.902171761160702e-09, + "loss": 63.0195, + "step": 248260 + }, + { + "epoch": 1.0030422152821827, + "grad_norm": 732.08984375, + "learning_rate": 5.834528941213169e-09, + "loss": 62.3124, + "step": 248270 + }, + { + "epoch": 1.0030826165475504, + "grad_norm": 316.0459289550781, + "learning_rate": 5.767275917816229e-09, + "loss": 47.8008, + "step": 248280 + }, + { + "epoch": 1.0031230178129178, + "grad_norm": 180.98361206054688, + "learning_rate": 5.700412692279944e-09, + "loss": 82.8906, + "step": 248290 + }, + { + "epoch": 1.0031634190782854, + "grad_norm": 383.6042175292969, + "learning_rate": 5.633939265905497e-09, + "loss": 42.444, + "step": 248300 + }, + { + "epoch": 1.003203820343653, + "grad_norm": 698.3505859375, + "learning_rate": 5.5678556399940686e-09, + "loss": 71.1799, + "step": 248310 + }, + { + "epoch": 1.0032442216090207, + "grad_norm": 737.6282348632812, + "learning_rate": 5.502161815829077e-09, + "loss": 92.5681, + "step": 248320 + }, + { + "epoch": 1.0032846228743884, + "grad_norm": 695.4033813476562, + "learning_rate": 5.4368577946961596e-09, + "loss": 52.5139, + "step": 248330 + }, + { + "epoch": 1.003325024139756, + "grad_norm": 460.7320556640625, + "learning_rate": 5.371943577863192e-09, + "loss": 43.3595, + "step": 248340 + }, + { + "epoch": 1.0033654254051236, + "grad_norm": 563.068115234375, + "learning_rate": 5.3074191665980494e-09, + "loss": 61.4819, + "step": 248350 + }, + { + "epoch": 1.0034058266704913, + "grad_norm": 342.0809631347656, + "learning_rate": 5.243284562159723e-09, + "loss": 43.0043, + "step": 248360 + }, + { + "epoch": 1.003446227935859, + "grad_norm": 455.55938720703125, + "learning_rate": 5.179539765798325e-09, + "loss": 58.0983, + "step": 248370 + }, + { + "epoch": 1.0034866292012266, + "grad_norm": 1556.4609375, + "learning_rate": 5.116184778755085e-09, + "loss": 78.0046, + "step": 248380 + }, + { + "epoch": 1.0035270304665942, + "grad_norm": 631.3849487304688, + "learning_rate": 5.053219602264569e-09, + "loss": 53.1356, + "step": 248390 + }, + { + "epoch": 1.0035674317319618, + "grad_norm": 688.4679565429688, + "learning_rate": 4.990644237556908e-09, + "loss": 66.3212, + "step": 248400 + }, + { + "epoch": 1.0036078329973295, + "grad_norm": 384.0335388183594, + "learning_rate": 4.928458685851123e-09, + "loss": 52.2714, + "step": 248410 + }, + { + "epoch": 1.0036482342626971, + "grad_norm": 1073.441650390625, + "learning_rate": 4.866662948359579e-09, + "loss": 55.6296, + "step": 248420 + }, + { + "epoch": 1.0036886355280648, + "grad_norm": 949.4934692382812, + "learning_rate": 4.805257026283538e-09, + "loss": 71.919, + "step": 248430 + }, + { + "epoch": 1.0037290367934324, + "grad_norm": 911.975830078125, + "learning_rate": 4.744240920826481e-09, + "loss": 71.4097, + "step": 248440 + }, + { + "epoch": 1.0037694380588, + "grad_norm": 1051.722900390625, + "learning_rate": 4.683614633174128e-09, + "loss": 46.7513, + "step": 248450 + }, + { + "epoch": 1.0038098393241677, + "grad_norm": 786.8934326171875, + "learning_rate": 4.623378164507752e-09, + "loss": 65.1381, + "step": 248460 + }, + { + "epoch": 1.0038502405895353, + "grad_norm": 1190.6405029296875, + 
"learning_rate": 4.563531516004194e-09, + "loss": 49.0076, + "step": 248470 + }, + { + "epoch": 1.003890641854903, + "grad_norm": 609.81103515625, + "learning_rate": 4.5040746888269646e-09, + "loss": 94.319, + "step": 248480 + }, + { + "epoch": 1.0039310431202706, + "grad_norm": 1157.087158203125, + "learning_rate": 4.44500768413958e-09, + "loss": 103.0867, + "step": 248490 + }, + { + "epoch": 1.0039714443856382, + "grad_norm": 843.581298828125, + "learning_rate": 4.386330503090008e-09, + "loss": 72.9247, + "step": 248500 + }, + { + "epoch": 1.0040118456510059, + "grad_norm": 507.43182373046875, + "learning_rate": 4.328043146824002e-09, + "loss": 54.9502, + "step": 248510 + }, + { + "epoch": 1.0040522469163735, + "grad_norm": 915.3667602539062, + "learning_rate": 4.2701456164762066e-09, + "loss": 79.8888, + "step": 248520 + }, + { + "epoch": 1.0040926481817412, + "grad_norm": 612.6775512695312, + "learning_rate": 4.2126379131768314e-09, + "loss": 51.1185, + "step": 248530 + }, + { + "epoch": 1.0041330494471086, + "grad_norm": 921.3275146484375, + "learning_rate": 4.155520038047201e-09, + "loss": 61.4335, + "step": 248540 + }, + { + "epoch": 1.0041734507124762, + "grad_norm": 454.0970458984375, + "learning_rate": 4.098791992199758e-09, + "loss": 66.9522, + "step": 248550 + }, + { + "epoch": 1.0042138519778439, + "grad_norm": 594.523681640625, + "learning_rate": 4.042453776740285e-09, + "loss": 40.608, + "step": 248560 + }, + { + "epoch": 1.0042542532432115, + "grad_norm": 807.0231323242188, + "learning_rate": 3.986505392770124e-09, + "loss": 93.9332, + "step": 248570 + }, + { + "epoch": 1.0042946545085791, + "grad_norm": 307.3790283203125, + "learning_rate": 3.9309468413750715e-09, + "loss": 74.7959, + "step": 248580 + }, + { + "epoch": 1.0043350557739468, + "grad_norm": 427.72808837890625, + "learning_rate": 3.875778123643148e-09, + "loss": 65.4532, + "step": 248590 + }, + { + "epoch": 1.0043754570393144, + "grad_norm": 1061.6275634765625, + "learning_rate": 3.820999240644608e-09, + "loss": 71.2552, + "step": 248600 + }, + { + "epoch": 1.004415858304682, + "grad_norm": 352.9907531738281, + "learning_rate": 3.766610193454146e-09, + "loss": 54.195, + "step": 248610 + }, + { + "epoch": 1.0044562595700497, + "grad_norm": 668.2301635742188, + "learning_rate": 3.7126109831264746e-09, + "loss": 60.256, + "step": 248620 + }, + { + "epoch": 1.0044966608354173, + "grad_norm": 228.10552978515625, + "learning_rate": 3.6590016107163064e-09, + "loss": 45.8084, + "step": 248630 + }, + { + "epoch": 1.004537062100785, + "grad_norm": 237.22738647460938, + "learning_rate": 3.60578207726725e-09, + "loss": 40.9312, + "step": 248640 + }, + { + "epoch": 1.0045774633661526, + "grad_norm": 550.7319946289062, + "learning_rate": 3.552952383820696e-09, + "loss": 66.7888, + "step": 248650 + }, + { + "epoch": 1.0046178646315203, + "grad_norm": 951.9227294921875, + "learning_rate": 3.500512531402489e-09, + "loss": 81.9458, + "step": 248660 + }, + { + "epoch": 1.004658265896888, + "grad_norm": 483.5680236816406, + "learning_rate": 3.4484625210362554e-09, + "loss": 55.9166, + "step": 248670 + }, + { + "epoch": 1.0046986671622555, + "grad_norm": 1149.939697265625, + "learning_rate": 3.3968023537389594e-09, + "loss": 68.5474, + "step": 248680 + }, + { + "epoch": 1.0047390684276232, + "grad_norm": 615.0205078125, + "learning_rate": 3.3455320305142423e-09, + "loss": 76.0233, + "step": 248690 + }, + { + "epoch": 1.0047794696929908, + "grad_norm": 355.2969055175781, + "learning_rate": 3.294651552365746e-09, + "loss": 64.7722, + 
"step": 248700 + }, + { + "epoch": 1.0048198709583585, + "grad_norm": 1062.1531982421875, + "learning_rate": 3.2441609202815694e-09, + "loss": 80.2818, + "step": 248710 + }, + { + "epoch": 1.004860272223726, + "grad_norm": 651.049560546875, + "learning_rate": 3.1940601352475895e-09, + "loss": 52.2253, + "step": 248720 + }, + { + "epoch": 1.0049006734890937, + "grad_norm": 588.716064453125, + "learning_rate": 3.1443491982408035e-09, + "loss": 68.1338, + "step": 248730 + }, + { + "epoch": 1.0049410747544614, + "grad_norm": 757.9308471679688, + "learning_rate": 3.0950281102293258e-09, + "loss": 59.9994, + "step": 248740 + }, + { + "epoch": 1.004981476019829, + "grad_norm": 682.371337890625, + "learning_rate": 3.0460968721746086e-09, + "loss": 41.9455, + "step": 248750 + }, + { + "epoch": 1.0050218772851967, + "grad_norm": 850.54833984375, + "learning_rate": 2.997555485033665e-09, + "loss": 48.9406, + "step": 248760 + }, + { + "epoch": 1.0050622785505643, + "grad_norm": 585.2266845703125, + "learning_rate": 2.9494039497479645e-09, + "loss": 70.8767, + "step": 248770 + }, + { + "epoch": 1.005102679815932, + "grad_norm": 923.8726806640625, + "learning_rate": 2.9016422672611954e-09, + "loss": 71.7036, + "step": 248780 + }, + { + "epoch": 1.0051430810812996, + "grad_norm": 811.24658203125, + "learning_rate": 2.854270438501505e-09, + "loss": 67.8668, + "step": 248790 + }, + { + "epoch": 1.005183482346667, + "grad_norm": 653.4196166992188, + "learning_rate": 2.807288464392599e-09, + "loss": 57.9755, + "step": 248800 + }, + { + "epoch": 1.0052238836120346, + "grad_norm": 1551.59375, + "learning_rate": 2.7606963458493006e-09, + "loss": 83.9572, + "step": 248810 + }, + { + "epoch": 1.0052642848774023, + "grad_norm": 435.47625732421875, + "learning_rate": 2.7144940837842138e-09, + "loss": 54.7893, + "step": 248820 + }, + { + "epoch": 1.00530468614277, + "grad_norm": 726.771484375, + "learning_rate": 2.668681679094398e-09, + "loss": 63.5434, + "step": 248830 + }, + { + "epoch": 1.0053450874081375, + "grad_norm": 648.3975219726562, + "learning_rate": 2.6232591326724733e-09, + "loss": 60.4749, + "step": 248840 + }, + { + "epoch": 1.0053854886735052, + "grad_norm": 644.8402709960938, + "learning_rate": 2.5782264454066175e-09, + "loss": 73.8123, + "step": 248850 + }, + { + "epoch": 1.0054258899388728, + "grad_norm": 513.4389038085938, + "learning_rate": 2.5335836181739072e-09, + "loss": 50.9707, + "step": 248860 + }, + { + "epoch": 1.0054662912042405, + "grad_norm": 479.2508239746094, + "learning_rate": 2.489330651844757e-09, + "loss": 45.7774, + "step": 248870 + }, + { + "epoch": 1.005506692469608, + "grad_norm": 489.208984375, + "learning_rate": 2.4454675472807e-09, + "loss": 52.9505, + "step": 248880 + }, + { + "epoch": 1.0055470937349757, + "grad_norm": 634.3941650390625, + "learning_rate": 2.401994305336608e-09, + "loss": 67.2182, + "step": 248890 + }, + { + "epoch": 1.0055874950003434, + "grad_norm": 834.0480346679688, + "learning_rate": 2.3589109268629116e-09, + "loss": 61.1101, + "step": 248900 + }, + { + "epoch": 1.005627896265711, + "grad_norm": 1048.023193359375, + "learning_rate": 2.3162174126967196e-09, + "loss": 73.4154, + "step": 248910 + }, + { + "epoch": 1.0056682975310787, + "grad_norm": 400.6410827636719, + "learning_rate": 2.273913763672919e-09, + "loss": 68.9832, + "step": 248920 + }, + { + "epoch": 1.0057086987964463, + "grad_norm": 622.4219970703125, + "learning_rate": 2.2319999806130753e-09, + "loss": 46.3725, + "step": 248930 + }, + { + "epoch": 1.005749100061814, + "grad_norm": 
1237.3565673828125, + "learning_rate": 2.190476064336533e-09, + "loss": 79.3537, + "step": 248940 + }, + { + "epoch": 1.0057895013271816, + "grad_norm": 1718.1204833984375, + "learning_rate": 2.1493420156515345e-09, + "loss": 98.4554, + "step": 248950 + }, + { + "epoch": 1.0058299025925492, + "grad_norm": 549.4330444335938, + "learning_rate": 2.1085978353618807e-09, + "loss": 53.079, + "step": 248960 + }, + { + "epoch": 1.0058703038579169, + "grad_norm": 1439.4869384765625, + "learning_rate": 2.0682435242602716e-09, + "loss": 59.7202, + "step": 248970 + }, + { + "epoch": 1.0059107051232845, + "grad_norm": 554.4739990234375, + "learning_rate": 2.0282790831349653e-09, + "loss": 81.9638, + "step": 248980 + }, + { + "epoch": 1.0059511063886521, + "grad_norm": 586.0916748046875, + "learning_rate": 1.988704512763118e-09, + "loss": 53.7479, + "step": 248990 + }, + { + "epoch": 1.0059915076540198, + "grad_norm": 230.25294494628906, + "learning_rate": 1.949519813915224e-09, + "loss": 53.467, + "step": 249000 + }, + { + "epoch": 1.0060319089193874, + "grad_norm": 651.7587890625, + "learning_rate": 1.910724987359558e-09, + "loss": 75.1255, + "step": 249010 + }, + { + "epoch": 1.006072310184755, + "grad_norm": 253.41368103027344, + "learning_rate": 1.872320033848851e-09, + "loss": 70.5722, + "step": 249020 + }, + { + "epoch": 1.0061127114501227, + "grad_norm": 165.32437133789062, + "learning_rate": 1.8343049541336143e-09, + "loss": 37.0831, + "step": 249030 + }, + { + "epoch": 1.0061531127154903, + "grad_norm": 707.3920288085938, + "learning_rate": 1.796679748953256e-09, + "loss": 56.9702, + "step": 249040 + }, + { + "epoch": 1.006193513980858, + "grad_norm": 692.0291748046875, + "learning_rate": 1.7594444190449645e-09, + "loss": 56.6415, + "step": 249050 + }, + { + "epoch": 1.0062339152462254, + "grad_norm": 469.0581359863281, + "learning_rate": 1.7225989651303842e-09, + "loss": 49.4065, + "step": 249060 + }, + { + "epoch": 1.006274316511593, + "grad_norm": 331.9457092285156, + "learning_rate": 1.68614338792894e-09, + "loss": 54.5548, + "step": 249070 + }, + { + "epoch": 1.0063147177769607, + "grad_norm": 404.8883056640625, + "learning_rate": 1.6500776881511748e-09, + "loss": 87.9609, + "step": 249080 + }, + { + "epoch": 1.0063551190423283, + "grad_norm": 789.7308959960938, + "learning_rate": 1.6144018665031901e-09, + "loss": 39.0294, + "step": 249090 + }, + { + "epoch": 1.006395520307696, + "grad_norm": 590.8009643554688, + "learning_rate": 1.5791159236777654e-09, + "loss": 64.4315, + "step": 249100 + }, + { + "epoch": 1.0064359215730636, + "grad_norm": 393.2922058105469, + "learning_rate": 1.5442198603632386e-09, + "loss": 82.9317, + "step": 249110 + }, + { + "epoch": 1.0064763228384312, + "grad_norm": 449.51104736328125, + "learning_rate": 1.5097136772390663e-09, + "loss": 63.2408, + "step": 249120 + }, + { + "epoch": 1.0065167241037989, + "grad_norm": 662.6757202148438, + "learning_rate": 1.4755973749802642e-09, + "loss": 64.8274, + "step": 249130 + }, + { + "epoch": 1.0065571253691665, + "grad_norm": 575.337646484375, + "learning_rate": 1.441870954250746e-09, + "loss": 76.7478, + "step": 249140 + }, + { + "epoch": 1.0065975266345342, + "grad_norm": 450.4961853027344, + "learning_rate": 1.4085344157055425e-09, + "loss": 42.8611, + "step": 249150 + }, + { + "epoch": 1.0066379278999018, + "grad_norm": 794.6490478515625, + "learning_rate": 1.3755877599996858e-09, + "loss": 64.4882, + "step": 249160 + }, + { + "epoch": 1.0066783291652694, + "grad_norm": 371.93194580078125, + "learning_rate": 
1.3430309877726643e-09, + "loss": 82.8797, + "step": 249170 + }, + { + "epoch": 1.006718730430637, + "grad_norm": 459.426025390625, + "learning_rate": 1.3108640996573053e-09, + "loss": 80.5951, + "step": 249180 + }, + { + "epoch": 1.0067591316960047, + "grad_norm": 829.705322265625, + "learning_rate": 1.279087096284215e-09, + "loss": 51.7935, + "step": 249190 + }, + { + "epoch": 1.0067995329613724, + "grad_norm": 981.8072509765625, + "learning_rate": 1.2476999782706779e-09, + "loss": 48.9967, + "step": 249200 + }, + { + "epoch": 1.00683993422674, + "grad_norm": 662.7955322265625, + "learning_rate": 1.2167027462295366e-09, + "loss": 48.9169, + "step": 249210 + }, + { + "epoch": 1.0068803354921076, + "grad_norm": 498.3249816894531, + "learning_rate": 1.186095400764753e-09, + "loss": 65.781, + "step": 249220 + }, + { + "epoch": 1.0069207367574753, + "grad_norm": 1017.97900390625, + "learning_rate": 1.1558779424714063e-09, + "loss": 74.366, + "step": 249230 + }, + { + "epoch": 1.006961138022843, + "grad_norm": 566.4378662109375, + "learning_rate": 1.1260503719423554e-09, + "loss": 59.101, + "step": 249240 + }, + { + "epoch": 1.0070015392882106, + "grad_norm": 690.8829345703125, + "learning_rate": 1.0966126897571373e-09, + "loss": 51.221, + "step": 249250 + }, + { + "epoch": 1.0070419405535782, + "grad_norm": 530.546875, + "learning_rate": 1.0675648964886265e-09, + "loss": 71.1756, + "step": 249260 + }, + { + "epoch": 1.0070823418189458, + "grad_norm": 482.6839599609375, + "learning_rate": 1.0389069927052575e-09, + "loss": 62.3818, + "step": 249270 + }, + { + "epoch": 1.0071227430843135, + "grad_norm": 567.1624755859375, + "learning_rate": 1.0106389789643622e-09, + "loss": 61.6863, + "step": 249280 + }, + { + "epoch": 1.0071631443496811, + "grad_norm": 506.1968688964844, + "learning_rate": 9.827608558166113e-10, + "loss": 54.4761, + "step": 249290 + }, + { + "epoch": 1.0072035456150488, + "grad_norm": 541.2426147460938, + "learning_rate": 9.55272623806014e-10, + "loss": 37.5779, + "step": 249300 + }, + { + "epoch": 1.0072439468804162, + "grad_norm": 716.0906982421875, + "learning_rate": 9.281742834699181e-10, + "loss": 70.1744, + "step": 249310 + }, + { + "epoch": 1.0072843481457838, + "grad_norm": 703.7621459960938, + "learning_rate": 9.014658353323491e-10, + "loss": 60.6156, + "step": 249320 + }, + { + "epoch": 1.0073247494111515, + "grad_norm": 586.1680297851562, + "learning_rate": 8.751472799195527e-10, + "loss": 70.0694, + "step": 249330 + }, + { + "epoch": 1.007365150676519, + "grad_norm": 1170.073486328125, + "learning_rate": 8.492186177422312e-10, + "loss": 66.7577, + "step": 249340 + }, + { + "epoch": 1.0074055519418867, + "grad_norm": 482.56781005859375, + "learning_rate": 8.236798493044262e-10, + "loss": 61.0401, + "step": 249350 + }, + { + "epoch": 1.0074459532072544, + "grad_norm": 611.2447509765625, + "learning_rate": 7.985309751057379e-10, + "loss": 48.5732, + "step": 249360 + }, + { + "epoch": 1.007486354472622, + "grad_norm": 923.9035034179688, + "learning_rate": 7.737719956346646e-10, + "loss": 68.4029, + "step": 249370 + }, + { + "epoch": 1.0075267557379897, + "grad_norm": 511.7546081542969, + "learning_rate": 7.494029113752632e-10, + "loss": 48.3581, + "step": 249380 + }, + { + "epoch": 1.0075671570033573, + "grad_norm": 711.5328369140625, + "learning_rate": 7.254237228049298e-10, + "loss": 86.4123, + "step": 249390 + }, + { + "epoch": 1.007607558268725, + "grad_norm": 560.9522094726562, + "learning_rate": 7.018344303877378e-10, + "loss": 41.3287, + "step": 249400 + }, + { + 
"epoch": 1.0076479595340926, + "grad_norm": 568.5785522460938, + "learning_rate": 6.786350345833193e-10, + "loss": 48.3969, + "step": 249410 + }, + { + "epoch": 1.0076883607994602, + "grad_norm": 543.312744140625, + "learning_rate": 6.558255358468657e-10, + "loss": 57.2176, + "step": 249420 + }, + { + "epoch": 1.0077287620648279, + "grad_norm": 801.2297973632812, + "learning_rate": 6.334059346202459e-10, + "loss": 77.0439, + "step": 249430 + }, + { + "epoch": 1.0077691633301955, + "grad_norm": 127.95111846923828, + "learning_rate": 6.113762313431082e-10, + "loss": 52.8138, + "step": 249440 + }, + { + "epoch": 1.0078095645955631, + "grad_norm": 485.751953125, + "learning_rate": 5.897364264417782e-10, + "loss": 65.7876, + "step": 249450 + }, + { + "epoch": 1.0078499658609308, + "grad_norm": 1014.9998168945312, + "learning_rate": 5.684865203403611e-10, + "loss": 66.254, + "step": 249460 + }, + { + "epoch": 1.0078903671262984, + "grad_norm": 660.0430297851562, + "learning_rate": 5.476265134540804e-10, + "loss": 58.4983, + "step": 249470 + }, + { + "epoch": 1.007930768391666, + "grad_norm": 381.69024658203125, + "learning_rate": 5.271564061870571e-10, + "loss": 68.6676, + "step": 249480 + }, + { + "epoch": 1.0079711696570337, + "grad_norm": 328.0639343261719, + "learning_rate": 5.070761989411921e-10, + "loss": 81.1313, + "step": 249490 + }, + { + "epoch": 1.0080115709224013, + "grad_norm": 635.9850463867188, + "learning_rate": 4.873858921050634e-10, + "loss": 52.8049, + "step": 249500 + }, + { + "epoch": 1.008051972187769, + "grad_norm": 556.7789306640625, + "learning_rate": 4.680854860628081e-10, + "loss": 54.1077, + "step": 249510 + }, + { + "epoch": 1.0080923734531366, + "grad_norm": 1849.8094482421875, + "learning_rate": 4.491749811919022e-10, + "loss": 68.67, + "step": 249520 + }, + { + "epoch": 1.0081327747185043, + "grad_norm": 424.52734375, + "learning_rate": 4.3065437786316e-10, + "loss": 57.378, + "step": 249530 + }, + { + "epoch": 1.008173175983872, + "grad_norm": 415.8068542480469, + "learning_rate": 4.1252367643185297e-10, + "loss": 45.5723, + "step": 249540 + }, + { + "epoch": 1.0082135772492395, + "grad_norm": 650.1827392578125, + "learning_rate": 3.9478287725769337e-10, + "loss": 53.6675, + "step": 249550 + }, + { + "epoch": 1.0082539785146072, + "grad_norm": 313.34033203125, + "learning_rate": 3.7743198068262986e-10, + "loss": 51.5655, + "step": 249560 + }, + { + "epoch": 1.0082943797799746, + "grad_norm": 754.9653930664062, + "learning_rate": 3.604709870463907e-10, + "loss": 60.8922, + "step": 249570 + }, + { + "epoch": 1.0083347810453422, + "grad_norm": 707.226806640625, + "learning_rate": 3.438998966776019e-10, + "loss": 64.934, + "step": 249580 + }, + { + "epoch": 1.0083751823107099, + "grad_norm": 1383.1534423828125, + "learning_rate": 3.277187099026691e-10, + "loss": 90.5521, + "step": 249590 + }, + { + "epoch": 1.0084155835760775, + "grad_norm": 527.865966796875, + "learning_rate": 3.11927427034675e-10, + "loss": 70.7257, + "step": 249600 + }, + { + "epoch": 1.0084559848414452, + "grad_norm": 454.8075256347656, + "learning_rate": 2.965260483844823e-10, + "loss": 71.8497, + "step": 249610 + }, + { + "epoch": 1.0084963861068128, + "grad_norm": 461.6095275878906, + "learning_rate": 2.8151457424741014e-10, + "loss": 55.6032, + "step": 249620 + }, + { + "epoch": 1.0085367873721804, + "grad_norm": 906.763916015625, + "learning_rate": 2.668930049187779e-10, + "loss": 84.5327, + "step": 249630 + }, + { + "epoch": 1.008577188637548, + "grad_norm": 281.6214904785156, + 
"learning_rate": 2.526613406850231e-10, + "loss": 47.5958, + "step": 249640 + }, + { + "epoch": 1.0086175899029157, + "grad_norm": 621.9278564453125, + "learning_rate": 2.388195818214811e-10, + "loss": 52.0255, + "step": 249650 + }, + { + "epoch": 1.0086579911682834, + "grad_norm": 814.9580688476562, + "learning_rate": 2.2536772859904632e-10, + "loss": 50.8963, + "step": 249660 + }, + { + "epoch": 1.008698392433651, + "grad_norm": 566.6548461914062, + "learning_rate": 2.1230578127973134e-10, + "loss": 70.7275, + "step": 249670 + }, + { + "epoch": 1.0087387936990186, + "grad_norm": 1687.9730224609375, + "learning_rate": 1.9963374011666703e-10, + "loss": 78.5721, + "step": 249680 + }, + { + "epoch": 1.0087791949643863, + "grad_norm": 486.9939270019531, + "learning_rate": 1.873516053607638e-10, + "loss": 57.9145, + "step": 249690 + }, + { + "epoch": 1.008819596229754, + "grad_norm": 481.72943115234375, + "learning_rate": 1.7545937724738894e-10, + "loss": 57.3564, + "step": 249700 + }, + { + "epoch": 1.0088599974951216, + "grad_norm": 582.765869140625, + "learning_rate": 1.6395705600968926e-10, + "loss": 66.8067, + "step": 249710 + }, + { + "epoch": 1.0089003987604892, + "grad_norm": 722.7216186523438, + "learning_rate": 1.5284464187192981e-10, + "loss": 86.6796, + "step": 249720 + }, + { + "epoch": 1.0089408000258568, + "grad_norm": 423.3017578125, + "learning_rate": 1.421221350539348e-10, + "loss": 36.3374, + "step": 249730 + }, + { + "epoch": 1.0089812012912245, + "grad_norm": 765.08056640625, + "learning_rate": 1.317895357599852e-10, + "loss": 55.4276, + "step": 249740 + }, + { + "epoch": 1.0090216025565921, + "grad_norm": 458.633544921875, + "learning_rate": 1.2184684419214166e-10, + "loss": 62.0634, + "step": 249750 + }, + { + "epoch": 1.0090620038219598, + "grad_norm": 512.1096801757812, + "learning_rate": 1.1229406054802383e-10, + "loss": 72.1461, + "step": 249760 + }, + { + "epoch": 1.0091024050873274, + "grad_norm": 575.4948120117188, + "learning_rate": 1.0313118500970831e-10, + "loss": 57.1718, + "step": 249770 + }, + { + "epoch": 1.009142806352695, + "grad_norm": 362.7676086425781, + "learning_rate": 9.435821775705123e-11, + "loss": 44.035, + "step": 249780 + }, + { + "epoch": 1.0091832076180627, + "grad_norm": 818.1105346679688, + "learning_rate": 8.597515896324737e-11, + "loss": 71.8585, + "step": 249790 + }, + { + "epoch": 1.0092236088834303, + "grad_norm": 736.1803588867188, + "learning_rate": 7.798200878816886e-11, + "loss": 87.0728, + "step": 249800 + }, + { + "epoch": 1.009264010148798, + "grad_norm": 216.5111541748047, + "learning_rate": 7.03787673916878e-11, + "loss": 63.7444, + "step": 249810 + }, + { + "epoch": 1.0093044114141656, + "grad_norm": 384.854248046875, + "learning_rate": 6.31654349181332e-11, + "loss": 53.48, + "step": 249820 + }, + { + "epoch": 1.009344812679533, + "grad_norm": 495.5076904296875, + "learning_rate": 5.63420115096136e-11, + "loss": 63.2487, + "step": 249830 + }, + { + "epoch": 1.0093852139449007, + "grad_norm": 579.0645751953125, + "learning_rate": 4.9908497301576206e-11, + "loss": 50.9092, + "step": 249840 + }, + { + "epoch": 1.0094256152102683, + "grad_norm": 675.76953125, + "learning_rate": 4.386489241392511e-11, + "loss": 63.6097, + "step": 249850 + }, + { + "epoch": 1.009466016475636, + "grad_norm": 380.5902404785156, + "learning_rate": 3.821119697100528e-11, + "loss": 63.8182, + "step": 249860 + }, + { + "epoch": 1.0095064177410036, + "grad_norm": 355.2414245605469, + "learning_rate": 3.2947411077177694e-11, + "loss": 55.9546, + "step": 
249870 + }, + { + "epoch": 1.0095468190063712, + "grad_norm": 267.0750427246094, + "learning_rate": 2.8073534839023753e-11, + "loss": 82.4405, + "step": 249880 + }, + { + "epoch": 1.0095872202717389, + "grad_norm": 302.30224609375, + "learning_rate": 2.358956835202264e-11, + "loss": 58.0008, + "step": 249890 + }, + { + "epoch": 1.0096276215371065, + "grad_norm": 669.5781860351562, + "learning_rate": 1.949551169833086e-11, + "loss": 48.5882, + "step": 249900 + }, + { + "epoch": 1.0096680228024741, + "grad_norm": 618.9566040039062, + "learning_rate": 1.579136496454581e-11, + "loss": 39.3103, + "step": 249910 + }, + { + "epoch": 1.0097084240678418, + "grad_norm": 2055.600830078125, + "learning_rate": 1.2477128217280864e-11, + "loss": 63.2549, + "step": 249920 + }, + { + "epoch": 1.0097488253332094, + "grad_norm": 954.0150756835938, + "learning_rate": 9.552801523149412e-12, + "loss": 67.9715, + "step": 249930 + }, + { + "epoch": 1.009789226598577, + "grad_norm": 901.2742309570312, + "learning_rate": 7.018384942103496e-12, + "loss": 66.1644, + "step": 249940 + }, + { + "epoch": 1.0098296278639447, + "grad_norm": 663.9369506835938, + "learning_rate": 4.873878518552033e-12, + "loss": 60.1538, + "step": 249950 + }, + { + "epoch": 1.0098700291293123, + "grad_norm": 809.4927978515625, + "learning_rate": 3.119282296903947e-12, + "loss": 78.8019, + "step": 249960 + }, + { + "epoch": 1.00991043039468, + "grad_norm": 347.0174865722656, + "learning_rate": 1.754596312686374e-12, + "loss": 54.3162, + "step": 249970 + }, + { + "epoch": 1.0099508316600476, + "grad_norm": 259.03460693359375, + "learning_rate": 7.798205903242206e-13, + "loss": 81.9569, + "step": 249980 + }, + { + "epoch": 1.0099912329254153, + "grad_norm": 841.800537109375, + "learning_rate": 1.9495514758105516e-13, + "loss": 61.0793, + "step": 249990 + }, + { + "epoch": 1.010031634190783, + "grad_norm": 511.8416442871094, + "learning_rate": 0.0, + "loss": 39.5704, + "step": 250000 + } + ], + "logging_steps": 10, + "max_steps": 250000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 4000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}