diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,43416 +1,8416 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.5009736665616238, + "epoch": 0.04848151844115758, "eval_steps": 500, - "global_step": 62000, + "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 8.080220428413287e-05, - "grad_norm": 120464.7890625, - "learning_rate": 4e-08, - "loss": 6299.9457, + "epoch": 4.040126536763131e-05, + "grad_norm": 13786306.0, + "learning_rate": 2e-08, + "loss": 1195071.7, "step": 10 }, { - "epoch": 0.00016160440856826573, - "grad_norm": 241508.140625, - "learning_rate": 8e-08, - "loss": 7276.068, + "epoch": 8.080253073526263e-05, + "grad_norm": 3502914.5, + "learning_rate": 4e-08, + "loss": 1488604.5, "step": 20 }, { - "epoch": 0.00024240661285239863, - "grad_norm": 215982.046875, - "learning_rate": 1.2e-07, - "loss": 7175.6945, + "epoch": 0.00012120379610289395, + "grad_norm": 5249533.5, + "learning_rate": 6e-08, + "loss": 1363003.1, "step": 30 }, { - "epoch": 0.00032320881713653147, - "grad_norm": 459522.375, - "learning_rate": 1.6e-07, - "loss": 8219.4047, + "epoch": 0.00016160506147052525, + "grad_norm": 17823226.0, + "learning_rate": 8e-08, + "loss": 1539778.9, "step": 40 }, { - "epoch": 0.00040401102142066436, - "grad_norm": 127352.8203125, - "learning_rate": 2.0000000000000002e-07, - "loss": 5466.4887, + "epoch": 0.00020200632683815657, + "grad_norm": 14010826.0, + "learning_rate": 1.0000000000000001e-07, + "loss": 1378274.9, "step": 50 }, { - "epoch": 0.00048481322570479725, - "grad_norm": 37251.453125, - "learning_rate": 2.4e-07, - "loss": 7538.1164, + "epoch": 0.0002424075922057879, + "grad_norm": 23003992.0, + "learning_rate": 1.2e-07, + "loss": 1250654.1, "step": 60 }, { - "epoch": 0.0005656154299889301, - "grad_norm": 166443.484375, - "learning_rate": 2.8e-07, - "loss": 6918.2461, + "epoch": 0.0002828088575734192, + "grad_norm": 7809646.0, + "learning_rate": 1.4e-07, + "loss": 1819393.6, "step": 70 }, { - "epoch": 0.0006464176342730629, - "grad_norm": 64075.265625, - "learning_rate": 3.2e-07, - "loss": 7033.7508, + "epoch": 0.0003232101229410505, + "grad_norm": 43233020.0, + "learning_rate": 1.6e-07, + "loss": 1770900.4, "step": 80 }, { - "epoch": 0.0007272198385571959, - "grad_norm": 114055.5625, - "learning_rate": 3.6e-07, - "loss": 5786.0078, + "epoch": 0.0003636113883086818, + "grad_norm": 6094604.5, + "learning_rate": 1.8e-07, + "loss": 1529363.0, "step": 90 }, { - "epoch": 0.0008080220428413287, - "grad_norm": 103202.8515625, - "learning_rate": 4.0000000000000003e-07, - "loss": 3995.518, + "epoch": 0.00040401265367631315, + "grad_norm": 8696879.0, + "learning_rate": 2.0000000000000002e-07, + "loss": 1028300.1, "step": 100 }, { - "epoch": 0.0008888242471254616, - "grad_norm": 82120.25, - "learning_rate": 4.4e-07, - "loss": 3580.6078, + "epoch": 0.00044441391904394446, + "grad_norm": 7774881.5, + "learning_rate": 2.2e-07, + "loss": 1786590.6, "step": 110 }, { - "epoch": 0.0009696264514095945, - "grad_norm": 97468.5390625, - "learning_rate": 4.8e-07, - "loss": 4251.7633, + "epoch": 0.0004848151844115758, + "grad_norm": 2879459.0, + "learning_rate": 2.4e-07, + "loss": 1377895.4, "step": 120 }, { - "epoch": 0.0010504286556937273, - "grad_norm": 176016.71875, - "learning_rate": 5.2e-07, - "loss": 5624.952, + "epoch": 0.000525216449779207, + "grad_norm": 19783298.0, + "learning_rate": 2.6e-07, + "loss": 1599422.0, "step": 130 }, { - "epoch": 0.0011312308599778602, - "grad_norm": 75456.171875, - "learning_rate": 5.6e-07, - "loss": 3680.2211, + "epoch": 0.0005656177151468384, + "grad_norm": 2185791.0, + "learning_rate": 2.8e-07, + "loss": 1243788.4, "step": 140 }, { - "epoch": 0.001212033064261993, - "grad_norm": 51971.734375, - "learning_rate": 6.000000000000001e-07, - "loss": 2647.0199, + "epoch": 0.0006060189805144697, + "grad_norm": 9816560.0, + "learning_rate": 3.0000000000000004e-07, + "loss": 1245093.3, "step": 150 }, { - "epoch": 0.0012928352685461259, - "grad_norm": 52799.71484375, - "learning_rate": 6.4e-07, - "loss": 4544.4746, + "epoch": 0.000646420245882101, + "grad_norm": 9404809.0, + "learning_rate": 3.2e-07, + "loss": 1288713.5, "step": 160 }, { - "epoch": 0.0013736374728302587, - "grad_norm": 71383.046875, - "learning_rate": 6.8e-07, - "loss": 3399.3539, + "epoch": 0.0006868215112497323, + "grad_norm": 8446407.0, + "learning_rate": 3.4e-07, + "loss": 1147532.8, "step": 170 }, { - "epoch": 0.0014544396771143918, - "grad_norm": 11680.890625, - "learning_rate": 7.2e-07, - "loss": 1805.7992, + "epoch": 0.0007272227766173637, + "grad_norm": 3988919.0, + "learning_rate": 3.6e-07, + "loss": 704286.9, "step": 180 }, { - "epoch": 0.0015352418813985246, - "grad_norm": 62105.87109375, - "learning_rate": 7.6e-07, - "loss": 2236.952, + "epoch": 0.000767624041984995, + "grad_norm": 2574691.5, + "learning_rate": 3.8e-07, + "loss": 545635.6, "step": 190 }, { - "epoch": 0.0016160440856826574, - "grad_norm": 122126.75, - "learning_rate": 8.000000000000001e-07, - "loss": 2166.6689, + "epoch": 0.0008080253073526263, + "grad_norm": 21092594.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 728793.05, "step": 200 }, { - "epoch": 0.0016968462899667903, - "grad_norm": 7320.271484375, - "learning_rate": 8.4e-07, - "loss": 1651.749, + "epoch": 0.0008484265727202576, + "grad_norm": 2704460.5, + "learning_rate": 4.2e-07, + "loss": 467207.7, "step": 210 }, { - "epoch": 0.0017776484942509231, - "grad_norm": 7491.0078125, - "learning_rate": 8.8e-07, - "loss": 1587.3332, + "epoch": 0.0008888278380878889, + "grad_norm": 4876784.0, + "learning_rate": 4.4e-07, + "loss": 369614.55, "step": 220 }, { - "epoch": 0.001858450698535056, - "grad_norm": 10839.4326171875, - "learning_rate": 9.2e-07, - "loss": 1348.7195, + "epoch": 0.0009292291034555202, + "grad_norm": 10803276.0, + "learning_rate": 4.6e-07, + "loss": 428869.6, "step": 230 }, { - "epoch": 0.001939252902819189, - "grad_norm": 16774.65234375, - "learning_rate": 9.6e-07, - "loss": 1362.4081, + "epoch": 0.0009696303688231516, + "grad_norm": 5475451.0, + "learning_rate": 4.8e-07, + "loss": 404623.45, "step": 240 }, { - "epoch": 0.0020200551071033216, - "grad_norm": 7878.6630859375, - "learning_rate": 1.0000000000000002e-06, - "loss": 1098.9078, + "epoch": 0.0010100316341907828, + "grad_norm": 3401077.25, + "learning_rate": 5.000000000000001e-07, + "loss": 606814.6, "step": 250 }, { - "epoch": 0.0021008573113874547, - "grad_norm": 4392.02099609375, - "learning_rate": 1.04e-06, - "loss": 846.722, + "epoch": 0.001050432899558414, + "grad_norm": 1785725.0, + "learning_rate": 5.2e-07, + "loss": 230520.125, "step": 260 }, { - "epoch": 0.0021816595156715873, - "grad_norm": 3646.33984375, - "learning_rate": 1.08e-06, - "loss": 771.4109, + "epoch": 0.0010908341649260454, + "grad_norm": 26237066.0, + "learning_rate": 5.4e-07, + "loss": 251476.375, "step": 270 }, { - "epoch": 0.0022624617199557204, - "grad_norm": 10479.375, - "learning_rate": 1.12e-06, - "loss": 787.6179, + "epoch": 0.0011312354302936767, + "grad_norm": 3433710.25, + "learning_rate": 5.6e-07, + "loss": 378523.225, "step": 280 }, { - "epoch": 0.0023432639242398534, - "grad_norm": 7161.4765625, - "learning_rate": 1.16e-06, - "loss": 647.1227, + "epoch": 0.001171636695661308, + "grad_norm": 3058920.5, + "learning_rate": 5.8e-07, + "loss": 118036.575, "step": 290 }, { - "epoch": 0.002424066128523986, - "grad_norm": 2936.88330078125, - "learning_rate": 1.2000000000000002e-06, - "loss": 708.7227, + "epoch": 0.0012120379610289394, + "grad_norm": 2775939.25, + "learning_rate": 6.000000000000001e-07, + "loss": 128199.575, "step": 300 }, { - "epoch": 0.002504868332808119, - "grad_norm": 4183.91650390625, - "learning_rate": 1.24e-06, - "loss": 776.9394, + "epoch": 0.0012524392263965707, + "grad_norm": 2798658.75, + "learning_rate": 6.2e-07, + "loss": 145799.6125, "step": 310 }, { - "epoch": 0.0025856705370922517, - "grad_norm": 8742.2177734375, - "learning_rate": 1.28e-06, - "loss": 630.9082, + "epoch": 0.001292840491764202, + "grad_norm": 3050425.75, + "learning_rate": 6.4e-07, + "loss": 170385.65, "step": 320 }, { - "epoch": 0.002666472741376385, - "grad_norm": 2643.6337890625, - "learning_rate": 1.32e-06, - "loss": 534.4705, + "epoch": 0.0013332417571318333, + "grad_norm": 483486.6875, + "learning_rate": 6.6e-07, + "loss": 79056.7, "step": 330 }, { - "epoch": 0.0027472749456605174, - "grad_norm": 2986.521728515625, - "learning_rate": 1.36e-06, - "loss": 560.3912, + "epoch": 0.0013736430224994647, + "grad_norm": 903465.9375, + "learning_rate": 6.8e-07, + "loss": 69717.8562, "step": 340 }, { - "epoch": 0.0028280771499446505, - "grad_norm": 2350.021484375, - "learning_rate": 1.4000000000000001e-06, - "loss": 550.6548, + "epoch": 0.001414044287867096, + "grad_norm": 166525.765625, + "learning_rate": 7.000000000000001e-07, + "loss": 29574.1, "step": 350 }, { - "epoch": 0.0029088793542287835, - "grad_norm": 2693.84423828125, - "learning_rate": 1.44e-06, - "loss": 606.1348, + "epoch": 0.0014544455532347273, + "grad_norm": 83730.7109375, + "learning_rate": 7.2e-07, + "loss": 9268.3289, "step": 360 }, { - "epoch": 0.002989681558512916, - "grad_norm": 5240.84912109375, - "learning_rate": 1.4800000000000002e-06, - "loss": 449.3018, + "epoch": 0.0014948468186023586, + "grad_norm": 13107.71484375, + "learning_rate": 7.400000000000001e-07, + "loss": 16814.9484, "step": 370 }, { - "epoch": 0.003070483762797049, - "grad_norm": 1318.360595703125, - "learning_rate": 1.52e-06, - "loss": 342.8536, + "epoch": 0.00153524808396999, + "grad_norm": 199187.046875, + "learning_rate": 7.6e-07, + "loss": 4728.9383, "step": 380 }, { - "epoch": 0.003151285967081182, - "grad_norm": 1237.4891357421875, - "learning_rate": 1.56e-06, - "loss": 563.5072, + "epoch": 0.0015756493493376213, + "grad_norm": 209663.171875, + "learning_rate": 7.8e-07, + "loss": 3811.3047, "step": 390 }, { - "epoch": 0.003232088171365315, - "grad_norm": 1203.504150390625, - "learning_rate": 1.6000000000000001e-06, - "loss": 469.4237, + "epoch": 0.0016160506147052526, + "grad_norm": 261596.96875, + "learning_rate": 8.000000000000001e-07, + "loss": 1873.8627, "step": 400 }, { - "epoch": 0.0033128903756494475, - "grad_norm": 2520.18505859375, - "learning_rate": 1.6400000000000002e-06, - "loss": 449.855, + "epoch": 0.001656451880072884, + "grad_norm": 9995.125, + "learning_rate": 8.200000000000001e-07, + "loss": 1428.6535, "step": 410 }, { - "epoch": 0.0033936925799335806, - "grad_norm": 1450.073486328125, - "learning_rate": 1.68e-06, - "loss": 383.9006, + "epoch": 0.0016968531454405152, + "grad_norm": 2573.125732421875, + "learning_rate": 8.4e-07, + "loss": 533.159, "step": 420 }, { - "epoch": 0.0034744947842177136, - "grad_norm": 1275.7200927734375, - "learning_rate": 1.72e-06, - "loss": 487.236, + "epoch": 0.0017372544108081465, + "grad_norm": 547.4088134765625, + "learning_rate": 8.6e-07, + "loss": 702.8197, "step": 430 }, { - "epoch": 0.0035552969885018462, - "grad_norm": 811.244140625, - "learning_rate": 1.76e-06, - "loss": 452.3355, + "epoch": 0.0017776556761757779, + "grad_norm": 734.5255126953125, + "learning_rate": 8.8e-07, + "loss": 485.3759, "step": 440 }, { - "epoch": 0.0036360991927859793, - "grad_norm": 1536.311279296875, - "learning_rate": 1.8e-06, - "loss": 451.7344, + "epoch": 0.0018180569415434092, + "grad_norm": 5114.30615234375, + "learning_rate": 9e-07, + "loss": 545.9343, "step": 450 }, { - "epoch": 0.003716901397070112, - "grad_norm": 3434.607177734375, - "learning_rate": 1.84e-06, - "loss": 445.9789, + "epoch": 0.0018584582069110405, + "grad_norm": 1266.8800048828125, + "learning_rate": 9.2e-07, + "loss": 482.805, "step": 460 }, { - "epoch": 0.003797703601354245, - "grad_norm": 2198.537841796875, - "learning_rate": 1.8800000000000002e-06, - "loss": 450.8833, + "epoch": 0.0018988594722786718, + "grad_norm": 5457.6689453125, + "learning_rate": 9.400000000000001e-07, + "loss": 550.8278, "step": 470 }, { - "epoch": 0.003878505805638378, - "grad_norm": 1277.9805908203125, - "learning_rate": 1.92e-06, - "loss": 377.03, + "epoch": 0.0019392607376463031, + "grad_norm": 1092.0926513671875, + "learning_rate": 9.6e-07, + "loss": 572.7061, "step": 480 }, { - "epoch": 0.003959308009922511, - "grad_norm": 1207.123779296875, - "learning_rate": 1.96e-06, - "loss": 394.4269, + "epoch": 0.0019796620030139342, + "grad_norm": 1708.60546875, + "learning_rate": 9.8e-07, + "loss": 447.9137, "step": 490 }, { - "epoch": 0.004040110214206643, - "grad_norm": 2832.99658203125, - "learning_rate": 2.0000000000000003e-06, - "loss": 398.5146, + "epoch": 0.0020200632683815656, + "grad_norm": 833.2102661132812, + "learning_rate": 1.0000000000000002e-06, + "loss": 452.3768, "step": 500 }, { - "epoch": 0.004120912418490777, - "grad_norm": 3714.432373046875, - "learning_rate": 2.0400000000000004e-06, - "loss": 438.9916, + "epoch": 0.002060464533749197, + "grad_norm": 854.2012329101562, + "learning_rate": 1.0200000000000002e-06, + "loss": 453.9597, "step": 510 }, { - "epoch": 0.004201714622774909, - "grad_norm": 1005.0446166992188, - "learning_rate": 2.08e-06, - "loss": 551.6515, + "epoch": 0.002100865799116828, + "grad_norm": 883.29052734375, + "learning_rate": 1.04e-06, + "loss": 399.3994, "step": 520 }, { - "epoch": 0.004282516827059042, - "grad_norm": 2383.093994140625, - "learning_rate": 2.12e-06, - "loss": 413.3005, + "epoch": 0.0021412670644844595, + "grad_norm": 876.2081909179688, + "learning_rate": 1.06e-06, + "loss": 508.956, "step": 530 }, { - "epoch": 0.004363319031343175, - "grad_norm": 1522.02978515625, - "learning_rate": 2.16e-06, - "loss": 443.4297, + "epoch": 0.002181668329852091, + "grad_norm": 1216.003662109375, + "learning_rate": 1.08e-06, + "loss": 279.6047, "step": 540 }, { - "epoch": 0.004444121235627308, - "grad_norm": 1392.7056884765625, - "learning_rate": 2.2e-06, - "loss": 475.9268, + "epoch": 0.002222069595219722, + "grad_norm": 20951.376953125, + "learning_rate": 1.1e-06, + "loss": 523.4955, "step": 550 }, { - "epoch": 0.004524923439911441, - "grad_norm": 811.1383056640625, - "learning_rate": 2.24e-06, - "loss": 424.8331, + "epoch": 0.0022624708605873535, + "grad_norm": 6249.7998046875, + "learning_rate": 1.12e-06, + "loss": 373.9713, "step": 560 }, { - "epoch": 0.004605725644195573, - "grad_norm": 1622.7584228515625, - "learning_rate": 2.28e-06, - "loss": 432.1297, + "epoch": 0.002302872125954985, + "grad_norm": 8211.34375, + "learning_rate": 1.14e-06, + "loss": 322.8237, "step": 570 }, { - "epoch": 0.004686527848479707, - "grad_norm": 1191.283935546875, - "learning_rate": 2.32e-06, - "loss": 474.9673, + "epoch": 0.002343273391322616, + "grad_norm": 1029.943359375, + "learning_rate": 1.16e-06, + "loss": 443.8321, "step": 580 }, { - "epoch": 0.0047673300527638395, - "grad_norm": 1298.748291015625, - "learning_rate": 2.36e-06, - "loss": 400.011, + "epoch": 0.0023836746566902474, + "grad_norm": 5415.279296875, + "learning_rate": 1.18e-06, + "loss": 580.9646, "step": 590 }, { - "epoch": 0.004848132257047972, - "grad_norm": 818.8621215820312, - "learning_rate": 2.4000000000000003e-06, - "loss": 384.9881, + "epoch": 0.0024240759220578788, + "grad_norm": 683.0673217773438, + "learning_rate": 1.2000000000000002e-06, + "loss": 322.4693, "step": 600 }, { - "epoch": 0.004928934461332105, - "grad_norm": 1199.1748046875, - "learning_rate": 2.4400000000000004e-06, - "loss": 313.529, + "epoch": 0.00246447718742551, + "grad_norm": 3990.54931640625, + "learning_rate": 1.2200000000000002e-06, + "loss": 543.5786, "step": 610 }, { - "epoch": 0.005009736665616238, - "grad_norm": 1211.6876220703125, - "learning_rate": 2.48e-06, - "loss": 394.428, + "epoch": 0.0025048784527931414, + "grad_norm": 771.86767578125, + "learning_rate": 1.24e-06, + "loss": 464.2772, "step": 620 }, { - "epoch": 0.005090538869900371, - "grad_norm": 1843.43212890625, - "learning_rate": 2.52e-06, - "loss": 418.0629, + "epoch": 0.0025452797181607727, + "grad_norm": 830.4238891601562, + "learning_rate": 1.26e-06, + "loss": 502.5308, "step": 630 }, { - "epoch": 0.0051713410741845035, - "grad_norm": 1308.6473388671875, - "learning_rate": 2.56e-06, - "loss": 435.9449, + "epoch": 0.002585680983528404, + "grad_norm": 4528.138671875, + "learning_rate": 1.28e-06, + "loss": 368.3498, "step": 640 }, { - "epoch": 0.005252143278468637, - "grad_norm": 1077.8184814453125, - "learning_rate": 2.6e-06, - "loss": 415.5275, + "epoch": 0.0026260822488960354, + "grad_norm": 701.6304321289062, + "learning_rate": 1.3e-06, + "loss": 433.9283, "step": 650 }, { - "epoch": 0.00533294548275277, - "grad_norm": 4885.33154296875, - "learning_rate": 2.64e-06, - "loss": 465.1564, + "epoch": 0.0026664835142636667, + "grad_norm": 593.0372924804688, + "learning_rate": 1.32e-06, + "loss": 327.8368, "step": 660 }, { - "epoch": 0.005413747687036902, - "grad_norm": 1733.787353515625, - "learning_rate": 2.68e-06, - "loss": 376.4916, + "epoch": 0.002706884779631298, + "grad_norm": 718.9718627929688, + "learning_rate": 1.34e-06, + "loss": 363.8515, "step": 670 }, { - "epoch": 0.005494549891321035, - "grad_norm": 2824.440185546875, - "learning_rate": 2.72e-06, - "loss": 298.1259, + "epoch": 0.0027472860449989293, + "grad_norm": 1276.8353271484375, + "learning_rate": 1.36e-06, + "loss": 457.8593, "step": 680 }, { - "epoch": 0.005575352095605168, - "grad_norm": 1772.1290283203125, - "learning_rate": 2.7600000000000003e-06, - "loss": 416.0719, + "epoch": 0.0027876873103665606, + "grad_norm": 9622.8271484375, + "learning_rate": 1.3800000000000001e-06, + "loss": 448.6425, "step": 690 }, { - "epoch": 0.005656154299889301, - "grad_norm": 1537.0374755859375, - "learning_rate": 2.8000000000000003e-06, - "loss": 448.8441, + "epoch": 0.002828088575734192, + "grad_norm": 795.946044921875, + "learning_rate": 1.4000000000000001e-06, + "loss": 370.567, "step": 700 }, { - "epoch": 0.0057369565041734336, - "grad_norm": 2114.287841796875, - "learning_rate": 2.8400000000000003e-06, - "loss": 382.4187, + "epoch": 0.0028684898411018233, + "grad_norm": 2605.2119140625, + "learning_rate": 1.4200000000000002e-06, + "loss": 399.8033, "step": 710 }, { - "epoch": 0.005817758708457567, - "grad_norm": 1443.6988525390625, - "learning_rate": 2.88e-06, - "loss": 391.9944, + "epoch": 0.0029088911064694546, + "grad_norm": 3644.47607421875, + "learning_rate": 1.44e-06, + "loss": 519.6482, "step": 720 }, { - "epoch": 0.0058985609127417, - "grad_norm": 1869.8372802734375, - "learning_rate": 2.92e-06, - "loss": 393.6924, + "epoch": 0.002949292371837086, + "grad_norm": 859.5913696289062, + "learning_rate": 1.46e-06, + "loss": 366.024, "step": 730 }, { - "epoch": 0.005979363117025832, - "grad_norm": 668.3851928710938, - "learning_rate": 2.9600000000000005e-06, - "loss": 254.4029, + "epoch": 0.0029896936372047172, + "grad_norm": 9709.5673828125, + "learning_rate": 1.4800000000000002e-06, + "loss": 387.851, "step": 740 }, { - "epoch": 0.006060165321309965, - "grad_norm": 848.6920776367188, - "learning_rate": 3e-06, - "loss": 347.2792, + "epoch": 0.0030300949025723486, + "grad_norm": 1355.1983642578125, + "learning_rate": 1.5e-06, + "loss": 287.8213, "step": 750 }, { - "epoch": 0.006140967525594098, - "grad_norm": 991.5322875976562, - "learning_rate": 3.04e-06, - "loss": 312.4748, + "epoch": 0.00307049616793998, + "grad_norm": 1910.957275390625, + "learning_rate": 1.52e-06, + "loss": 263.4618, "step": 760 }, { - "epoch": 0.006221769729878231, - "grad_norm": 2497.806396484375, - "learning_rate": 3.08e-06, - "loss": 331.4418, + "epoch": 0.003110897433307611, + "grad_norm": 1562.11669921875, + "learning_rate": 1.54e-06, + "loss": 514.2329, "step": 770 }, { - "epoch": 0.006302571934162364, - "grad_norm": 1114.24609375, - "learning_rate": 3.12e-06, - "loss": 303.8009, + "epoch": 0.0031512986986752425, + "grad_norm": 765.830078125, + "learning_rate": 1.56e-06, + "loss": 332.8841, "step": 780 }, { - "epoch": 0.006383374138446497, - "grad_norm": 1654.25, - "learning_rate": 3.1600000000000007e-06, - "loss": 361.0829, + "epoch": 0.003191699964042874, + "grad_norm": 13385.1689453125, + "learning_rate": 1.5800000000000003e-06, + "loss": 456.157, "step": 790 }, { - "epoch": 0.00646417634273063, - "grad_norm": 1273.365966796875, - "learning_rate": 3.2000000000000003e-06, - "loss": 361.2258, + "epoch": 0.003232101229410505, + "grad_norm": 969.9119262695312, + "learning_rate": 1.6000000000000001e-06, + "loss": 300.1112, "step": 800 }, { - "epoch": 0.006544978547014762, - "grad_norm": 1538.13818359375, - "learning_rate": 3.24e-06, - "loss": 373.1735, + "epoch": 0.0032725024947781365, + "grad_norm": 1840.109619140625, + "learning_rate": 1.62e-06, + "loss": 384.0655, "step": 810 }, { - "epoch": 0.006625780751298895, - "grad_norm": 1388.8038330078125, - "learning_rate": 3.2800000000000004e-06, - "loss": 390.9153, + "epoch": 0.003312903760145768, + "grad_norm": 1270.9483642578125, + "learning_rate": 1.6400000000000002e-06, + "loss": 355.3219, "step": 820 }, { - "epoch": 0.0067065829555830285, - "grad_norm": 1278.6357421875, - "learning_rate": 3.3200000000000004e-06, - "loss": 378.8895, + "epoch": 0.003353305025513399, + "grad_norm": 668.4849243164062, + "learning_rate": 1.6600000000000002e-06, + "loss": 247.1219, "step": 830 }, { - "epoch": 0.006787385159867161, - "grad_norm": 1666.7745361328125, - "learning_rate": 3.36e-06, - "loss": 367.7609, + "epoch": 0.0033937062908810304, + "grad_norm": 1676.130615234375, + "learning_rate": 1.68e-06, + "loss": 312.9415, "step": 840 }, { - "epoch": 0.006868187364151294, - "grad_norm": 1185.635009765625, - "learning_rate": 3.4000000000000005e-06, - "loss": 342.8507, + "epoch": 0.0034341075562486618, + "grad_norm": 480.2029724121094, + "learning_rate": 1.7000000000000002e-06, + "loss": 388.1627, "step": 850 }, { - "epoch": 0.006948989568435427, - "grad_norm": 5413.97265625, - "learning_rate": 3.44e-06, - "loss": 396.4134, + "epoch": 0.003474508821616293, + "grad_norm": 579.8500366210938, + "learning_rate": 1.72e-06, + "loss": 423.7644, "step": 860 }, { - "epoch": 0.00702979177271956, - "grad_norm": 1959.2401123046875, - "learning_rate": 3.4799999999999997e-06, - "loss": 352.2037, + "epoch": 0.0035149100869839244, + "grad_norm": 780.9789428710938, + "learning_rate": 1.7399999999999999e-06, + "loss": 335.4401, "step": 870 }, { - "epoch": 0.0071105939770036925, - "grad_norm": 800.6333618164062, - "learning_rate": 3.52e-06, - "loss": 430.5951, + "epoch": 0.0035553113523515557, + "grad_norm": 500.0970153808594, + "learning_rate": 1.76e-06, + "loss": 375.4284, "step": 880 }, { - "epoch": 0.007191396181287825, - "grad_norm": 3441.051513671875, - "learning_rate": 3.5600000000000002e-06, - "loss": 343.6903, + "epoch": 0.003595712617719187, + "grad_norm": 663.9608154296875, + "learning_rate": 1.7800000000000001e-06, + "loss": 432.7744, "step": 890 }, { - "epoch": 0.007272198385571959, - "grad_norm": 1369.2808837890625, - "learning_rate": 3.6e-06, - "loss": 310.2747, + "epoch": 0.0036361138830868184, + "grad_norm": 738.2296142578125, + "learning_rate": 1.8e-06, + "loss": 330.4268, "step": 900 }, { - "epoch": 0.007353000589856091, - "grad_norm": 3001.960205078125, - "learning_rate": 3.6400000000000003e-06, - "loss": 380.9129, + "epoch": 0.0036765151484544497, + "grad_norm": 888.3779907226562, + "learning_rate": 1.8200000000000002e-06, + "loss": 328.1051, "step": 910 }, { - "epoch": 0.007433802794140224, - "grad_norm": 2471.067626953125, - "learning_rate": 3.68e-06, - "loss": 300.5005, + "epoch": 0.003716916413822081, + "grad_norm": 681.369873046875, + "learning_rate": 1.84e-06, + "loss": 455.4454, "step": 920 }, { - "epoch": 0.007514604998424357, - "grad_norm": 935.7294311523438, - "learning_rate": 3.72e-06, - "loss": 380.2881, + "epoch": 0.0037573176791897123, + "grad_norm": 719.9152221679688, + "learning_rate": 1.86e-06, + "loss": 291.1318, "step": 930 }, { - "epoch": 0.00759540720270849, - "grad_norm": 5652.1103515625, - "learning_rate": 3.7600000000000004e-06, - "loss": 373.3185, + "epoch": 0.0037977189445573436, + "grad_norm": 765.2400512695312, + "learning_rate": 1.8800000000000002e-06, + "loss": 453.5919, "step": 940 }, { - "epoch": 0.007676209406992623, - "grad_norm": 1333.6368408203125, - "learning_rate": 3.8e-06, - "loss": 370.8047, + "epoch": 0.003838120209924975, + "grad_norm": 586.1494140625, + "learning_rate": 1.9e-06, + "loss": 338.6631, "step": 950 }, { - "epoch": 0.007757011611276756, - "grad_norm": 889.0906372070312, - "learning_rate": 3.84e-06, - "loss": 396.7503, + "epoch": 0.0038785214752926063, + "grad_norm": 559.566650390625, + "learning_rate": 1.92e-06, + "loss": 309.9115, "step": 960 }, { - "epoch": 0.007837813815560889, - "grad_norm": 1351.9283447265625, - "learning_rate": 3.88e-06, - "loss": 384.5369, + "epoch": 0.003918922740660237, + "grad_norm": 2274.710205078125, + "learning_rate": 1.94e-06, + "loss": 366.6058, "step": 970 }, { - "epoch": 0.007918616019845021, - "grad_norm": 1260.0374755859375, - "learning_rate": 3.92e-06, - "loss": 299.2764, + "epoch": 0.0039593240060278685, + "grad_norm": 2413.857666015625, + "learning_rate": 1.96e-06, + "loss": 323.5407, "step": 980 }, { - "epoch": 0.007999418224129154, - "grad_norm": 979.3607177734375, - "learning_rate": 3.96e-06, - "loss": 405.3479, + "epoch": 0.0039997252713955, + "grad_norm": 466.5931396484375, + "learning_rate": 1.98e-06, + "loss": 332.9158, "step": 990 }, { - "epoch": 0.008080220428413287, - "grad_norm": 937.3470458984375, - "learning_rate": 4.000000000000001e-06, - "loss": 334.54, + "epoch": 0.004040126536763131, + "grad_norm": 1368.92626953125, + "learning_rate": 2.0000000000000003e-06, + "loss": 309.8214, "step": 1000 }, { - "epoch": 0.00816102263269742, - "grad_norm": 2103.044677734375, - "learning_rate": 4.04e-06, - "loss": 323.1982, + "epoch": 0.0040805278021307624, + "grad_norm": 578.7489624023438, + "learning_rate": 2.02e-06, + "loss": 293.1464, "step": 1010 }, { - "epoch": 0.008241824836981554, - "grad_norm": 1720.14892578125, - "learning_rate": 4.080000000000001e-06, - "loss": 386.8237, + "epoch": 0.004120929067498394, + "grad_norm": 932.1323852539062, + "learning_rate": 2.0400000000000004e-06, + "loss": 382.6469, "step": 1020 }, { - "epoch": 0.008322627041265686, - "grad_norm": 12011.7265625, - "learning_rate": 4.12e-06, - "loss": 344.0202, + "epoch": 0.004161330332866025, + "grad_norm": 861.5737915039062, + "learning_rate": 2.06e-06, + "loss": 394.3037, "step": 1030 }, { - "epoch": 0.008403429245549819, - "grad_norm": 1741.755615234375, - "learning_rate": 4.16e-06, - "loss": 373.6182, + "epoch": 0.004201731598233656, + "grad_norm": 540.3167114257812, + "learning_rate": 2.08e-06, + "loss": 356.1217, "step": 1040 }, { - "epoch": 0.008484231449833951, - "grad_norm": 919.65673828125, - "learning_rate": 4.2000000000000004e-06, - "loss": 315.8019, + "epoch": 0.004242132863601288, + "grad_norm": 645.2182006835938, + "learning_rate": 2.1000000000000002e-06, + "loss": 375.9719, "step": 1050 }, { - "epoch": 0.008565033654118084, - "grad_norm": 1153.2763671875, - "learning_rate": 4.24e-06, - "loss": 300.3839, + "epoch": 0.004282534128968919, + "grad_norm": 3072.72607421875, + "learning_rate": 2.12e-06, + "loss": 385.9836, "step": 1060 }, { - "epoch": 0.008645835858402217, - "grad_norm": 1367.701171875, - "learning_rate": 4.28e-06, - "loss": 349.8502, + "epoch": 0.00432293539433655, + "grad_norm": 1441.8131103515625, + "learning_rate": 2.14e-06, + "loss": 447.29, "step": 1070 }, { - "epoch": 0.00872663806268635, - "grad_norm": 1734.5045166015625, - "learning_rate": 4.32e-06, - "loss": 325.2381, + "epoch": 0.004363336659704182, + "grad_norm": 531.906494140625, + "learning_rate": 2.16e-06, + "loss": 307.2454, "step": 1080 }, { - "epoch": 0.008807440266970484, - "grad_norm": 1296.18896484375, - "learning_rate": 4.360000000000001e-06, - "loss": 324.7117, + "epoch": 0.004403737925071813, + "grad_norm": 4712.17236328125, + "learning_rate": 2.1800000000000003e-06, + "loss": 599.3445, "step": 1090 }, { - "epoch": 0.008888242471254616, - "grad_norm": 886.7296752929688, - "learning_rate": 4.4e-06, - "loss": 302.4112, + "epoch": 0.004444139190439444, + "grad_norm": 543.140380859375, + "learning_rate": 2.2e-06, + "loss": 236.8443, "step": 1100 }, { - "epoch": 0.008969044675538749, - "grad_norm": 1204.2841796875, - "learning_rate": 4.440000000000001e-06, - "loss": 279.8843, + "epoch": 0.004484540455807076, + "grad_norm": 724.6696166992188, + "learning_rate": 2.2200000000000003e-06, + "loss": 402.6835, "step": 1110 }, { - "epoch": 0.009049846879822881, - "grad_norm": 4668.9091796875, - "learning_rate": 4.48e-06, - "loss": 425.2566, + "epoch": 0.004524941721174707, + "grad_norm": 890.041015625, + "learning_rate": 2.24e-06, + "loss": 335.9333, "step": 1120 }, { - "epoch": 0.009130649084107014, - "grad_norm": 1889.599853515625, - "learning_rate": 4.52e-06, - "loss": 323.0021, + "epoch": 0.004565342986542338, + "grad_norm": 517.4255981445312, + "learning_rate": 2.26e-06, + "loss": 377.6742, "step": 1130 }, { - "epoch": 0.009211451288391147, - "grad_norm": 3771.015869140625, - "learning_rate": 4.56e-06, - "loss": 306.9036, + "epoch": 0.00460574425190997, + "grad_norm": 1773.51025390625, + "learning_rate": 2.28e-06, + "loss": 387.5232, "step": 1140 }, { - "epoch": 0.00929225349267528, - "grad_norm": 2646.349853515625, - "learning_rate": 4.6e-06, - "loss": 260.4376, + "epoch": 0.004646145517277601, + "grad_norm": 1350.251220703125, + "learning_rate": 2.3e-06, + "loss": 430.9959, "step": 1150 }, { - "epoch": 0.009373055696959414, - "grad_norm": 1026.1678466796875, - "learning_rate": 4.64e-06, - "loss": 308.0339, + "epoch": 0.004686546782645232, + "grad_norm": 1267.9136962890625, + "learning_rate": 2.32e-06, + "loss": 417.8642, "step": 1160 }, { - "epoch": 0.009453857901243546, - "grad_norm": 1080.6373291015625, - "learning_rate": 4.68e-06, - "loss": 308.2251, + "epoch": 0.0047269480480128636, + "grad_norm": 1760.7581787109375, + "learning_rate": 2.34e-06, + "loss": 335.7049, "step": 1170 }, { - "epoch": 0.009534660105527679, - "grad_norm": 2103.670654296875, - "learning_rate": 4.72e-06, - "loss": 415.5648, + "epoch": 0.004767349313380495, + "grad_norm": 471.7587890625, + "learning_rate": 2.36e-06, + "loss": 321.8847, "step": 1180 }, { - "epoch": 0.009615462309811812, - "grad_norm": 1400.092041015625, - "learning_rate": 4.76e-06, - "loss": 311.9965, + "epoch": 0.004807750578748126, + "grad_norm": 1277.0931396484375, + "learning_rate": 2.38e-06, + "loss": 339.8787, "step": 1190 }, { - "epoch": 0.009696264514095944, - "grad_norm": 620.5350341796875, - "learning_rate": 4.800000000000001e-06, - "loss": 380.1777, + "epoch": 0.0048481518441157575, + "grad_norm": 766.57568359375, + "learning_rate": 2.4000000000000003e-06, + "loss": 343.6708, "step": 1200 }, { - "epoch": 0.009777066718380077, - "grad_norm": 1039.4932861328125, - "learning_rate": 4.84e-06, - "loss": 262.8926, + "epoch": 0.004888553109483389, + "grad_norm": 733.90234375, + "learning_rate": 2.42e-06, + "loss": 284.1223, "step": 1210 }, { - "epoch": 0.00985786892266421, - "grad_norm": 972.1863403320312, - "learning_rate": 4.880000000000001e-06, - "loss": 300.6945, + "epoch": 0.00492895437485102, + "grad_norm": 705.3512573242188, + "learning_rate": 2.4400000000000004e-06, + "loss": 290.2316, "step": 1220 }, { - "epoch": 0.009938671126948344, - "grad_norm": 682.8450317382812, - "learning_rate": 4.92e-06, - "loss": 215.8538, + "epoch": 0.0049693556402186515, + "grad_norm": 1195.5079345703125, + "learning_rate": 2.46e-06, + "loss": 351.2376, "step": 1230 }, { - "epoch": 0.010019473331232476, - "grad_norm": 1634.9962158203125, - "learning_rate": 4.96e-06, - "loss": 339.8867, + "epoch": 0.005009756905586283, + "grad_norm": 1140.5634765625, + "learning_rate": 2.48e-06, + "loss": 364.3263, "step": 1240 }, { - "epoch": 0.010100275535516609, - "grad_norm": 767.261962890625, - "learning_rate": 5e-06, - "loss": 315.1374, + "epoch": 0.005050158170953914, + "grad_norm": 1419.2325439453125, + "learning_rate": 2.5e-06, + "loss": 420.2925, "step": 1250 }, { - "epoch": 0.010181077739800742, - "grad_norm": 861.3399658203125, - "learning_rate": 5.04e-06, - "loss": 266.3382, + "epoch": 0.0050905594363215454, + "grad_norm": 945.1781616210938, + "learning_rate": 2.52e-06, + "loss": 377.5975, "step": 1260 }, { - "epoch": 0.010261879944084874, - "grad_norm": 1114.8607177734375, - "learning_rate": 5.08e-06, - "loss": 254.9846, + "epoch": 0.005130960701689177, + "grad_norm": 1156.38818359375, + "learning_rate": 2.54e-06, + "loss": 438.8611, "step": 1270 }, { - "epoch": 0.010342682148369007, - "grad_norm": 950.5001220703125, - "learning_rate": 5.12e-06, - "loss": 349.8637, + "epoch": 0.005171361967056808, + "grad_norm": 803.0604858398438, + "learning_rate": 2.56e-06, + "loss": 275.1979, "step": 1280 }, { - "epoch": 0.01042348435265314, - "grad_norm": 957.1220092773438, - "learning_rate": 5.1600000000000006e-06, - "loss": 315.3476, + "epoch": 0.005211763232424439, + "grad_norm": 4027.070556640625, + "learning_rate": 2.5800000000000003e-06, + "loss": 417.5213, "step": 1290 }, { - "epoch": 0.010504286556937274, - "grad_norm": 3296.61181640625, - "learning_rate": 5.2e-06, - "loss": 305.2104, + "epoch": 0.005252164497792071, + "grad_norm": 762.65771484375, + "learning_rate": 2.6e-06, + "loss": 338.176, "step": 1300 }, { - "epoch": 0.010585088761221407, - "grad_norm": 1295.2667236328125, - "learning_rate": 5.240000000000001e-06, - "loss": 332.7288, + "epoch": 0.005292565763159702, + "grad_norm": 514.9345092773438, + "learning_rate": 2.6200000000000003e-06, + "loss": 360.1529, "step": 1310 }, { - "epoch": 0.01066589096550554, - "grad_norm": 1020.7820434570312, - "learning_rate": 5.28e-06, - "loss": 371.7549, + "epoch": 0.005332967028527333, + "grad_norm": 0.0, + "learning_rate": 2.64e-06, + "loss": 437.6661, "step": 1320 }, { - "epoch": 0.010746693169789672, - "grad_norm": 693.24267578125, - "learning_rate": 5.32e-06, - "loss": 234.0047, + "epoch": 0.005373368293894965, + "grad_norm": 964.8443603515625, + "learning_rate": 2.66e-06, + "loss": 348.5103, "step": 1330 }, { - "epoch": 0.010827495374073804, - "grad_norm": 1412.2923583984375, - "learning_rate": 5.36e-06, - "loss": 325.309, + "epoch": 0.005413769559262596, + "grad_norm": 1033.2138671875, + "learning_rate": 2.68e-06, + "loss": 351.3489, "step": 1340 }, { - "epoch": 0.010908297578357937, - "grad_norm": 3597.651123046875, - "learning_rate": 5.4e-06, - "loss": 363.1182, + "epoch": 0.005454170824630227, + "grad_norm": 621.0077514648438, + "learning_rate": 2.7e-06, + "loss": 297.7263, "step": 1350 }, { - "epoch": 0.01098909978264207, - "grad_norm": 819.6455688476562, - "learning_rate": 5.44e-06, - "loss": 411.9063, + "epoch": 0.005494572089997859, + "grad_norm": 670.5285034179688, + "learning_rate": 2.72e-06, + "loss": 266.751, "step": 1360 }, { - "epoch": 0.011069901986926204, - "grad_norm": 2086.99267578125, - "learning_rate": 5.48e-06, - "loss": 270.0551, + "epoch": 0.00553497335536549, + "grad_norm": 1063.011962890625, + "learning_rate": 2.74e-06, + "loss": 388.5708, "step": 1370 }, { - "epoch": 0.011150704191210337, - "grad_norm": 1487.234375, - "learning_rate": 5.5200000000000005e-06, - "loss": 299.9755, + "epoch": 0.005575374620733121, + "grad_norm": 1352.453369140625, + "learning_rate": 2.7600000000000003e-06, + "loss": 413.9111, "step": 1380 }, { - "epoch": 0.01123150639549447, - "grad_norm": 2780.013671875, - "learning_rate": 5.56e-06, - "loss": 252.8466, + "epoch": 0.005615775886100753, + "grad_norm": 1039.40966796875, + "learning_rate": 2.78e-06, + "loss": 425.6994, "step": 1390 }, { - "epoch": 0.011312308599778602, - "grad_norm": 1112.380126953125, - "learning_rate": 5.600000000000001e-06, - "loss": 317.6261, + "epoch": 0.005656177151468384, + "grad_norm": 852.0213012695312, + "learning_rate": 2.8000000000000003e-06, + "loss": 379.7008, "step": 1400 }, { - "epoch": 0.011393110804062734, - "grad_norm": 885.0089111328125, - "learning_rate": 5.64e-06, - "loss": 283.4526, + "epoch": 0.005696578416836015, + "grad_norm": 560.5407104492188, + "learning_rate": 2.82e-06, + "loss": 373.5774, "step": 1410 }, { - "epoch": 0.011473913008346867, - "grad_norm": 1191.376708984375, - "learning_rate": 5.680000000000001e-06, - "loss": 312.9826, + "epoch": 0.0057369796822036466, + "grad_norm": 2221.54248046875, + "learning_rate": 2.8400000000000003e-06, + "loss": 377.4932, "step": 1420 }, { - "epoch": 0.011554715212631, - "grad_norm": 980.507568359375, - "learning_rate": 5.72e-06, - "loss": 306.7443, + "epoch": 0.005777380947571278, + "grad_norm": 986.341064453125, + "learning_rate": 2.86e-06, + "loss": 381.8052, "step": 1430 }, { - "epoch": 0.011635517416915134, - "grad_norm": 1093.092529296875, - "learning_rate": 5.76e-06, - "loss": 267.1355, + "epoch": 0.005817782212938909, + "grad_norm": 2288.12060546875, + "learning_rate": 2.88e-06, + "loss": 396.2358, "step": 1440 }, { - "epoch": 0.011716319621199267, - "grad_norm": 1268.4532470703125, - "learning_rate": 5.8e-06, - "loss": 339.6959, + "epoch": 0.0058581834783065405, + "grad_norm": 506.0441589355469, + "learning_rate": 2.9e-06, + "loss": 306.8475, "step": 1450 }, { - "epoch": 0.0117971218254834, - "grad_norm": 940.1403198242188, - "learning_rate": 5.84e-06, - "loss": 331.6033, + "epoch": 0.005898584743674172, + "grad_norm": 1451.433837890625, + "learning_rate": 2.92e-06, + "loss": 421.7085, "step": 1460 }, { - "epoch": 0.011877924029767532, - "grad_norm": 1112.1826171875, - "learning_rate": 5.8800000000000005e-06, - "loss": 268.7629, + "epoch": 0.005938986009041803, + "grad_norm": 410.0963439941406, + "learning_rate": 2.9400000000000002e-06, + "loss": 245.0907, "step": 1470 }, { - "epoch": 0.011958726234051665, - "grad_norm": 3810.572021484375, - "learning_rate": 5.920000000000001e-06, - "loss": 489.5545, + "epoch": 0.0059793872744094345, + "grad_norm": 644.2957153320312, + "learning_rate": 2.9600000000000005e-06, + "loss": 256.7748, "step": 1480 }, { - "epoch": 0.012039528438335797, - "grad_norm": 1026.4713134765625, - "learning_rate": 5.9600000000000005e-06, - "loss": 326.0129, + "epoch": 0.006019788539777066, + "grad_norm": 587.813232421875, + "learning_rate": 2.9800000000000003e-06, + "loss": 307.0451, "step": 1490 }, { - "epoch": 0.01212033064261993, - "grad_norm": 955.7890014648438, - "learning_rate": 6e-06, - "loss": 302.8571, + "epoch": 0.006060189805144697, + "grad_norm": 661.851806640625, + "learning_rate": 3e-06, + "loss": 240.7283, "step": 1500 }, { - "epoch": 0.012201132846904064, - "grad_norm": 1472.0972900390625, - "learning_rate": 6.040000000000001e-06, - "loss": 267.3414, + "epoch": 0.0061005910705123284, + "grad_norm": 9922.1162109375, + "learning_rate": 3.0200000000000003e-06, + "loss": 314.2891, "step": 1510 }, { - "epoch": 0.012281935051188197, - "grad_norm": 1580.728515625, - "learning_rate": 6.08e-06, - "loss": 369.9728, + "epoch": 0.00614099233587996, + "grad_norm": 681.8471069335938, + "learning_rate": 3.04e-06, + "loss": 304.0082, "step": 1520 }, { - "epoch": 0.01236273725547233, - "grad_norm": 1310.898681640625, - "learning_rate": 6.12e-06, - "loss": 352.3494, + "epoch": 0.006181393601247591, + "grad_norm": 1679.9188232421875, + "learning_rate": 3.06e-06, + "loss": 316.8349, "step": 1530 }, { - "epoch": 0.012443539459756462, - "grad_norm": 1393.8817138671875, - "learning_rate": 6.16e-06, - "loss": 323.2435, + "epoch": 0.006221794866615222, + "grad_norm": 0.0, + "learning_rate": 3.08e-06, + "loss": 273.9679, "step": 1540 }, { - "epoch": 0.012524341664040595, - "grad_norm": 3312.1982421875, - "learning_rate": 6.2e-06, - "loss": 376.0802, + "epoch": 0.006262196131982854, + "grad_norm": 934.2025756835938, + "learning_rate": 3.1e-06, + "loss": 281.7329, "step": 1550 }, { - "epoch": 0.012605143868324727, - "grad_norm": 2410.134521484375, - "learning_rate": 6.24e-06, - "loss": 380.9307, + "epoch": 0.006302597397350485, + "grad_norm": 1820.7791748046875, + "learning_rate": 3.12e-06, + "loss": 292.2456, "step": 1560 }, { - "epoch": 0.01268594607260886, - "grad_norm": 1723.5726318359375, - "learning_rate": 6.28e-06, - "loss": 250.1708, + "epoch": 0.006342998662718116, + "grad_norm": 1122.6767578125, + "learning_rate": 3.14e-06, + "loss": 327.0943, "step": 1570 }, { - "epoch": 0.012766748276892994, - "grad_norm": 1414.7744140625, - "learning_rate": 6.320000000000001e-06, - "loss": 321.4116, + "epoch": 0.006383399928085748, + "grad_norm": 3407.740234375, + "learning_rate": 3.1600000000000007e-06, + "loss": 282.118, "step": 1580 }, { - "epoch": 0.012847550481177127, - "grad_norm": 1703.7847900390625, - "learning_rate": 6.360000000000001e-06, - "loss": 302.414, + "epoch": 0.006423801193453379, + "grad_norm": 556.7740478515625, + "learning_rate": 3.1800000000000005e-06, + "loss": 332.0084, "step": 1590 }, { - "epoch": 0.01292835268546126, - "grad_norm": 18853.111328125, - "learning_rate": 6.4000000000000006e-06, - "loss": 330.5393, + "epoch": 0.00646420245882101, + "grad_norm": 1229.8035888671875, + "learning_rate": 3.2000000000000003e-06, + "loss": 344.9013, "step": 1600 }, { - "epoch": 0.013009154889745392, - "grad_norm": 828.843505859375, - "learning_rate": 6.44e-06, - "loss": 304.8511, + "epoch": 0.006504603724188642, + "grad_norm": 949.2359008789062, + "learning_rate": 3.22e-06, + "loss": 371.0438, "step": 1610 }, { - "epoch": 0.013089957094029525, - "grad_norm": 2509.326171875, - "learning_rate": 6.48e-06, - "loss": 309.2272, + "epoch": 0.006545004989556273, + "grad_norm": 1869.7620849609375, + "learning_rate": 3.24e-06, + "loss": 388.4585, "step": 1620 }, { - "epoch": 0.013170759298313657, - "grad_norm": 1249.705322265625, - "learning_rate": 6.519999999999999e-06, - "loss": 374.0655, + "epoch": 0.006585406254923904, + "grad_norm": 527.3277587890625, + "learning_rate": 3.2599999999999997e-06, + "loss": 372.7793, "step": 1630 }, { - "epoch": 0.01325156150259779, - "grad_norm": 1567.087158203125, - "learning_rate": 6.560000000000001e-06, - "loss": 353.2634, + "epoch": 0.006625807520291536, + "grad_norm": 1427.3751220703125, + "learning_rate": 3.2800000000000004e-06, + "loss": 386.9533, "step": 1640 }, { - "epoch": 0.013332363706881924, - "grad_norm": 1130.07373046875, - "learning_rate": 6.6e-06, - "loss": 373.2439, + "epoch": 0.006666208785659167, + "grad_norm": 1839.386474609375, + "learning_rate": 3.3e-06, + "loss": 414.8817, "step": 1650 }, { - "epoch": 0.013413165911166057, - "grad_norm": 1015.62890625, - "learning_rate": 6.640000000000001e-06, - "loss": 312.5177, + "epoch": 0.006706610051026798, + "grad_norm": 767.32373046875, + "learning_rate": 3.3200000000000004e-06, + "loss": 262.8998, "step": 1660 }, { - "epoch": 0.01349396811545019, - "grad_norm": 1167.2283935546875, - "learning_rate": 6.68e-06, - "loss": 317.5417, + "epoch": 0.0067470113163944296, + "grad_norm": 487.2662353515625, + "learning_rate": 3.34e-06, + "loss": 349.8497, "step": 1670 }, { - "epoch": 0.013574770319734322, - "grad_norm": 1113.1024169921875, - "learning_rate": 6.72e-06, - "loss": 232.4917, + "epoch": 0.006787412581762061, + "grad_norm": 1609.6903076171875, + "learning_rate": 3.36e-06, + "loss": 328.737, "step": 1680 }, { - "epoch": 0.013655572524018455, - "grad_norm": 779.2466430664062, - "learning_rate": 6.76e-06, - "loss": 307.4087, + "epoch": 0.006827813847129692, + "grad_norm": 824.4249267578125, + "learning_rate": 3.38e-06, + "loss": 285.9903, "step": 1690 }, { - "epoch": 0.013736374728302587, - "grad_norm": 859.6519775390625, - "learning_rate": 6.800000000000001e-06, - "loss": 281.6118, + "epoch": 0.0068682151124973235, + "grad_norm": 3939.00244140625, + "learning_rate": 3.4000000000000005e-06, + "loss": 360.8893, "step": 1700 }, { - "epoch": 0.01381717693258672, - "grad_norm": 1344.0513916015625, - "learning_rate": 6.840000000000001e-06, - "loss": 427.468, + "epoch": 0.006908616377864955, + "grad_norm": 611.8134155273438, + "learning_rate": 3.4200000000000003e-06, + "loss": 316.7729, "step": 1710 }, { - "epoch": 0.013897979136870854, - "grad_norm": 1003.258544921875, - "learning_rate": 6.88e-06, - "loss": 233.702, + "epoch": 0.006949017643232586, + "grad_norm": 3673.492919921875, + "learning_rate": 3.44e-06, + "loss": 446.8052, "step": 1720 }, { - "epoch": 0.013978781341154987, - "grad_norm": 1059.145751953125, - "learning_rate": 6.92e-06, - "loss": 272.7395, + "epoch": 0.0069894189086002175, + "grad_norm": 580.3406982421875, + "learning_rate": 3.46e-06, + "loss": 306.8866, "step": 1730 }, { - "epoch": 0.01405958354543912, - "grad_norm": 1443.6258544921875, - "learning_rate": 6.9599999999999994e-06, - "loss": 289.2196, + "epoch": 0.007029820173967849, + "grad_norm": 797.6139526367188, + "learning_rate": 3.4799999999999997e-06, + "loss": 360.2713, "step": 1740 }, { - "epoch": 0.014140385749723252, - "grad_norm": 1565.5257568359375, - "learning_rate": 7.000000000000001e-06, - "loss": 221.3261, + "epoch": 0.00707022143933548, + "grad_norm": 1184.318115234375, + "learning_rate": 3.5000000000000004e-06, + "loss": 360.695, "step": 1750 }, { - "epoch": 0.014221187954007385, - "grad_norm": 1084.2279052734375, - "learning_rate": 7.04e-06, - "loss": 263.6064, + "epoch": 0.0071106227047031114, + "grad_norm": 659.1159057617188, + "learning_rate": 3.52e-06, + "loss": 393.7832, "step": 1760 }, { - "epoch": 0.014301990158291518, - "grad_norm": 1349.6220703125, - "learning_rate": 7.080000000000001e-06, - "loss": 224.2884, + "epoch": 0.007151023970070743, + "grad_norm": 611.4116821289062, + "learning_rate": 3.5400000000000004e-06, + "loss": 302.0126, "step": 1770 }, { - "epoch": 0.01438279236257565, - "grad_norm": 732.1233520507812, - "learning_rate": 7.1200000000000004e-06, - "loss": 240.856, + "epoch": 0.007191425235438374, + "grad_norm": 2148.29541015625, + "learning_rate": 3.5600000000000002e-06, + "loss": 411.9108, "step": 1780 }, { - "epoch": 0.014463594566859785, - "grad_norm": 687.3779907226562, - "learning_rate": 7.16e-06, - "loss": 208.5219, + "epoch": 0.007231826500806005, + "grad_norm": 635.7216796875, + "learning_rate": 3.58e-06, + "loss": 236.3176, "step": 1790 }, { - "epoch": 0.014544396771143917, - "grad_norm": 1115.8699951171875, - "learning_rate": 7.2e-06, - "loss": 234.2945, + "epoch": 0.007272227766173637, + "grad_norm": 367.6564025878906, + "learning_rate": 3.6e-06, + "loss": 293.0558, "step": 1800 }, { - "epoch": 0.01462519897542805, - "grad_norm": 796.1567993164062, - "learning_rate": 7.240000000000001e-06, - "loss": 258.9677, + "epoch": 0.007312629031541268, + "grad_norm": 712.876220703125, + "learning_rate": 3.6200000000000005e-06, + "loss": 349.5894, "step": 1810 }, { - "epoch": 0.014706001179712182, - "grad_norm": 1313.21240234375, - "learning_rate": 7.280000000000001e-06, - "loss": 244.3882, + "epoch": 0.007353030296908899, + "grad_norm": 505.4992980957031, + "learning_rate": 3.6400000000000003e-06, + "loss": 300.7681, "step": 1820 }, { - "epoch": 0.014786803383996315, - "grad_norm": 2064.98095703125, - "learning_rate": 7.32e-06, - "loss": 274.6626, + "epoch": 0.007393431562276531, + "grad_norm": 852.1588745117188, + "learning_rate": 3.66e-06, + "loss": 408.5499, "step": 1830 }, { - "epoch": 0.014867605588280448, - "grad_norm": 1497.9664306640625, - "learning_rate": 7.36e-06, - "loss": 310.429, + "epoch": 0.007433832827644162, + "grad_norm": 1410.185791015625, + "learning_rate": 3.68e-06, + "loss": 253.6884, "step": 1840 }, { - "epoch": 0.01494840779256458, - "grad_norm": 1633.9671630859375, - "learning_rate": 7.4e-06, - "loss": 407.6417, + "epoch": 0.007474234093011793, + "grad_norm": 889.8245849609375, + "learning_rate": 3.7e-06, + "loss": 408.2236, "step": 1850 }, { - "epoch": 0.015029209996848715, - "grad_norm": 1377.6971435546875, - "learning_rate": 7.44e-06, - "loss": 251.2472, + "epoch": 0.007514635358379425, + "grad_norm": 906.4953002929688, + "learning_rate": 3.72e-06, + "loss": 397.8984, "step": 1860 }, { - "epoch": 0.015110012201132847, - "grad_norm": 727.3643798828125, - "learning_rate": 7.480000000000001e-06, - "loss": 306.3324, + "epoch": 0.007555036623747056, + "grad_norm": 0.0, + "learning_rate": 3.7400000000000006e-06, + "loss": 238.5037, "step": 1870 }, { - "epoch": 0.01519081440541698, - "grad_norm": 956.7907104492188, - "learning_rate": 7.520000000000001e-06, - "loss": 344.1451, + "epoch": 0.007595437889114687, + "grad_norm": 13512.4267578125, + "learning_rate": 3.7600000000000004e-06, + "loss": 348.5812, "step": 1880 }, { - "epoch": 0.015271616609701113, - "grad_norm": 1412.259521484375, - "learning_rate": 7.5600000000000005e-06, - "loss": 295.6861, + "epoch": 0.007635839154482319, + "grad_norm": 4588.23193359375, + "learning_rate": 3.7800000000000002e-06, + "loss": 364.4923, "step": 1890 }, { - "epoch": 0.015352418813985245, - "grad_norm": 958.1197509765625, - "learning_rate": 7.6e-06, - "loss": 297.5455, + "epoch": 0.00767624041984995, + "grad_norm": 803.103759765625, + "learning_rate": 3.8e-06, + "loss": 372.9157, "step": 1900 }, { - "epoch": 0.015433221018269378, - "grad_norm": 771.83837890625, - "learning_rate": 7.64e-06, - "loss": 277.1934, + "epoch": 0.007716641685217581, + "grad_norm": 1621.907958984375, + "learning_rate": 3.82e-06, + "loss": 360.8791, "step": 1910 }, { - "epoch": 0.015514023222553512, - "grad_norm": 1224.0018310546875, - "learning_rate": 7.68e-06, - "loss": 306.2997, + "epoch": 0.0077570429505852126, + "grad_norm": 592.7820434570312, + "learning_rate": 3.84e-06, + "loss": 375.443, "step": 1920 }, { - "epoch": 0.015594825426837645, - "grad_norm": 1169.1146240234375, - "learning_rate": 7.72e-06, - "loss": 249.0125, + "epoch": 0.007797444215952844, + "grad_norm": 2514.99462890625, + "learning_rate": 3.86e-06, + "loss": 332.9206, "step": 1930 }, { - "epoch": 0.015675627631121777, - "grad_norm": 1132.9344482421875, - "learning_rate": 7.76e-06, - "loss": 297.8244, + "epoch": 0.007837845481320474, + "grad_norm": 1829.7589111328125, + "learning_rate": 3.88e-06, + "loss": 442.6601, "step": 1940 }, { - "epoch": 0.01575642983540591, - "grad_norm": 1522.798583984375, - "learning_rate": 7.8e-06, - "loss": 270.4976, + "epoch": 0.007878246746688106, + "grad_norm": 897.420166015625, + "learning_rate": 3.9e-06, + "loss": 288.4365, "step": 1950 }, { - "epoch": 0.015837232039690043, - "grad_norm": 1786.5406494140625, - "learning_rate": 7.84e-06, - "loss": 246.7759, + "epoch": 0.007918648012055737, + "grad_norm": 1314.1402587890625, + "learning_rate": 3.92e-06, + "loss": 334.6603, "step": 1960 }, { - "epoch": 0.015918034243974175, - "grad_norm": 3162.774658203125, - "learning_rate": 7.879999999999999e-06, - "loss": 256.3574, + "epoch": 0.007959049277423368, + "grad_norm": 2495.09130859375, + "learning_rate": 3.9399999999999995e-06, + "loss": 434.7513, "step": 1970 }, { - "epoch": 0.015998836448258308, - "grad_norm": 1396.468017578125, - "learning_rate": 7.92e-06, - "loss": 308.9431, + "epoch": 0.007999450542791, + "grad_norm": 402.0351867675781, + "learning_rate": 3.96e-06, + "loss": 307.8936, "step": 1980 }, { - "epoch": 0.01607963865254244, - "grad_norm": 1186.03125, - "learning_rate": 7.96e-06, - "loss": 344.9355, + "epoch": 0.008039851808158631, + "grad_norm": 751.7537841796875, + "learning_rate": 3.98e-06, + "loss": 308.6638, "step": 1990 }, { - "epoch": 0.016160440856826573, - "grad_norm": 1449.1376953125, - "learning_rate": 8.000000000000001e-06, - "loss": 275.0313, + "epoch": 0.008080253073526262, + "grad_norm": 604.2171630859375, + "learning_rate": 4.000000000000001e-06, + "loss": 392.2409, "step": 2000 }, { - "epoch": 0.016241243061110706, - "grad_norm": 1768.13232421875, - "learning_rate": 8.040000000000001e-06, - "loss": 258.0124, + "epoch": 0.008120654338893894, + "grad_norm": 1126.428955078125, + "learning_rate": 4.0200000000000005e-06, + "loss": 318.5683, "step": 2010 }, { - "epoch": 0.01632204526539484, - "grad_norm": 2093.3515625, - "learning_rate": 8.08e-06, - "loss": 315.0089, + "epoch": 0.008161055604261525, + "grad_norm": 4945.88037109375, + "learning_rate": 4.04e-06, + "loss": 311.324, "step": 2020 }, { - "epoch": 0.016402847469678974, - "grad_norm": 1033.447021484375, - "learning_rate": 8.12e-06, - "loss": 313.4489, + "epoch": 0.008201456869629156, + "grad_norm": 745.3814697265625, + "learning_rate": 4.06e-06, + "loss": 381.683, "step": 2030 }, { - "epoch": 0.016483649673963107, - "grad_norm": 1200.64208984375, - "learning_rate": 8.160000000000001e-06, - "loss": 253.9576, + "epoch": 0.008241858134996788, + "grad_norm": 556.3217163085938, + "learning_rate": 4.080000000000001e-06, + "loss": 335.636, "step": 2040 }, { - "epoch": 0.01656445187824724, - "grad_norm": 1944.759033203125, - "learning_rate": 8.200000000000001e-06, - "loss": 298.4216, + "epoch": 0.008282259400364419, + "grad_norm": 1249.5528564453125, + "learning_rate": 4.1000000000000006e-06, + "loss": 216.6521, "step": 2050 }, { - "epoch": 0.016645254082531372, - "grad_norm": 3179.149658203125, - "learning_rate": 8.24e-06, - "loss": 298.9085, + "epoch": 0.00832266066573205, + "grad_norm": 4082.633544921875, + "learning_rate": 4.12e-06, + "loss": 422.9517, "step": 2060 }, { - "epoch": 0.016726056286815505, - "grad_norm": 1557.4892578125, - "learning_rate": 8.28e-06, - "loss": 260.3383, + "epoch": 0.008363061931099681, + "grad_norm": 1134.3583984375, + "learning_rate": 4.14e-06, + "loss": 368.1808, "step": 2070 }, { - "epoch": 0.016806858491099638, - "grad_norm": 1269.2259521484375, - "learning_rate": 8.32e-06, - "loss": 352.0171, + "epoch": 0.008403463196467313, + "grad_norm": 473.27655029296875, + "learning_rate": 4.16e-06, + "loss": 323.5618, "step": 2080 }, { - "epoch": 0.01688766069538377, - "grad_norm": 1645.8583984375, - "learning_rate": 8.36e-06, - "loss": 310.5882, + "epoch": 0.008443864461834944, + "grad_norm": 1147.6612548828125, + "learning_rate": 4.18e-06, + "loss": 358.5086, "step": 2090 }, { - "epoch": 0.016968462899667903, - "grad_norm": 948.7568969726562, - "learning_rate": 8.400000000000001e-06, - "loss": 253.1353, + "epoch": 0.008484265727202575, + "grad_norm": 716.1121826171875, + "learning_rate": 4.2000000000000004e-06, + "loss": 289.7142, "step": 2100 }, { - "epoch": 0.017049265103952035, - "grad_norm": 968.5134887695312, - "learning_rate": 8.44e-06, - "loss": 253.9631, + "epoch": 0.008524666992570207, + "grad_norm": 595.6019287109375, + "learning_rate": 4.22e-06, + "loss": 249.1531, "step": 2110 }, { - "epoch": 0.017130067308236168, - "grad_norm": 1202.3250732421875, - "learning_rate": 8.48e-06, - "loss": 326.3459, + "epoch": 0.008565068257937838, + "grad_norm": 1831.8653564453125, + "learning_rate": 4.24e-06, + "loss": 336.2978, "step": 2120 }, { - "epoch": 0.0172108695125203, - "grad_norm": 940.4733276367188, - "learning_rate": 8.52e-06, - "loss": 349.8383, + "epoch": 0.00860546952330547, + "grad_norm": 549.8616943359375, + "learning_rate": 4.26e-06, + "loss": 310.2739, "step": 2130 }, { - "epoch": 0.017291671716804433, - "grad_norm": 1564.2154541015625, - "learning_rate": 8.56e-06, - "loss": 327.3158, + "epoch": 0.0086458707886731, + "grad_norm": 888.9124755859375, + "learning_rate": 4.28e-06, + "loss": 372.0684, "step": 2140 }, { - "epoch": 0.017372473921088566, - "grad_norm": 3648.453369140625, - "learning_rate": 8.599999999999999e-06, - "loss": 203.9852, + "epoch": 0.008686272054040732, + "grad_norm": 654.9121704101562, + "learning_rate": 4.2999999999999995e-06, + "loss": 218.5617, "step": 2150 }, { - "epoch": 0.0174532761253727, - "grad_norm": 1549.84130859375, - "learning_rate": 8.64e-06, - "loss": 293.8607, + "epoch": 0.008726673319408363, + "grad_norm": 1140.73486328125, + "learning_rate": 4.32e-06, + "loss": 396.7231, "step": 2160 }, { - "epoch": 0.017534078329656835, - "grad_norm": 1631.2021484375, - "learning_rate": 8.68e-06, - "loss": 218.4349, + "epoch": 0.008767074584775995, + "grad_norm": 818.670166015625, + "learning_rate": 4.34e-06, + "loss": 335.7865, "step": 2170 }, { - "epoch": 0.017614880533940967, - "grad_norm": 1058.605712890625, - "learning_rate": 8.720000000000001e-06, - "loss": 223.0288, + "epoch": 0.008807475850143626, + "grad_norm": 792.7094116210938, + "learning_rate": 4.360000000000001e-06, + "loss": 321.234, "step": 2180 }, { - "epoch": 0.0176956827382251, - "grad_norm": 1162.4736328125, - "learning_rate": 8.76e-06, - "loss": 286.7649, + "epoch": 0.008847877115511257, + "grad_norm": 2842.64697265625, + "learning_rate": 4.38e-06, + "loss": 247.4936, "step": 2190 }, { - "epoch": 0.017776484942509233, - "grad_norm": 979.60595703125, - "learning_rate": 8.8e-06, - "loss": 289.1377, + "epoch": 0.008888278380878889, + "grad_norm": 756.8375244140625, + "learning_rate": 4.4e-06, + "loss": 358.118, "step": 2200 }, { - "epoch": 0.017857287146793365, - "grad_norm": 1633.070068359375, - "learning_rate": 8.840000000000002e-06, - "loss": 225.9887, + "epoch": 0.00892867964624652, + "grad_norm": 447.1920166015625, + "learning_rate": 4.420000000000001e-06, + "loss": 243.4523, "step": 2210 }, { - "epoch": 0.017938089351077498, - "grad_norm": 888.2476806640625, - "learning_rate": 8.880000000000001e-06, - "loss": 252.1924, + "epoch": 0.008969080911614151, + "grad_norm": 598.7188110351562, + "learning_rate": 4.440000000000001e-06, + "loss": 317.7062, "step": 2220 }, { - "epoch": 0.01801889155536163, - "grad_norm": 1197.532470703125, - "learning_rate": 8.920000000000001e-06, - "loss": 250.3678, + "epoch": 0.009009482176981783, + "grad_norm": 782.6261596679688, + "learning_rate": 4.4600000000000005e-06, + "loss": 423.8147, "step": 2230 }, { - "epoch": 0.018099693759645763, - "grad_norm": 1812.9381103515625, - "learning_rate": 8.96e-06, - "loss": 257.2437, + "epoch": 0.009049883442349414, + "grad_norm": 650.9381103515625, + "learning_rate": 4.48e-06, + "loss": 372.274, "step": 2240 }, { - "epoch": 0.018180495963929896, - "grad_norm": 1018.7303466796875, - "learning_rate": 9e-06, - "loss": 246.5754, + "epoch": 0.009090284707717045, + "grad_norm": 911.272216796875, + "learning_rate": 4.5e-06, + "loss": 309.0599, "step": 2250 }, { - "epoch": 0.018261298168214028, - "grad_norm": 2371.834228515625, - "learning_rate": 9.04e-06, - "loss": 233.2349, + "epoch": 0.009130685973084677, + "grad_norm": 1821.9324951171875, + "learning_rate": 4.52e-06, + "loss": 291.9244, "step": 2260 }, { - "epoch": 0.01834210037249816, - "grad_norm": 618.4129638671875, - "learning_rate": 9.080000000000001e-06, - "loss": 238.5665, + "epoch": 0.009171087238452308, + "grad_norm": 1428.2496337890625, + "learning_rate": 4.540000000000001e-06, + "loss": 299.4853, "step": 2270 }, { - "epoch": 0.018422902576782293, - "grad_norm": 1489.2613525390625, - "learning_rate": 9.12e-06, - "loss": 368.2135, + "epoch": 0.00921148850381994, + "grad_norm": 474.319580078125, + "learning_rate": 4.56e-06, + "loss": 284.1177, "step": 2280 }, { - "epoch": 0.018503704781066426, - "grad_norm": 982.7802734375, - "learning_rate": 9.16e-06, - "loss": 278.2691, + "epoch": 0.00925188976918757, + "grad_norm": 521.900146484375, + "learning_rate": 4.58e-06, + "loss": 305.8711, "step": 2290 }, { - "epoch": 0.01858450698535056, - "grad_norm": 1286.537353515625, - "learning_rate": 9.2e-06, - "loss": 343.7667, + "epoch": 0.009292291034555202, + "grad_norm": 742.1409301757812, + "learning_rate": 4.6e-06, + "loss": 233.3259, "step": 2300 }, { - "epoch": 0.018665309189634695, - "grad_norm": 1177.6224365234375, - "learning_rate": 9.24e-06, - "loss": 198.3753, + "epoch": 0.009332692299922833, + "grad_norm": 702.3360595703125, + "learning_rate": 4.62e-06, + "loss": 351.8805, "step": 2310 }, { - "epoch": 0.018746111393918827, - "grad_norm": 783.3850708007812, - "learning_rate": 9.28e-06, - "loss": 208.5219, + "epoch": 0.009373093565290464, + "grad_norm": 969.7785034179688, + "learning_rate": 4.64e-06, + "loss": 273.1551, "step": 2320 }, { - "epoch": 0.01882691359820296, - "grad_norm": 1449.9591064453125, - "learning_rate": 9.32e-06, - "loss": 306.1865, + "epoch": 0.009413494830658096, + "grad_norm": 7805.5947265625, + "learning_rate": 4.66e-06, + "loss": 303.4039, "step": 2330 }, { - "epoch": 0.018907715802487093, - "grad_norm": 1271.75537109375, - "learning_rate": 9.36e-06, - "loss": 261.487, + "epoch": 0.009453896096025727, + "grad_norm": 683.2047119140625, + "learning_rate": 4.68e-06, + "loss": 299.0721, "step": 2340 }, { - "epoch": 0.018988518006771225, - "grad_norm": 5165.896484375, - "learning_rate": 9.4e-06, - "loss": 247.9272, + "epoch": 0.009494297361393358, + "grad_norm": 433.588134765625, + "learning_rate": 4.7e-06, + "loss": 275.1868, "step": 2350 }, { - "epoch": 0.019069320211055358, - "grad_norm": 6211.90771484375, - "learning_rate": 9.44e-06, - "loss": 261.6099, + "epoch": 0.00953469862676099, + "grad_norm": 970.4158325195312, + "learning_rate": 4.72e-06, + "loss": 508.6281, "step": 2360 }, { - "epoch": 0.01915012241533949, - "grad_norm": 1567.0113525390625, - "learning_rate": 9.48e-06, - "loss": 244.8898, + "epoch": 0.009575099892128621, + "grad_norm": 1364.8466796875, + "learning_rate": 4.74e-06, + "loss": 333.4644, "step": 2370 }, { - "epoch": 0.019230924619623623, - "grad_norm": 1807.3084716796875, - "learning_rate": 9.52e-06, - "loss": 284.4969, + "epoch": 0.009615501157496252, + "grad_norm": 1310.9354248046875, + "learning_rate": 4.76e-06, + "loss": 271.3296, "step": 2380 }, { - "epoch": 0.019311726823907756, - "grad_norm": 1295.265625, - "learning_rate": 9.560000000000002e-06, - "loss": 269.5252, + "epoch": 0.009655902422863884, + "grad_norm": 551.7647094726562, + "learning_rate": 4.780000000000001e-06, + "loss": 339.265, "step": 2390 }, { - "epoch": 0.01939252902819189, - "grad_norm": 725.8295288085938, - "learning_rate": 9.600000000000001e-06, - "loss": 201.9719, + "epoch": 0.009696303688231515, + "grad_norm": 542.0902709960938, + "learning_rate": 4.800000000000001e-06, + "loss": 375.706, "step": 2400 }, { - "epoch": 0.01947333123247602, - "grad_norm": 1188.0689697265625, - "learning_rate": 9.640000000000001e-06, - "loss": 271.0631, + "epoch": 0.009736704953599146, + "grad_norm": 517.1104736328125, + "learning_rate": 4.8200000000000004e-06, + "loss": 183.125, "step": 2410 }, { - "epoch": 0.019554133436760154, - "grad_norm": 4583.4580078125, - "learning_rate": 9.68e-06, - "loss": 251.5727, + "epoch": 0.009777106218966778, + "grad_norm": 1165.0673828125, + "learning_rate": 4.84e-06, + "loss": 303.7644, "step": 2420 }, { - "epoch": 0.019634935641044286, - "grad_norm": 789.2398071289062, - "learning_rate": 9.72e-06, - "loss": 244.2227, + "epoch": 0.009817507484334409, + "grad_norm": 4149.34130859375, + "learning_rate": 4.86e-06, + "loss": 361.1945, "step": 2430 }, { - "epoch": 0.01971573784532842, - "grad_norm": 2027.4886474609375, - "learning_rate": 9.760000000000001e-06, - "loss": 275.5922, + "epoch": 0.00985790874970204, + "grad_norm": 957.4842529296875, + "learning_rate": 4.880000000000001e-06, + "loss": 238.9853, "step": 2440 }, { - "epoch": 0.019796540049612555, - "grad_norm": 1207.19384765625, - "learning_rate": 9.800000000000001e-06, - "loss": 272.8758, + "epoch": 0.009898310015069672, + "grad_norm": 493.18341064453125, + "learning_rate": 4.9000000000000005e-06, + "loss": 253.369, "step": 2450 }, { - "epoch": 0.019877342253896688, - "grad_norm": 1035.569580078125, - "learning_rate": 9.84e-06, - "loss": 235.8457, + "epoch": 0.009938711280437303, + "grad_norm": 913.3916015625, + "learning_rate": 4.92e-06, + "loss": 197.3718, "step": 2460 }, { - "epoch": 0.01995814445818082, - "grad_norm": 1105.7423095703125, - "learning_rate": 9.88e-06, - "loss": 304.1108, + "epoch": 0.009979112545804934, + "grad_norm": 641.3722534179688, + "learning_rate": 4.94e-06, + "loss": 359.7244, "step": 2470 }, { - "epoch": 0.020038946662464953, - "grad_norm": 1206.0045166015625, - "learning_rate": 9.92e-06, - "loss": 260.7186, + "epoch": 0.010019513811172566, + "grad_norm": 805.710693359375, + "learning_rate": 4.96e-06, + "loss": 314.5141, "step": 2480 }, { - "epoch": 0.020119748866749085, - "grad_norm": 1386.5986328125, - "learning_rate": 9.96e-06, - "loss": 352.3332, + "epoch": 0.010059915076540197, + "grad_norm": 979.5321655273438, + "learning_rate": 4.98e-06, + "loss": 402.7761, "step": 2490 }, { - "epoch": 0.020200551071033218, - "grad_norm": 1350.6053466796875, - "learning_rate": 1e-05, - "loss": 291.9337, + "epoch": 0.010100316341907828, + "grad_norm": 434.0482177734375, + "learning_rate": 5e-06, + "loss": 190.8965, "step": 2500 }, { - "epoch": 0.02028135327531735, - "grad_norm": 1102.558349609375, - "learning_rate": 1.004e-05, - "loss": 198.0854, + "epoch": 0.01014071760727546, + "grad_norm": 907.0119018554688, + "learning_rate": 5.02e-06, + "loss": 269.7839, "step": 2510 }, { - "epoch": 0.020362155479601483, - "grad_norm": 1326.783935546875, - "learning_rate": 1.008e-05, - "loss": 296.1028, + "epoch": 0.010181118872643091, + "grad_norm": 363.0292053222656, + "learning_rate": 5.04e-06, + "loss": 258.706, "step": 2520 }, { - "epoch": 0.020442957683885616, - "grad_norm": 1940.3212890625, - "learning_rate": 1.012e-05, - "loss": 271.7902, + "epoch": 0.010221520138010722, + "grad_norm": 794.2672729492188, + "learning_rate": 5.06e-06, + "loss": 220.4875, "step": 2530 }, { - "epoch": 0.02052375988816975, - "grad_norm": 1627.4825439453125, - "learning_rate": 1.016e-05, - "loss": 252.0585, + "epoch": 0.010261921403378354, + "grad_norm": 956.221923828125, + "learning_rate": 5.08e-06, + "loss": 283.6568, "step": 2540 }, { - "epoch": 0.02060456209245388, - "grad_norm": 796.6646728515625, - "learning_rate": 1.02e-05, - "loss": 276.719, + "epoch": 0.010302322668745985, + "grad_norm": 779.488037109375, + "learning_rate": 5.1e-06, + "loss": 356.618, "step": 2550 }, { - "epoch": 0.020685364296738014, - "grad_norm": 972.654541015625, - "learning_rate": 1.024e-05, - "loss": 240.4665, + "epoch": 0.010342723934113616, + "grad_norm": 505.3084716796875, + "learning_rate": 5.12e-06, + "loss": 195.6782, "step": 2560 }, { - "epoch": 0.020766166501022146, - "grad_norm": 1098.8206787109375, - "learning_rate": 1.0280000000000002e-05, - "loss": 200.4528, + "epoch": 0.010383125199481247, + "grad_norm": 1260.047119140625, + "learning_rate": 5.140000000000001e-06, + "loss": 321.1118, "step": 2570 }, { - "epoch": 0.02084696870530628, - "grad_norm": 1253.0360107421875, - "learning_rate": 1.0320000000000001e-05, - "loss": 249.324, + "epoch": 0.010423526464848879, + "grad_norm": 776.1239624023438, + "learning_rate": 5.1600000000000006e-06, + "loss": 303.4421, "step": 2580 }, { - "epoch": 0.020927770909590415, - "grad_norm": 1241.728271484375, - "learning_rate": 1.036e-05, - "loss": 259.3147, + "epoch": 0.01046392773021651, + "grad_norm": 899.60009765625, + "learning_rate": 5.18e-06, + "loss": 267.6064, "step": 2590 }, { - "epoch": 0.021008573113874548, - "grad_norm": 896.2220458984375, - "learning_rate": 1.04e-05, - "loss": 254.4159, + "epoch": 0.010504328995584141, + "grad_norm": 1198.875732421875, + "learning_rate": 5.2e-06, + "loss": 311.9983, "step": 2600 }, { - "epoch": 0.02108937531815868, - "grad_norm": 1116.9381103515625, - "learning_rate": 1.0440000000000002e-05, - "loss": 255.4192, + "epoch": 0.010544730260951773, + "grad_norm": 2578.52734375, + "learning_rate": 5.220000000000001e-06, + "loss": 285.3186, "step": 2610 }, { - "epoch": 0.021170177522442813, - "grad_norm": 1615.5068359375, - "learning_rate": 1.0480000000000001e-05, - "loss": 303.0913, + "epoch": 0.010585131526319404, + "grad_norm": 613.3502807617188, + "learning_rate": 5.240000000000001e-06, + "loss": 407.8097, "step": 2620 }, { - "epoch": 0.021250979726726946, - "grad_norm": 1435.218994140625, - "learning_rate": 1.0520000000000001e-05, - "loss": 301.8539, + "epoch": 0.010625532791687035, + "grad_norm": 724.2944946289062, + "learning_rate": 5.2600000000000005e-06, + "loss": 375.3231, "step": 2630 }, { - "epoch": 0.02133178193101108, - "grad_norm": 4280.59912109375, - "learning_rate": 1.056e-05, - "loss": 256.9668, + "epoch": 0.010665934057054667, + "grad_norm": 625.8546752929688, + "learning_rate": 5.28e-06, + "loss": 337.0123, "step": 2640 }, { - "epoch": 0.02141258413529521, - "grad_norm": 1075.5289306640625, - "learning_rate": 1.06e-05, - "loss": 269.062, + "epoch": 0.010706335322422298, + "grad_norm": 2968.8515625, + "learning_rate": 5.3e-06, + "loss": 279.1451, "step": 2650 }, { - "epoch": 0.021493386339579344, - "grad_norm": 2138.95458984375, - "learning_rate": 1.064e-05, - "loss": 279.204, + "epoch": 0.01074673658778993, + "grad_norm": 507.51885986328125, + "learning_rate": 5.32e-06, + "loss": 214.3604, "step": 2660 }, { - "epoch": 0.021574188543863476, - "grad_norm": 1505.0517578125, - "learning_rate": 1.0680000000000001e-05, - "loss": 252.0488, + "epoch": 0.01078713785315756, + "grad_norm": 738.8612060546875, + "learning_rate": 5.3400000000000005e-06, + "loss": 332.2985, "step": 2670 }, { - "epoch": 0.02165499074814761, - "grad_norm": 1122.780517578125, - "learning_rate": 1.072e-05, - "loss": 297.5027, + "epoch": 0.010827539118525192, + "grad_norm": 684.1400146484375, + "learning_rate": 5.36e-06, + "loss": 330.1413, "step": 2680 }, { - "epoch": 0.02173579295243174, - "grad_norm": 1419.3516845703125, - "learning_rate": 1.076e-05, - "loss": 302.7382, + "epoch": 0.010867940383892823, + "grad_norm": 6400.771484375, + "learning_rate": 5.38e-06, + "loss": 344.3884, "step": 2690 }, { - "epoch": 0.021816595156715874, - "grad_norm": 1287.330322265625, - "learning_rate": 1.08e-05, - "loss": 328.1729, + "epoch": 0.010908341649260455, + "grad_norm": 986.3028564453125, + "learning_rate": 5.4e-06, + "loss": 354.2751, "step": 2700 }, { - "epoch": 0.021897397361000007, - "grad_norm": 1536.410400390625, - "learning_rate": 1.084e-05, - "loss": 277.5261, + "epoch": 0.010948742914628086, + "grad_norm": 933.0596313476562, + "learning_rate": 5.42e-06, + "loss": 367.7645, "step": 2710 }, { - "epoch": 0.02197819956528414, - "grad_norm": 1302.607177734375, - "learning_rate": 1.088e-05, - "loss": 306.6451, + "epoch": 0.010989144179995717, + "grad_norm": 956.349365234375, + "learning_rate": 5.44e-06, + "loss": 293.7472, "step": 2720 }, { - "epoch": 0.022059001769568275, - "grad_norm": 1333.87060546875, - "learning_rate": 1.092e-05, - "loss": 422.3587, + "epoch": 0.011029545445363349, + "grad_norm": 473.7074890136719, + "learning_rate": 5.46e-06, + "loss": 281.8573, "step": 2730 }, { - "epoch": 0.022139803973852408, - "grad_norm": 2312.619384765625, - "learning_rate": 1.096e-05, - "loss": 259.5899, + "epoch": 0.01106994671073098, + "grad_norm": 1302.126953125, + "learning_rate": 5.48e-06, + "loss": 275.9536, "step": 2740 }, { - "epoch": 0.02222060617813654, - "grad_norm": 1443.182861328125, - "learning_rate": 1.1000000000000001e-05, - "loss": 286.23, + "epoch": 0.011110347976098611, + "grad_norm": 557.58203125, + "learning_rate": 5.500000000000001e-06, + "loss": 205.2503, "step": 2750 }, { - "epoch": 0.022301408382420673, - "grad_norm": 800.3855590820312, - "learning_rate": 1.1040000000000001e-05, - "loss": 243.2208, + "epoch": 0.011150749241466243, + "grad_norm": 3368.84228515625, + "learning_rate": 5.5200000000000005e-06, + "loss": 328.5797, "step": 2760 }, { - "epoch": 0.022382210586704806, - "grad_norm": 3863.56884765625, - "learning_rate": 1.108e-05, - "loss": 236.9019, + "epoch": 0.011191150506833874, + "grad_norm": 1747.6932373046875, + "learning_rate": 5.54e-06, + "loss": 246.7141, "step": 2770 }, { - "epoch": 0.02246301279098894, - "grad_norm": 1288.9324951171875, - "learning_rate": 1.112e-05, - "loss": 251.4443, + "epoch": 0.011231551772201505, + "grad_norm": 871.47802734375, + "learning_rate": 5.56e-06, + "loss": 261.7437, "step": 2780 }, { - "epoch": 0.02254381499527307, - "grad_norm": 1568.700927734375, - "learning_rate": 1.1160000000000002e-05, - "loss": 183.5424, + "epoch": 0.011271953037569137, + "grad_norm": 1074.76416015625, + "learning_rate": 5.580000000000001e-06, + "loss": 292.0746, "step": 2790 }, { - "epoch": 0.022624617199557204, - "grad_norm": 972.0908203125, - "learning_rate": 1.1200000000000001e-05, - "loss": 244.7627, + "epoch": 0.011312354302936768, + "grad_norm": 575.856689453125, + "learning_rate": 5.600000000000001e-06, + "loss": 247.1602, "step": 2800 }, { - "epoch": 0.022705419403841336, - "grad_norm": 1498.342041015625, - "learning_rate": 1.124e-05, - "loss": 224.7318, + "epoch": 0.0113527555683044, + "grad_norm": 847.0436401367188, + "learning_rate": 5.62e-06, + "loss": 244.562, "step": 2810 }, { - "epoch": 0.02278622160812547, - "grad_norm": 1999.7972412109375, - "learning_rate": 1.128e-05, - "loss": 289.9476, + "epoch": 0.01139315683367203, + "grad_norm": 479.6651306152344, + "learning_rate": 5.64e-06, + "loss": 196.8626, "step": 2820 }, { - "epoch": 0.0228670238124096, - "grad_norm": 1346.731201171875, - "learning_rate": 1.132e-05, - "loss": 284.2617, + "epoch": 0.011433558099039662, + "grad_norm": 1245.398681640625, + "learning_rate": 5.66e-06, + "loss": 338.9747, "step": 2830 }, { - "epoch": 0.022947826016693734, - "grad_norm": 702.3638305664062, - "learning_rate": 1.1360000000000001e-05, - "loss": 211.2037, + "epoch": 0.011473959364407293, + "grad_norm": 502.24951171875, + "learning_rate": 5.680000000000001e-06, + "loss": 243.1993, "step": 2840 }, { - "epoch": 0.023028628220977867, - "grad_norm": 1725.1688232421875, - "learning_rate": 1.1400000000000001e-05, - "loss": 328.617, + "epoch": 0.011514360629774924, + "grad_norm": 790.6267700195312, + "learning_rate": 5.7000000000000005e-06, + "loss": 289.827, "step": 2850 }, { - "epoch": 0.023109430425262, - "grad_norm": 769.3167724609375, - "learning_rate": 1.144e-05, - "loss": 228.8884, + "epoch": 0.011554761895142556, + "grad_norm": 996.7022094726562, + "learning_rate": 5.72e-06, + "loss": 285.2487, "step": 2860 }, { - "epoch": 0.023190232629546136, - "grad_norm": 597.2233276367188, - "learning_rate": 1.148e-05, - "loss": 203.8586, + "epoch": 0.011595163160510187, + "grad_norm": 1381.54541015625, + "learning_rate": 5.74e-06, + "loss": 299.6274, "step": 2870 }, { - "epoch": 0.023271034833830268, - "grad_norm": 1434.7232666015625, - "learning_rate": 1.152e-05, - "loss": 307.0999, + "epoch": 0.011635564425877818, + "grad_norm": 674.7173461914062, + "learning_rate": 5.76e-06, + "loss": 212.4647, "step": 2880 }, { - "epoch": 0.0233518370381144, - "grad_norm": 1075.2532958984375, - "learning_rate": 1.156e-05, - "loss": 220.6475, + "epoch": 0.01167596569124545, + "grad_norm": 1115.6590576171875, + "learning_rate": 5.78e-06, + "loss": 350.1185, "step": 2890 }, { - "epoch": 0.023432639242398533, - "grad_norm": 2138.16748046875, - "learning_rate": 1.16e-05, - "loss": 274.6861, + "epoch": 0.011716366956613081, + "grad_norm": 967.9352416992188, + "learning_rate": 5.8e-06, + "loss": 303.8514, "step": 2900 }, { - "epoch": 0.023513441446682666, - "grad_norm": 1381.7962646484375, - "learning_rate": 1.164e-05, - "loss": 212.5038, + "epoch": 0.011756768221980712, + "grad_norm": 826.4132690429688, + "learning_rate": 5.82e-06, + "loss": 390.5727, "step": 2910 }, { - "epoch": 0.0235942436509668, - "grad_norm": 1554.7813720703125, - "learning_rate": 1.168e-05, - "loss": 248.4523, + "epoch": 0.011797169487348344, + "grad_norm": 554.1842651367188, + "learning_rate": 5.84e-06, + "loss": 228.6073, "step": 2920 }, { - "epoch": 0.02367504585525093, - "grad_norm": 1465.4317626953125, - "learning_rate": 1.172e-05, - "loss": 368.4438, + "epoch": 0.011837570752715975, + "grad_norm": 555.0242919921875, + "learning_rate": 5.86e-06, + "loss": 237.2674, "step": 2930 }, { - "epoch": 0.023755848059535064, - "grad_norm": 990.619873046875, - "learning_rate": 1.1760000000000001e-05, - "loss": 246.5309, + "epoch": 0.011877972018083606, + "grad_norm": 554.800048828125, + "learning_rate": 5.8800000000000005e-06, + "loss": 307.6302, "step": 2940 }, { - "epoch": 0.023836650263819197, - "grad_norm": 1547.040283203125, - "learning_rate": 1.18e-05, - "loss": 314.3385, + "epoch": 0.011918373283451238, + "grad_norm": 7435.06005859375, + "learning_rate": 5.9e-06, + "loss": 363.9746, "step": 2950 }, { - "epoch": 0.02391745246810333, - "grad_norm": 793.2789306640625, - "learning_rate": 1.1840000000000002e-05, - "loss": 202.7129, + "epoch": 0.011958774548818869, + "grad_norm": 3039.5869140625, + "learning_rate": 5.920000000000001e-06, + "loss": 294.7946, "step": 2960 }, { - "epoch": 0.023998254672387462, - "grad_norm": 1068.3546142578125, - "learning_rate": 1.1880000000000001e-05, - "loss": 227.2545, + "epoch": 0.0119991758141865, + "grad_norm": 9456.8857421875, + "learning_rate": 5.940000000000001e-06, + "loss": 348.9202, "step": 2970 }, { - "epoch": 0.024079056876671594, - "grad_norm": 1186.552001953125, - "learning_rate": 1.1920000000000001e-05, - "loss": 238.5089, + "epoch": 0.012039577079554132, + "grad_norm": 833.540771484375, + "learning_rate": 5.9600000000000005e-06, + "loss": 302.1644, "step": 2980 }, { - "epoch": 0.024159859080955727, - "grad_norm": 1320.97021484375, - "learning_rate": 1.196e-05, - "loss": 262.4691, + "epoch": 0.012079978344921763, + "grad_norm": 1317.080810546875, + "learning_rate": 5.98e-06, + "loss": 346.3184, "step": 2990 }, { - "epoch": 0.02424066128523986, - "grad_norm": 1807.1370849609375, - "learning_rate": 1.2e-05, - "loss": 180.7167, + "epoch": 0.012120379610289394, + "grad_norm": 546.1310424804688, + "learning_rate": 6e-06, + "loss": 185.3799, "step": 3000 }, { - "epoch": 0.024321463489523996, - "grad_norm": 1258.6356201171875, - "learning_rate": 1.204e-05, - "loss": 197.6735, + "epoch": 0.012160780875657026, + "grad_norm": 1277.5128173828125, + "learning_rate": 6.02e-06, + "loss": 271.2112, "step": 3010 }, { - "epoch": 0.02440226569380813, - "grad_norm": 1839.364501953125, - "learning_rate": 1.2080000000000001e-05, - "loss": 231.7536, + "epoch": 0.012201182141024657, + "grad_norm": 651.244384765625, + "learning_rate": 6.040000000000001e-06, + "loss": 206.7404, "step": 3020 }, { - "epoch": 0.02448306789809226, - "grad_norm": 1132.482666015625, - "learning_rate": 1.2120000000000001e-05, - "loss": 247.8347, + "epoch": 0.012241583406392288, + "grad_norm": 886.7020874023438, + "learning_rate": 6.0600000000000004e-06, + "loss": 309.8574, "step": 3030 }, { - "epoch": 0.024563870102376394, - "grad_norm": 3672.969970703125, - "learning_rate": 1.216e-05, - "loss": 253.7983, + "epoch": 0.01228198467175992, + "grad_norm": 765.9307861328125, + "learning_rate": 6.08e-06, + "loss": 412.906, "step": 3040 }, { - "epoch": 0.024644672306660526, - "grad_norm": 1456.1275634765625, - "learning_rate": 1.22e-05, - "loss": 262.7614, + "epoch": 0.01232238593712755, + "grad_norm": 1054.11669921875, + "learning_rate": 6.1e-06, + "loss": 356.0523, "step": 3050 }, { - "epoch": 0.02472547451094466, - "grad_norm": 1266.7822265625, - "learning_rate": 1.224e-05, - "loss": 364.5354, + "epoch": 0.012362787202495182, + "grad_norm": 786.2109375, + "learning_rate": 6.12e-06, + "loss": 260.9853, "step": 3060 }, { - "epoch": 0.02480627671522879, - "grad_norm": 1553.5853271484375, - "learning_rate": 1.2280000000000001e-05, - "loss": 259.4998, + "epoch": 0.012403188467862813, + "grad_norm": 1215.10400390625, + "learning_rate": 6.1400000000000005e-06, + "loss": 259.1632, "step": 3070 }, { - "epoch": 0.024887078919512924, - "grad_norm": 982.126953125, - "learning_rate": 1.232e-05, - "loss": 245.7501, + "epoch": 0.012443589733230445, + "grad_norm": 943.6842651367188, + "learning_rate": 6.16e-06, + "loss": 216.0937, "step": 3080 }, { - "epoch": 0.024967881123797057, - "grad_norm": 1078.702392578125, - "learning_rate": 1.236e-05, - "loss": 248.4797, + "epoch": 0.012483990998598076, + "grad_norm": 1460.5831298828125, + "learning_rate": 6.18e-06, + "loss": 337.1417, "step": 3090 }, { - "epoch": 0.02504868332808119, - "grad_norm": 1744.9505615234375, - "learning_rate": 1.24e-05, - "loss": 257.4596, + "epoch": 0.012524392263965707, + "grad_norm": 3781.843017578125, + "learning_rate": 6.2e-06, + "loss": 374.3547, "step": 3100 }, { - "epoch": 0.025129485532365322, - "grad_norm": 2129.53955078125, - "learning_rate": 1.244e-05, - "loss": 325.0301, + "epoch": 0.012564793529333339, + "grad_norm": 676.937744140625, + "learning_rate": 6.22e-06, + "loss": 350.8003, "step": 3110 }, { - "epoch": 0.025210287736649455, - "grad_norm": 1200.5882568359375, - "learning_rate": 1.248e-05, - "loss": 202.2953, + "epoch": 0.01260519479470097, + "grad_norm": 1300.2470703125, + "learning_rate": 6.24e-06, + "loss": 354.9542, "step": 3120 }, { - "epoch": 0.025291089940933587, - "grad_norm": 2342.281494140625, - "learning_rate": 1.252e-05, - "loss": 281.442, + "epoch": 0.012645596060068601, + "grad_norm": 715.6893310546875, + "learning_rate": 6.26e-06, + "loss": 225.5772, "step": 3130 }, { - "epoch": 0.02537189214521772, - "grad_norm": 1983.6246337890625, - "learning_rate": 1.256e-05, - "loss": 307.9054, + "epoch": 0.012685997325436233, + "grad_norm": 438.3706359863281, + "learning_rate": 6.28e-06, + "loss": 262.8397, "step": 3140 }, { - "epoch": 0.025452694349501856, - "grad_norm": 3306.292236328125, - "learning_rate": 1.2600000000000001e-05, - "loss": 285.5872, + "epoch": 0.012726398590803864, + "grad_norm": 5010.02392578125, + "learning_rate": 6.300000000000001e-06, + "loss": 282.8991, "step": 3150 }, { - "epoch": 0.02553349655378599, - "grad_norm": 1246.3935546875, - "learning_rate": 1.2640000000000003e-05, - "loss": 221.9955, + "epoch": 0.012766799856171495, + "grad_norm": 1642.5548095703125, + "learning_rate": 6.320000000000001e-06, + "loss": 273.512, "step": 3160 }, { - "epoch": 0.02561429875807012, - "grad_norm": 2914.101806640625, - "learning_rate": 1.268e-05, - "loss": 305.7593, + "epoch": 0.012807201121539127, + "grad_norm": 484.15069580078125, + "learning_rate": 6.34e-06, + "loss": 190.9169, "step": 3170 }, { - "epoch": 0.025695100962354254, - "grad_norm": 1304.0582275390625, - "learning_rate": 1.2720000000000002e-05, - "loss": 263.157, + "epoch": 0.012847602386906758, + "grad_norm": 1157.349365234375, + "learning_rate": 6.360000000000001e-06, + "loss": 378.3686, "step": 3180 }, { - "epoch": 0.025775903166638386, - "grad_norm": 2030.8707275390625, - "learning_rate": 1.276e-05, - "loss": 273.5134, + "epoch": 0.01288800365227439, + "grad_norm": 547.7994384765625, + "learning_rate": 6.38e-06, + "loss": 241.6315, "step": 3190 }, { - "epoch": 0.02585670537092252, - "grad_norm": 764.6029052734375, - "learning_rate": 1.2800000000000001e-05, - "loss": 295.3281, + "epoch": 0.01292840491764202, + "grad_norm": 0.0, + "learning_rate": 6.4000000000000006e-06, + "loss": 233.8554, "step": 3200 }, { - "epoch": 0.02593750757520665, - "grad_norm": 1144.525634765625, - "learning_rate": 1.2839999999999999e-05, - "loss": 243.3899, + "epoch": 0.012968806183009652, + "grad_norm": 953.7396850585938, + "learning_rate": 6.4199999999999995e-06, + "loss": 316.3731, "step": 3210 }, { - "epoch": 0.026018309779490784, - "grad_norm": 1877.0623779296875, - "learning_rate": 1.288e-05, - "loss": 278.613, + "epoch": 0.013009207448377283, + "grad_norm": 539.7330932617188, + "learning_rate": 6.44e-06, + "loss": 234.0129, "step": 3220 }, { - "epoch": 0.026099111983774917, - "grad_norm": 1167.0555419921875, - "learning_rate": 1.2920000000000002e-05, - "loss": 245.6273, + "epoch": 0.013049608713744915, + "grad_norm": 736.7625732421875, + "learning_rate": 6.460000000000001e-06, + "loss": 335.0041, "step": 3230 }, { - "epoch": 0.02617991418805905, - "grad_norm": 1279.2628173828125, - "learning_rate": 1.296e-05, - "loss": 279.5205, + "epoch": 0.013090009979112546, + "grad_norm": 1194.715576171875, + "learning_rate": 6.48e-06, + "loss": 276.9117, "step": 3240 }, { - "epoch": 0.026260716392343182, - "grad_norm": 931.5763549804688, - "learning_rate": 1.3000000000000001e-05, - "loss": 237.3054, + "epoch": 0.013130411244480177, + "grad_norm": 8021.32763671875, + "learning_rate": 6.5000000000000004e-06, + "loss": 374.7519, "step": 3250 }, { - "epoch": 0.026341518596627315, - "grad_norm": 1100.44140625, - "learning_rate": 1.3039999999999999e-05, - "loss": 242.4479, + "epoch": 0.013170812509847809, + "grad_norm": 931.0237426757812, + "learning_rate": 6.519999999999999e-06, + "loss": 278.108, "step": 3260 }, { - "epoch": 0.026422320800911447, - "grad_norm": 4110.998046875, - "learning_rate": 1.308e-05, - "loss": 289.9703, + "epoch": 0.01321121377521544, + "grad_norm": 953.587890625, + "learning_rate": 6.54e-06, + "loss": 368.7168, "step": 3270 }, { - "epoch": 0.02650312300519558, - "grad_norm": 2212.603759765625, - "learning_rate": 1.3120000000000001e-05, - "loss": 225.1909, + "epoch": 0.013251615040583071, + "grad_norm": 1098.186767578125, + "learning_rate": 6.560000000000001e-06, + "loss": 276.9152, "step": 3280 }, { - "epoch": 0.026583925209479716, - "grad_norm": 1521.9654541015625, - "learning_rate": 1.316e-05, - "loss": 211.7131, + "epoch": 0.013292016305950703, + "grad_norm": 1590.1026611328125, + "learning_rate": 6.58e-06, + "loss": 422.8925, "step": 3290 }, { - "epoch": 0.02666472741376385, - "grad_norm": 1365.2470703125, - "learning_rate": 1.32e-05, - "loss": 417.6974, + "epoch": 0.013332417571318334, + "grad_norm": 857.4612426757812, + "learning_rate": 6.6e-06, + "loss": 343.3155, "step": 3300 }, { - "epoch": 0.02674552961804798, - "grad_norm": 1388.3101806640625, - "learning_rate": 1.324e-05, - "loss": 254.6758, + "epoch": 0.013372818836685965, + "grad_norm": 941.4984130859375, + "learning_rate": 6.62e-06, + "loss": 187.542, "step": 3310 }, { - "epoch": 0.026826331822332114, - "grad_norm": 992.1864013671875, - "learning_rate": 1.3280000000000002e-05, - "loss": 294.1028, + "epoch": 0.013413220102053596, + "grad_norm": 938.3494262695312, + "learning_rate": 6.640000000000001e-06, + "loss": 237.5327, "step": 3320 }, { - "epoch": 0.026907134026616247, - "grad_norm": 1042.743408203125, - "learning_rate": 1.3320000000000001e-05, - "loss": 198.8202, + "epoch": 0.013453621367421228, + "grad_norm": 936.9948120117188, + "learning_rate": 6.660000000000001e-06, + "loss": 275.3572, "step": 3330 }, { - "epoch": 0.02698793623090038, - "grad_norm": 1109.8814697265625, - "learning_rate": 1.336e-05, - "loss": 193.5389, + "epoch": 0.013494022632788859, + "grad_norm": 1025.8851318359375, + "learning_rate": 6.68e-06, + "loss": 346.2491, "step": 3340 }, { - "epoch": 0.027068738435184512, - "grad_norm": 2412.195556640625, - "learning_rate": 1.3400000000000002e-05, - "loss": 287.6545, + "epoch": 0.01353442389815649, + "grad_norm": 2196.15478515625, + "learning_rate": 6.700000000000001e-06, + "loss": 242.7346, "step": 3350 }, { - "epoch": 0.027149540639468644, - "grad_norm": 1525.7947998046875, - "learning_rate": 1.344e-05, - "loss": 259.9322, + "epoch": 0.013574825163524122, + "grad_norm": 843.5136108398438, + "learning_rate": 6.72e-06, + "loss": 188.1487, "step": 3360 }, { - "epoch": 0.027230342843752777, - "grad_norm": 919.1216430664062, - "learning_rate": 1.3480000000000001e-05, - "loss": 379.4648, + "epoch": 0.013615226428891753, + "grad_norm": 714.3571166992188, + "learning_rate": 6.740000000000001e-06, + "loss": 358.2911, "step": 3370 }, { - "epoch": 0.02731114504803691, - "grad_norm": 1242.6904296875, - "learning_rate": 1.352e-05, - "loss": 319.7951, + "epoch": 0.013655627694259384, + "grad_norm": 689.7639770507812, + "learning_rate": 6.76e-06, + "loss": 150.4375, "step": 3380 }, { - "epoch": 0.027391947252321042, - "grad_norm": 1618.703369140625, - "learning_rate": 1.356e-05, - "loss": 234.9021, + "epoch": 0.013696028959627016, + "grad_norm": 1244.08837890625, + "learning_rate": 6.78e-06, + "loss": 300.2315, "step": 3390 }, { - "epoch": 0.027472749456605175, - "grad_norm": 2833.884521484375, - "learning_rate": 1.3600000000000002e-05, - "loss": 289.6614, + "epoch": 0.013736430224994647, + "grad_norm": 1171.05419921875, + "learning_rate": 6.800000000000001e-06, + "loss": 241.5314, "step": 3400 }, { - "epoch": 0.027553551660889308, - "grad_norm": 1492.5230712890625, - "learning_rate": 1.364e-05, - "loss": 224.5652, + "epoch": 0.013776831490362278, + "grad_norm": 1291.3603515625, + "learning_rate": 6.82e-06, + "loss": 449.0854, "step": 3410 }, { - "epoch": 0.02763435386517344, - "grad_norm": 1137.0823974609375, - "learning_rate": 1.3680000000000001e-05, - "loss": 200.0403, + "epoch": 0.01381723275572991, + "grad_norm": 1006.1640014648438, + "learning_rate": 6.840000000000001e-06, + "loss": 346.3529, "step": 3420 }, { - "epoch": 0.027715156069457576, - "grad_norm": 1330.4561767578125, - "learning_rate": 1.3719999999999999e-05, - "loss": 237.1255, + "epoch": 0.013857634021097541, + "grad_norm": 6614.48583984375, + "learning_rate": 6.8599999999999995e-06, + "loss": 289.8388, "step": 3430 }, { - "epoch": 0.02779595827374171, - "grad_norm": 2768.703369140625, - "learning_rate": 1.376e-05, - "loss": 296.2196, + "epoch": 0.013898035286465172, + "grad_norm": 574.2523803710938, + "learning_rate": 6.88e-06, + "loss": 187.4786, "step": 3440 }, { - "epoch": 0.02787676047802584, - "grad_norm": 1068.980224609375, - "learning_rate": 1.3800000000000002e-05, - "loss": 264.4762, + "epoch": 0.013938436551832804, + "grad_norm": 577.289794921875, + "learning_rate": 6.900000000000001e-06, + "loss": 280.2674, "step": 3450 }, { - "epoch": 0.027957562682309974, - "grad_norm": 853.5343627929688, - "learning_rate": 1.384e-05, - "loss": 226.0527, + "epoch": 0.013978837817200435, + "grad_norm": 1416.127197265625, + "learning_rate": 6.92e-06, + "loss": 330.4, "step": 3460 }, { - "epoch": 0.028038364886594107, - "grad_norm": 1424.2513427734375, - "learning_rate": 1.3880000000000001e-05, - "loss": 210.0746, + "epoch": 0.014019239082568066, + "grad_norm": 973.31689453125, + "learning_rate": 6.9400000000000005e-06, + "loss": 251.0885, "step": 3470 }, { - "epoch": 0.02811916709087824, - "grad_norm": 1245.1417236328125, - "learning_rate": 1.3919999999999999e-05, - "loss": 194.043, + "epoch": 0.014059640347935698, + "grad_norm": 2889.614013671875, + "learning_rate": 6.9599999999999994e-06, + "loss": 288.5922, "step": 3480 }, { - "epoch": 0.028199969295162372, - "grad_norm": 3672.158935546875, - "learning_rate": 1.396e-05, - "loss": 205.4042, + "epoch": 0.014100041613303329, + "grad_norm": 734.4885864257812, + "learning_rate": 6.98e-06, + "loss": 247.6378, "step": 3490 }, { - "epoch": 0.028280771499446505, - "grad_norm": 1612.9573974609375, - "learning_rate": 1.4000000000000001e-05, - "loss": 238.6044, + "epoch": 0.01414044287867096, + "grad_norm": 1920.7076416015625, + "learning_rate": 7.000000000000001e-06, + "loss": 175.3216, "step": 3500 }, { - "epoch": 0.028361573703730637, - "grad_norm": 1845.0887451171875, - "learning_rate": 1.4040000000000001e-05, - "loss": 243.2849, + "epoch": 0.014180844144038592, + "grad_norm": 676.3031005859375, + "learning_rate": 7.0200000000000006e-06, + "loss": 209.18, "step": 3510 }, { - "epoch": 0.02844237590801477, - "grad_norm": 1434.4193115234375, - "learning_rate": 1.408e-05, - "loss": 223.1181, + "epoch": 0.014221245409406223, + "grad_norm": 1840.922119140625, + "learning_rate": 7.04e-06, + "loss": 241.9013, "step": 3520 }, { - "epoch": 0.028523178112298903, - "grad_norm": 886.107666015625, - "learning_rate": 1.412e-05, - "loss": 262.0864, + "epoch": 0.014261646674773854, + "grad_norm": 1004.5390625, + "learning_rate": 7.06e-06, + "loss": 219.0837, "step": 3530 }, { - "epoch": 0.028603980316583035, - "grad_norm": 731.3050537109375, - "learning_rate": 1.4160000000000002e-05, - "loss": 200.1181, + "epoch": 0.014302047940141486, + "grad_norm": 2489.928955078125, + "learning_rate": 7.080000000000001e-06, + "loss": 207.0227, "step": 3540 }, { - "epoch": 0.028684782520867168, - "grad_norm": 834.052734375, - "learning_rate": 1.42e-05, - "loss": 252.3755, + "epoch": 0.014342449205509117, + "grad_norm": 713.509033203125, + "learning_rate": 7.1e-06, + "loss": 208.6873, "step": 3550 }, { - "epoch": 0.0287655847251513, - "grad_norm": 840.7152709960938, - "learning_rate": 1.4240000000000001e-05, - "loss": 264.0922, + "epoch": 0.014382850470876748, + "grad_norm": 453.5292053222656, + "learning_rate": 7.1200000000000004e-06, + "loss": 281.9877, "step": 3560 }, { - "epoch": 0.028846386929435437, - "grad_norm": 1238.5738525390625, - "learning_rate": 1.4280000000000002e-05, - "loss": 236.1651, + "epoch": 0.01442325173624438, + "grad_norm": 900.6409912109375, + "learning_rate": 7.140000000000001e-06, + "loss": 235.5468, "step": 3570 }, { - "epoch": 0.02892718913371957, - "grad_norm": 1469.219970703125, - "learning_rate": 1.432e-05, - "loss": 422.1472, + "epoch": 0.01446365300161201, + "grad_norm": 438.5726623535156, + "learning_rate": 7.16e-06, + "loss": 162.5427, "step": 3580 }, { - "epoch": 0.029007991338003702, - "grad_norm": 22283.98046875, - "learning_rate": 1.4360000000000001e-05, - "loss": 315.1224, + "epoch": 0.014504054266979642, + "grad_norm": 861.850341796875, + "learning_rate": 7.180000000000001e-06, + "loss": 236.0929, "step": 3590 }, { - "epoch": 0.029088793542287834, - "grad_norm": 2152.561279296875, - "learning_rate": 1.44e-05, - "loss": 249.663, + "epoch": 0.014544455532347273, + "grad_norm": 892.953369140625, + "learning_rate": 7.2e-06, + "loss": 249.5097, "step": 3600 }, { - "epoch": 0.029169595746571967, - "grad_norm": 896.6244506835938, - "learning_rate": 1.444e-05, - "loss": 220.4215, + "epoch": 0.014584856797714905, + "grad_norm": 1040.248046875, + "learning_rate": 7.22e-06, + "loss": 268.9143, "step": 3610 }, { - "epoch": 0.0292503979508561, - "grad_norm": 1172.474365234375, - "learning_rate": 1.4480000000000002e-05, - "loss": 240.2923, + "epoch": 0.014625258063082536, + "grad_norm": 774.4244384765625, + "learning_rate": 7.240000000000001e-06, + "loss": 235.6966, "step": 3620 }, { - "epoch": 0.029331200155140232, - "grad_norm": 2668.1337890625, - "learning_rate": 1.452e-05, - "loss": 241.7765, + "epoch": 0.014665659328450167, + "grad_norm": 540.7510986328125, + "learning_rate": 7.26e-06, + "loss": 222.3721, "step": 3630 }, { - "epoch": 0.029412002359424365, - "grad_norm": 1302.285888671875, - "learning_rate": 1.4560000000000001e-05, - "loss": 222.4546, + "epoch": 0.014706060593817799, + "grad_norm": 3457.84912109375, + "learning_rate": 7.280000000000001e-06, + "loss": 262.0487, "step": 3640 }, { - "epoch": 0.029492804563708497, - "grad_norm": 1539.5206298828125, - "learning_rate": 1.4599999999999999e-05, - "loss": 230.4968, + "epoch": 0.01474646185918543, + "grad_norm": 802.951416015625, + "learning_rate": 7.2999999999999996e-06, + "loss": 277.8951, "step": 3650 }, { - "epoch": 0.02957360676799263, - "grad_norm": 1219.3082275390625, - "learning_rate": 1.464e-05, - "loss": 264.3233, + "epoch": 0.014786863124553061, + "grad_norm": 1426.4849853515625, + "learning_rate": 7.32e-06, + "loss": 273.6454, "step": 3660 }, { - "epoch": 0.029654408972276763, - "grad_norm": 1184.2034912109375, - "learning_rate": 1.4680000000000002e-05, - "loss": 259.6289, + "epoch": 0.014827264389920693, + "grad_norm": 763.5048828125, + "learning_rate": 7.340000000000001e-06, + "loss": 324.4447, "step": 3670 }, { - "epoch": 0.029735211176560895, - "grad_norm": 4313.5712890625, - "learning_rate": 1.472e-05, - "loss": 215.6232, + "epoch": 0.014867665655288324, + "grad_norm": 2634.498046875, + "learning_rate": 7.36e-06, + "loss": 285.5299, "step": 3680 }, { - "epoch": 0.029816013380845028, - "grad_norm": 1337.511474609375, - "learning_rate": 1.4760000000000001e-05, - "loss": 195.3004, + "epoch": 0.014908066920655955, + "grad_norm": 1109.640380859375, + "learning_rate": 7.3800000000000005e-06, + "loss": 421.3942, "step": 3690 }, { - "epoch": 0.02989681558512916, - "grad_norm": 1705.420654296875, - "learning_rate": 1.48e-05, - "loss": 294.9566, + "epoch": 0.014948468186023587, + "grad_norm": 1876.8143310546875, + "learning_rate": 7.4e-06, + "loss": 345.6086, "step": 3700 }, { - "epoch": 0.029977617789413297, - "grad_norm": 1603.8756103515625, - "learning_rate": 1.4840000000000002e-05, - "loss": 247.8541, + "epoch": 0.014988869451391218, + "grad_norm": 820.827880859375, + "learning_rate": 7.420000000000001e-06, + "loss": 241.7691, "step": 3710 }, { - "epoch": 0.03005841999369743, - "grad_norm": 2027.82666015625, - "learning_rate": 1.488e-05, - "loss": 181.4365, + "epoch": 0.01502927071675885, + "grad_norm": 1500.841552734375, + "learning_rate": 7.44e-06, + "loss": 247.7824, "step": 3720 }, { - "epoch": 0.030139222197981562, - "grad_norm": 2163.25146484375, - "learning_rate": 1.4920000000000001e-05, - "loss": 290.6712, + "epoch": 0.01506967198212648, + "grad_norm": 1121.291259765625, + "learning_rate": 7.4600000000000006e-06, + "loss": 302.3958, "step": 3730 }, { - "epoch": 0.030220024402265695, - "grad_norm": 1098.857177734375, - "learning_rate": 1.4960000000000002e-05, - "loss": 259.7422, + "epoch": 0.015110073247494112, + "grad_norm": 1057.3134765625, + "learning_rate": 7.480000000000001e-06, + "loss": 271.7261, "step": 3740 }, { - "epoch": 0.030300826606549827, - "grad_norm": 1292.9393310546875, - "learning_rate": 1.5e-05, - "loss": 294.7247, + "epoch": 0.015150474512861743, + "grad_norm": 1772.322509765625, + "learning_rate": 7.5e-06, + "loss": 404.0533, "step": 3750 }, { - "epoch": 0.03038162881083396, - "grad_norm": 2220.01611328125, - "learning_rate": 1.5040000000000002e-05, - "loss": 265.5237, + "epoch": 0.015190875778229375, + "grad_norm": 547.7960815429688, + "learning_rate": 7.520000000000001e-06, + "loss": 255.6221, "step": 3760 }, { - "epoch": 0.030462431015118092, - "grad_norm": 1562.214599609375, - "learning_rate": 1.508e-05, - "loss": 255.554, + "epoch": 0.015231277043597006, + "grad_norm": 980.1959228515625, + "learning_rate": 7.54e-06, + "loss": 310.8835, "step": 3770 }, { - "epoch": 0.030543233219402225, - "grad_norm": 963.2359619140625, - "learning_rate": 1.5120000000000001e-05, - "loss": 215.5685, + "epoch": 0.015271678308964637, + "grad_norm": 1686.9091796875, + "learning_rate": 7.5600000000000005e-06, + "loss": 246.4056, "step": 3780 }, { - "epoch": 0.030624035423686358, - "grad_norm": 1751.441650390625, - "learning_rate": 1.5160000000000002e-05, - "loss": 249.1441, + "epoch": 0.015312079574332269, + "grad_norm": 867.049072265625, + "learning_rate": 7.580000000000001e-06, + "loss": 246.8477, "step": 3790 }, { - "epoch": 0.03070483762797049, - "grad_norm": 2362.97509765625, - "learning_rate": 1.52e-05, - "loss": 319.2007, + "epoch": 0.0153524808396999, + "grad_norm": 1169.5635986328125, + "learning_rate": 7.6e-06, + "loss": 184.4648, "step": 3800 }, { - "epoch": 0.030785639832254623, - "grad_norm": 1199.3076171875, - "learning_rate": 1.5240000000000001e-05, - "loss": 252.0641, + "epoch": 0.015392882105067531, + "grad_norm": 689.9214477539062, + "learning_rate": 7.620000000000001e-06, + "loss": 323.659, "step": 3810 }, { - "epoch": 0.030866442036538756, - "grad_norm": 555.8883666992188, - "learning_rate": 1.528e-05, - "loss": 206.8853, + "epoch": 0.015433283370435162, + "grad_norm": 519.1124267578125, + "learning_rate": 7.64e-06, + "loss": 225.6972, "step": 3820 }, { - "epoch": 0.030947244240822888, - "grad_norm": 786.2750244140625, - "learning_rate": 1.5320000000000002e-05, - "loss": 266.4819, + "epoch": 0.015473684635802794, + "grad_norm": 1257.6265869140625, + "learning_rate": 7.660000000000001e-06, + "loss": 337.094, "step": 3830 }, { - "epoch": 0.031028046445107024, - "grad_norm": 1596.8973388671875, - "learning_rate": 1.536e-05, - "loss": 267.247, + "epoch": 0.015514085901170425, + "grad_norm": 5566.001953125, + "learning_rate": 7.68e-06, + "loss": 191.7031, "step": 3840 }, { - "epoch": 0.031108848649391157, - "grad_norm": 961.9694213867188, - "learning_rate": 1.54e-05, - "loss": 260.6433, + "epoch": 0.015554487166538056, + "grad_norm": 734.2090454101562, + "learning_rate": 7.7e-06, + "loss": 221.7874, "step": 3850 }, { - "epoch": 0.03118965085367529, - "grad_norm": 1149.3822021484375, - "learning_rate": 1.544e-05, - "loss": 220.1721, + "epoch": 0.015594888431905688, + "grad_norm": 1421.68359375, + "learning_rate": 7.72e-06, + "loss": 266.4913, "step": 3860 }, { - "epoch": 0.03127045305795942, - "grad_norm": 785.8920288085938, - "learning_rate": 1.548e-05, - "loss": 184.7455, + "epoch": 0.01563528969727332, + "grad_norm": 2573.87353515625, + "learning_rate": 7.74e-06, + "loss": 301.2037, "step": 3870 }, { - "epoch": 0.031351255262243555, - "grad_norm": 1458.1995849609375, - "learning_rate": 1.552e-05, - "loss": 295.4266, + "epoch": 0.01567569096264095, + "grad_norm": 492.0390319824219, + "learning_rate": 7.76e-06, + "loss": 234.7707, "step": 3880 }, { - "epoch": 0.031432057466527684, - "grad_norm": 1276.711181640625, - "learning_rate": 1.556e-05, - "loss": 220.6965, + "epoch": 0.01571609222800858, + "grad_norm": 600.4080200195312, + "learning_rate": 7.78e-06, + "loss": 267.7107, "step": 3890 }, { - "epoch": 0.03151285967081182, - "grad_norm": 1452.5382080078125, - "learning_rate": 1.56e-05, - "loss": 229.4156, + "epoch": 0.01575649349337621, + "grad_norm": 1099.673828125, + "learning_rate": 7.8e-06, + "loss": 292.7474, "step": 3900 }, { - "epoch": 0.031593661875095956, - "grad_norm": 1885.9638671875, - "learning_rate": 1.5640000000000003e-05, - "loss": 227.5622, + "epoch": 0.015796894758743844, + "grad_norm": 541.96875, + "learning_rate": 7.820000000000001e-06, + "loss": 229.897, "step": 3910 }, { - "epoch": 0.031674464079380085, - "grad_norm": 1566.416748046875, - "learning_rate": 1.568e-05, - "loss": 208.7988, + "epoch": 0.015837296024111474, + "grad_norm": 3221.322509765625, + "learning_rate": 7.84e-06, + "loss": 200.1891, "step": 3920 }, { - "epoch": 0.03175526628366422, - "grad_norm": 1472.25732421875, - "learning_rate": 1.5720000000000002e-05, - "loss": 233.2323, + "epoch": 0.015877697289479107, + "grad_norm": 2533.82177734375, + "learning_rate": 7.860000000000001e-06, + "loss": 240.3558, "step": 3930 }, { - "epoch": 0.03183606848794835, - "grad_norm": 3457.270263671875, - "learning_rate": 1.5759999999999998e-05, - "loss": 308.7386, + "epoch": 0.015918098554846737, + "grad_norm": 909.9031372070312, + "learning_rate": 7.879999999999999e-06, + "loss": 231.9597, "step": 3940 }, { - "epoch": 0.03191687069223249, - "grad_norm": 923.0127563476562, - "learning_rate": 1.58e-05, - "loss": 170.279, + "epoch": 0.01595849982021437, + "grad_norm": 669.5313110351562, + "learning_rate": 7.9e-06, + "loss": 316.9614, "step": 3950 }, { - "epoch": 0.031997672896516616, - "grad_norm": 884.5118408203125, - "learning_rate": 1.584e-05, - "loss": 258.7806, + "epoch": 0.015998901085582, + "grad_norm": 694.2930297851562, + "learning_rate": 7.92e-06, + "loss": 272.3228, "step": 3960 }, { - "epoch": 0.03207847510080075, - "grad_norm": 3672.524169921875, - "learning_rate": 1.588e-05, - "loss": 249.4709, + "epoch": 0.016039302350949632, + "grad_norm": 528.3792114257812, + "learning_rate": 7.94e-06, + "loss": 329.1032, "step": 3970 }, { - "epoch": 0.03215927730508488, - "grad_norm": 1763.4033203125, - "learning_rate": 1.592e-05, - "loss": 300.7331, + "epoch": 0.016079703616317262, + "grad_norm": 579.0252075195312, + "learning_rate": 7.96e-06, + "loss": 258.9907, "step": 3980 }, { - "epoch": 0.03224007950936902, - "grad_norm": 1443.6868896484375, - "learning_rate": 1.596e-05, - "loss": 265.3076, + "epoch": 0.016120104881684895, + "grad_norm": 645.7807006835938, + "learning_rate": 7.98e-06, + "loss": 242.0821, "step": 3990 }, { - "epoch": 0.032320881713653146, - "grad_norm": 1217.42333984375, - "learning_rate": 1.6000000000000003e-05, - "loss": 227.337, + "epoch": 0.016160506147052525, + "grad_norm": 543.9231567382812, + "learning_rate": 8.000000000000001e-06, + "loss": 284.7925, "step": 4000 }, { - "epoch": 0.03240168391793728, - "grad_norm": 2161.5205078125, - "learning_rate": 1.604e-05, - "loss": 372.776, + "epoch": 0.016200907412420158, + "grad_norm": 610.683349609375, + "learning_rate": 8.02e-06, + "loss": 220.6932, "step": 4010 }, { - "epoch": 0.03248248612222141, - "grad_norm": 1007.3255615234375, - "learning_rate": 1.6080000000000002e-05, - "loss": 255.6254, + "epoch": 0.016241308677787787, + "grad_norm": 0.0, + "learning_rate": 8.040000000000001e-06, + "loss": 230.9501, "step": 4020 }, { - "epoch": 0.03256328832650555, - "grad_norm": 1462.6104736328125, - "learning_rate": 1.612e-05, - "loss": 253.8938, + "epoch": 0.01628170994315542, + "grad_norm": 1050.20947265625, + "learning_rate": 8.06e-06, + "loss": 292.7622, "step": 4030 }, { - "epoch": 0.03264409053078968, - "grad_norm": 1253.7725830078125, - "learning_rate": 1.616e-05, - "loss": 218.3035, + "epoch": 0.01632211120852305, + "grad_norm": 1064.6005859375, + "learning_rate": 8.08e-06, + "loss": 346.057, "step": 4040 }, { - "epoch": 0.03272489273507381, - "grad_norm": 1011.6146240234375, - "learning_rate": 1.62e-05, - "loss": 216.4191, + "epoch": 0.016362512473890683, + "grad_norm": 1817.19482421875, + "learning_rate": 8.1e-06, + "loss": 229.4932, "step": 4050 }, { - "epoch": 0.03280569493935795, - "grad_norm": 1421.2760009765625, - "learning_rate": 1.624e-05, - "loss": 246.4713, + "epoch": 0.016402913739258312, + "grad_norm": 1169.279052734375, + "learning_rate": 8.12e-06, + "loss": 346.2139, "step": 4060 }, { - "epoch": 0.03288649714364208, - "grad_norm": 1593.51318359375, - "learning_rate": 1.628e-05, - "loss": 214.1486, + "epoch": 0.016443315004625945, + "grad_norm": 1004.5889282226562, + "learning_rate": 8.14e-06, + "loss": 257.1552, "step": 4070 }, { - "epoch": 0.032967299347926214, - "grad_norm": 1737.602783203125, - "learning_rate": 1.6320000000000003e-05, - "loss": 197.031, + "epoch": 0.016483716269993575, + "grad_norm": 660.7535400390625, + "learning_rate": 8.160000000000001e-06, + "loss": 247.7336, "step": 4080 }, { - "epoch": 0.03304810155221034, - "grad_norm": 1183.92626953125, - "learning_rate": 1.636e-05, - "loss": 201.6871, + "epoch": 0.016524117535361208, + "grad_norm": 638.9656372070312, + "learning_rate": 8.18e-06, + "loss": 360.9811, "step": 4090 }, { - "epoch": 0.03312890375649448, - "grad_norm": 4638.69921875, - "learning_rate": 1.6400000000000002e-05, - "loss": 271.8158, + "epoch": 0.016564518800728838, + "grad_norm": 865.1559448242188, + "learning_rate": 8.200000000000001e-06, + "loss": 267.7802, "step": 4100 }, { - "epoch": 0.03320970596077861, - "grad_norm": 1380.73291015625, - "learning_rate": 1.644e-05, - "loss": 234.8128, + "epoch": 0.01660492006609647, + "grad_norm": 528.1417236328125, + "learning_rate": 8.22e-06, + "loss": 211.1934, "step": 4110 }, { - "epoch": 0.033290508165062745, - "grad_norm": 1769.43115234375, - "learning_rate": 1.648e-05, - "loss": 209.7356, + "epoch": 0.0166453213314641, + "grad_norm": 602.795166015625, + "learning_rate": 8.24e-06, + "loss": 342.2845, "step": 4120 }, { - "epoch": 0.033371310369346874, - "grad_norm": 752.7643432617188, - "learning_rate": 1.652e-05, - "loss": 260.7428, + "epoch": 0.016685722596831733, + "grad_norm": 537.5540161132812, + "learning_rate": 8.26e-06, + "loss": 222.6927, "step": 4130 }, { - "epoch": 0.03345211257363101, - "grad_norm": 1045.0377197265625, - "learning_rate": 1.656e-05, - "loss": 287.8467, + "epoch": 0.016726123862199363, + "grad_norm": 1009.3809814453125, + "learning_rate": 8.28e-06, + "loss": 278.8901, "step": 4140 }, { - "epoch": 0.03353291477791514, - "grad_norm": 957.307373046875, - "learning_rate": 1.66e-05, - "loss": 251.1729, + "epoch": 0.016766525127566996, + "grad_norm": 1916.926513671875, + "learning_rate": 8.3e-06, + "loss": 335.051, "step": 4150 }, { - "epoch": 0.033613716982199275, - "grad_norm": 2887.29638671875, - "learning_rate": 1.664e-05, - "loss": 237.806, + "epoch": 0.016806926392934626, + "grad_norm": 759.0120849609375, + "learning_rate": 8.32e-06, + "loss": 292.8282, "step": 4160 }, { - "epoch": 0.033694519186483404, - "grad_norm": 1108.643310546875, - "learning_rate": 1.668e-05, - "loss": 205.0025, + "epoch": 0.01684732765830226, + "grad_norm": 1126.7659912109375, + "learning_rate": 8.34e-06, + "loss": 356.8466, "step": 4170 }, { - "epoch": 0.03377532139076754, - "grad_norm": 1745.2540283203125, - "learning_rate": 1.672e-05, - "loss": 250.4789, + "epoch": 0.016887728923669888, + "grad_norm": 4519.04833984375, + "learning_rate": 8.36e-06, + "loss": 254.2794, "step": 4180 }, { - "epoch": 0.033856123595051676, - "grad_norm": 1259.505615234375, - "learning_rate": 1.6760000000000002e-05, - "loss": 187.1045, + "epoch": 0.01692813018903752, + "grad_norm": 625.6685791015625, + "learning_rate": 8.380000000000001e-06, + "loss": 256.8828, "step": 4190 }, { - "epoch": 0.033936925799335806, - "grad_norm": 2127.105712890625, - "learning_rate": 1.6800000000000002e-05, - "loss": 241.0329, + "epoch": 0.01696853145440515, + "grad_norm": 901.2313842773438, + "learning_rate": 8.400000000000001e-06, + "loss": 173.8549, "step": 4200 }, { - "epoch": 0.03401772800361994, - "grad_norm": 899.3731079101562, - "learning_rate": 1.684e-05, - "loss": 224.4188, + "epoch": 0.017008932719772784, + "grad_norm": 612.3013916015625, + "learning_rate": 8.42e-06, + "loss": 278.5784, "step": 4210 }, { - "epoch": 0.03409853020790407, - "grad_norm": 1598.4705810546875, - "learning_rate": 1.688e-05, - "loss": 212.8042, + "epoch": 0.017049333985140414, + "grad_norm": 500.49169921875, + "learning_rate": 8.44e-06, + "loss": 197.3738, "step": 4220 }, { - "epoch": 0.03417933241218821, - "grad_norm": 1147.515869140625, - "learning_rate": 1.692e-05, - "loss": 173.9525, + "epoch": 0.017089735250508047, + "grad_norm": 1060.2108154296875, + "learning_rate": 8.46e-06, + "loss": 298.4693, "step": 4230 }, { - "epoch": 0.034260134616472336, - "grad_norm": 1514.278076171875, - "learning_rate": 1.696e-05, - "loss": 226.0929, + "epoch": 0.017130136515875676, + "grad_norm": 612.9854736328125, + "learning_rate": 8.48e-06, + "loss": 329.1869, "step": 4240 }, { - "epoch": 0.03434093682075647, - "grad_norm": 793.54833984375, - "learning_rate": 1.7000000000000003e-05, - "loss": 276.8738, + "epoch": 0.01717053778124331, + "grad_norm": 1364.545654296875, + "learning_rate": 8.500000000000002e-06, + "loss": 334.3979, "step": 4250 }, { - "epoch": 0.0344217390250406, - "grad_norm": 1370.0628662109375, - "learning_rate": 1.704e-05, - "loss": 269.6619, + "epoch": 0.01721093904661094, + "grad_norm": 696.9126586914062, + "learning_rate": 8.52e-06, + "loss": 271.8317, "step": 4260 }, { - "epoch": 0.03450254122932474, - "grad_norm": 2025.7630615234375, - "learning_rate": 1.7080000000000002e-05, - "loss": 198.3601, + "epoch": 0.017251340311978572, + "grad_norm": 688.8264770507812, + "learning_rate": 8.540000000000001e-06, + "loss": 203.2293, "step": 4270 }, { - "epoch": 0.03458334343360887, - "grad_norm": 1085.4031982421875, - "learning_rate": 1.712e-05, - "loss": 270.7385, + "epoch": 0.0172917415773462, + "grad_norm": 1147.045654296875, + "learning_rate": 8.56e-06, + "loss": 349.6926, "step": 4280 }, { - "epoch": 0.034664145637893, - "grad_norm": 1837.051025390625, - "learning_rate": 1.7160000000000002e-05, - "loss": 250.3432, + "epoch": 0.017332142842713835, + "grad_norm": 477.3202209472656, + "learning_rate": 8.580000000000001e-06, + "loss": 146.8848, "step": 4290 }, { - "epoch": 0.03474494784217713, - "grad_norm": 1163.429931640625, - "learning_rate": 1.7199999999999998e-05, - "loss": 369.4985, + "epoch": 0.017372544108081464, + "grad_norm": 1310.89599609375, + "learning_rate": 8.599999999999999e-06, + "loss": 247.6962, "step": 4300 }, { - "epoch": 0.03482575004646127, - "grad_norm": 985.8709716796875, - "learning_rate": 1.724e-05, - "loss": 254.3368, + "epoch": 0.017412945373449097, + "grad_norm": 1124.722900390625, + "learning_rate": 8.62e-06, + "loss": 347.9728, "step": 4310 }, { - "epoch": 0.0349065522507454, - "grad_norm": 1717.9407958984375, - "learning_rate": 1.728e-05, - "loss": 245.3844, + "epoch": 0.017453346638816727, + "grad_norm": 1394.403564453125, + "learning_rate": 8.64e-06, + "loss": 261.2298, "step": 4320 }, { - "epoch": 0.03498735445502953, - "grad_norm": 2029.035400390625, - "learning_rate": 1.732e-05, - "loss": 218.9389, + "epoch": 0.01749374790418436, + "grad_norm": 889.3135375976562, + "learning_rate": 8.66e-06, + "loss": 176.3773, "step": 4330 }, { - "epoch": 0.03506815665931367, - "grad_norm": 5775.7578125, - "learning_rate": 1.736e-05, - "loss": 202.7436, + "epoch": 0.01753414916955199, + "grad_norm": 1988.6873779296875, + "learning_rate": 8.68e-06, + "loss": 260.4097, "step": 4340 }, { - "epoch": 0.0351489588635978, - "grad_norm": 803.3091430664062, - "learning_rate": 1.74e-05, - "loss": 230.8249, + "epoch": 0.017574550434919622, + "grad_norm": 1403.53955078125, + "learning_rate": 8.7e-06, + "loss": 222.8027, "step": 4350 }, { - "epoch": 0.035229761067881935, - "grad_norm": 1491.943603515625, - "learning_rate": 1.7440000000000002e-05, - "loss": 347.896, + "epoch": 0.017614951700287252, + "grad_norm": 1020.5213623046875, + "learning_rate": 8.720000000000001e-06, + "loss": 202.7748, "step": 4360 }, { - "epoch": 0.035310563272166064, - "grad_norm": 1860.998291015625, - "learning_rate": 1.7480000000000002e-05, - "loss": 280.704, + "epoch": 0.017655352965654885, + "grad_norm": 4106.30126953125, + "learning_rate": 8.740000000000001e-06, + "loss": 257.8873, "step": 4370 }, { - "epoch": 0.0353913654764502, - "grad_norm": 1073.6658935546875, - "learning_rate": 1.752e-05, - "loss": 239.5408, + "epoch": 0.017695754231022515, + "grad_norm": 458.9610290527344, + "learning_rate": 8.76e-06, + "loss": 274.0975, "step": 4380 }, { - "epoch": 0.03547216768073433, - "grad_norm": 1513.69482421875, - "learning_rate": 1.756e-05, - "loss": 211.6129, + "epoch": 0.017736155496390148, + "grad_norm": 1450.5250244140625, + "learning_rate": 8.78e-06, + "loss": 212.7885, "step": 4390 }, { - "epoch": 0.035552969885018465, - "grad_norm": 738.4833374023438, - "learning_rate": 1.76e-05, - "loss": 217.8898, + "epoch": 0.017776556761757777, + "grad_norm": 658.0136108398438, + "learning_rate": 8.8e-06, + "loss": 313.3228, "step": 4400 }, { - "epoch": 0.035633772089302594, - "grad_norm": 985.9658203125, - "learning_rate": 1.764e-05, - "loss": 229.6193, + "epoch": 0.01781695802712541, + "grad_norm": 607.8909301757812, + "learning_rate": 8.82e-06, + "loss": 220.5824, "step": 4410 }, { - "epoch": 0.03571457429358673, - "grad_norm": 1359.5565185546875, - "learning_rate": 1.7680000000000004e-05, - "loss": 220.6117, + "epoch": 0.01785735929249304, + "grad_norm": 964.3424682617188, + "learning_rate": 8.840000000000002e-06, + "loss": 214.6224, "step": 4420 }, { - "epoch": 0.03579537649787086, - "grad_norm": 1126.1280517578125, - "learning_rate": 1.772e-05, - "loss": 247.6776, + "epoch": 0.017897760557860673, + "grad_norm": 1267.5516357421875, + "learning_rate": 8.86e-06, + "loss": 231.4749, "step": 4430 }, { - "epoch": 0.035876178702154995, - "grad_norm": 2267.62255859375, - "learning_rate": 1.7760000000000003e-05, - "loss": 214.3918, + "epoch": 0.017938161823228303, + "grad_norm": 852.99462890625, + "learning_rate": 8.880000000000001e-06, + "loss": 209.5046, "step": 4440 }, { - "epoch": 0.035956980906439125, - "grad_norm": 1662.36865234375, - "learning_rate": 1.78e-05, - "loss": 240.1594, + "epoch": 0.017978563088595936, + "grad_norm": 895.4765014648438, + "learning_rate": 8.9e-06, + "loss": 227.3372, "step": 4450 }, { - "epoch": 0.03603778311072326, - "grad_norm": 1222.072021484375, - "learning_rate": 1.7840000000000002e-05, - "loss": 210.1431, + "epoch": 0.018018964353963565, + "grad_norm": 658.2288818359375, + "learning_rate": 8.920000000000001e-06, + "loss": 235.962, "step": 4460 }, { - "epoch": 0.0361185853150074, - "grad_norm": 1343.1220703125, - "learning_rate": 1.7879999999999998e-05, - "loss": 210.0152, + "epoch": 0.0180593656193312, + "grad_norm": 1219.0494384765625, + "learning_rate": 8.939999999999999e-06, + "loss": 222.9912, "step": 4470 }, { - "epoch": 0.036199387519291526, - "grad_norm": 2286.17333984375, - "learning_rate": 1.792e-05, - "loss": 261.0413, + "epoch": 0.018099766884698828, + "grad_norm": 976.2411499023438, + "learning_rate": 8.96e-06, + "loss": 251.0392, "step": 4480 }, { - "epoch": 0.03628018972357566, - "grad_norm": 878.9855346679688, - "learning_rate": 1.796e-05, - "loss": 203.6624, + "epoch": 0.01814016815006646, + "grad_norm": 1230.3253173828125, + "learning_rate": 8.98e-06, + "loss": 208.2115, "step": 4490 }, { - "epoch": 0.03636099192785979, - "grad_norm": 1368.7535400390625, - "learning_rate": 1.8e-05, - "loss": 230.5439, + "epoch": 0.01818056941543409, + "grad_norm": 738.1622314453125, + "learning_rate": 9e-06, + "loss": 248.237, "step": 4500 }, { - "epoch": 0.03644179413214393, - "grad_norm": 1889.8624267578125, - "learning_rate": 1.804e-05, - "loss": 219.1165, + "epoch": 0.018220970680801724, + "grad_norm": 825.9674072265625, + "learning_rate": 9.02e-06, + "loss": 231.5556, "step": 4510 }, { - "epoch": 0.036522596336428056, - "grad_norm": 1708.1866455078125, - "learning_rate": 1.808e-05, - "loss": 208.0205, + "epoch": 0.018261371946169353, + "grad_norm": 782.737060546875, + "learning_rate": 9.04e-06, + "loss": 213.6435, "step": 4520 }, { - "epoch": 0.03660339854071219, - "grad_norm": 1475.5697021484375, - "learning_rate": 1.812e-05, - "loss": 278.3266, + "epoch": 0.018301773211536986, + "grad_norm": 712.6553344726562, + "learning_rate": 9.06e-06, + "loss": 234.0471, "step": 4530 }, { - "epoch": 0.03668420074499632, - "grad_norm": 813.5365600585938, - "learning_rate": 1.8160000000000002e-05, - "loss": 240.388, + "epoch": 0.018342174476904616, + "grad_norm": 1101.6629638671875, + "learning_rate": 9.080000000000001e-06, + "loss": 198.3389, "step": 4540 }, { - "epoch": 0.03676500294928046, - "grad_norm": 1509.516357421875, - "learning_rate": 1.8200000000000002e-05, - "loss": 227.5149, + "epoch": 0.01838257574227225, + "grad_norm": 1747.929443359375, + "learning_rate": 9.100000000000001e-06, + "loss": 332.4391, "step": 4550 }, { - "epoch": 0.03684580515356459, - "grad_norm": 1119.7554931640625, - "learning_rate": 1.824e-05, - "loss": 172.6106, + "epoch": 0.01842297700763988, + "grad_norm": 2980.9208984375, + "learning_rate": 9.12e-06, + "loss": 337.9396, "step": 4560 }, { - "epoch": 0.03692660735784872, - "grad_norm": 928.3190307617188, - "learning_rate": 1.828e-05, - "loss": 181.8961, + "epoch": 0.01846337827300751, + "grad_norm": 1282.68115234375, + "learning_rate": 9.14e-06, + "loss": 237.8889, "step": 4570 }, { - "epoch": 0.03700740956213285, - "grad_norm": 1846.45654296875, - "learning_rate": 1.832e-05, - "loss": 266.5626, + "epoch": 0.01850377953837514, + "grad_norm": 948.6932983398438, + "learning_rate": 9.16e-06, + "loss": 288.0977, "step": 4580 }, { - "epoch": 0.03708821176641699, - "grad_norm": 3318.25732421875, - "learning_rate": 1.8360000000000004e-05, - "loss": 208.0338, + "epoch": 0.018544180803742774, + "grad_norm": 501.8581237792969, + "learning_rate": 9.180000000000002e-06, + "loss": 329.9324, "step": 4590 }, { - "epoch": 0.03716901397070112, - "grad_norm": 1821.5848388671875, - "learning_rate": 1.84e-05, - "loss": 203.3513, + "epoch": 0.018584582069110404, + "grad_norm": 1850.9791259765625, + "learning_rate": 9.2e-06, + "loss": 319.7892, "step": 4600 }, { - "epoch": 0.037249816174985254, - "grad_norm": 950.8689575195312, - "learning_rate": 1.8440000000000003e-05, - "loss": 246.1736, + "epoch": 0.018624983334478037, + "grad_norm": 730.577392578125, + "learning_rate": 9.220000000000002e-06, + "loss": 217.722, "step": 4610 }, { - "epoch": 0.03733061837926939, - "grad_norm": 4509.83984375, - "learning_rate": 1.848e-05, - "loss": 234.2551, + "epoch": 0.018665384599845666, + "grad_norm": 408.53619384765625, + "learning_rate": 9.24e-06, + "loss": 190.9173, "step": 4620 }, { - "epoch": 0.03741142058355352, - "grad_norm": 4344.130859375, - "learning_rate": 1.8520000000000002e-05, - "loss": 225.5145, + "epoch": 0.0187057858652133, + "grad_norm": 884.1920776367188, + "learning_rate": 9.260000000000001e-06, + "loss": 193.8064, "step": 4630 }, { - "epoch": 0.037492222787837655, - "grad_norm": 968.5219116210938, - "learning_rate": 1.856e-05, - "loss": 246.9949, + "epoch": 0.01874618713058093, + "grad_norm": 1684.18408203125, + "learning_rate": 9.28e-06, + "loss": 197.2281, "step": 4640 }, { - "epoch": 0.037573024992121784, - "grad_norm": 1397.7412109375, - "learning_rate": 1.86e-05, - "loss": 244.9769, + "epoch": 0.018786588395948562, + "grad_norm": 1591.9307861328125, + "learning_rate": 9.3e-06, + "loss": 282.6273, "step": 4650 }, { - "epoch": 0.03765382719640592, - "grad_norm": 2875.123046875, - "learning_rate": 1.864e-05, - "loss": 239.3926, + "epoch": 0.01882698966131619, + "grad_norm": 1377.8363037109375, + "learning_rate": 9.32e-06, + "loss": 306.7148, "step": 4660 }, { - "epoch": 0.03773462940069005, - "grad_norm": 1230.772705078125, - "learning_rate": 1.868e-05, - "loss": 264.2549, + "epoch": 0.018867390926683825, + "grad_norm": 600.4591674804688, + "learning_rate": 9.34e-06, + "loss": 290.5179, "step": 4670 }, { - "epoch": 0.037815431604974185, - "grad_norm": 1240.8465576171875, - "learning_rate": 1.872e-05, - "loss": 217.8366, + "epoch": 0.018907792192051454, + "grad_norm": 1007.29296875, + "learning_rate": 9.36e-06, + "loss": 211.2058, "step": 4680 }, { - "epoch": 0.037896233809258315, - "grad_norm": 970.4682006835938, - "learning_rate": 1.876e-05, - "loss": 210.5743, + "epoch": 0.018948193457419087, + "grad_norm": 1834.026611328125, + "learning_rate": 9.38e-06, + "loss": 214.4897, "step": 4690 }, { - "epoch": 0.03797703601354245, - "grad_norm": 799.42236328125, - "learning_rate": 1.88e-05, - "loss": 189.6499, + "epoch": 0.018988594722786717, + "grad_norm": 895.9391479492188, + "learning_rate": 9.4e-06, + "loss": 241.2341, "step": 4700 }, { - "epoch": 0.03805783821782658, - "grad_norm": 1531.7857666015625, - "learning_rate": 1.8840000000000003e-05, - "loss": 212.7048, + "epoch": 0.01902899598815435, + "grad_norm": 940.978271484375, + "learning_rate": 9.420000000000001e-06, + "loss": 255.7137, "step": 4710 }, { - "epoch": 0.038138640422110716, - "grad_norm": 1257.698486328125, - "learning_rate": 1.888e-05, - "loss": 194.7889, + "epoch": 0.01906939725352198, + "grad_norm": 0.0, + "learning_rate": 9.44e-06, + "loss": 202.176, "step": 4720 }, { - "epoch": 0.038219442626394845, - "grad_norm": 786.5772094726562, - "learning_rate": 1.8920000000000002e-05, - "loss": 234.8988, + "epoch": 0.019109798518889613, + "grad_norm": 1429.115234375, + "learning_rate": 9.460000000000001e-06, + "loss": 199.2825, "step": 4730 }, { - "epoch": 0.03830024483067898, - "grad_norm": 1255.7796630859375, - "learning_rate": 1.896e-05, - "loss": 280.3407, + "epoch": 0.019150199784257242, + "grad_norm": 1368.30322265625, + "learning_rate": 9.48e-06, + "loss": 265.0907, "step": 4740 }, { - "epoch": 0.03838104703496312, - "grad_norm": 878.3697509765625, - "learning_rate": 1.9e-05, - "loss": 234.983, + "epoch": 0.019190601049624875, + "grad_norm": 773.6226196289062, + "learning_rate": 9.5e-06, + "loss": 272.2148, "step": 4750 }, { - "epoch": 0.038461849239247246, - "grad_norm": 1391.1988525390625, - "learning_rate": 1.904e-05, - "loss": 204.5077, + "epoch": 0.019231002314992505, + "grad_norm": 799.7369384765625, + "learning_rate": 9.52e-06, + "loss": 259.8739, "step": 4760 }, { - "epoch": 0.03854265144353138, - "grad_norm": 865.9118041992188, - "learning_rate": 1.908e-05, - "loss": 177.5614, + "epoch": 0.019271403580360138, + "grad_norm": 718.1935424804688, + "learning_rate": 9.54e-06, + "loss": 255.6761, "step": 4770 }, { - "epoch": 0.03862345364781551, - "grad_norm": 1631.27978515625, - "learning_rate": 1.9120000000000003e-05, - "loss": 235.9493, + "epoch": 0.019311804845727767, + "grad_norm": 1052.681396484375, + "learning_rate": 9.560000000000002e-06, + "loss": 240.1528, "step": 4780 }, { - "epoch": 0.03870425585209965, - "grad_norm": 1351.4879150390625, - "learning_rate": 1.916e-05, - "loss": 210.5913, + "epoch": 0.0193522061110954, + "grad_norm": 351.5966491699219, + "learning_rate": 9.58e-06, + "loss": 170.535, "step": 4790 }, { - "epoch": 0.03878505805638378, - "grad_norm": 1149.90478515625, - "learning_rate": 1.9200000000000003e-05, - "loss": 205.5302, + "epoch": 0.01939260737646303, + "grad_norm": 759.4265747070312, + "learning_rate": 9.600000000000001e-06, + "loss": 185.2903, "step": 4800 }, { - "epoch": 0.03886586026066791, - "grad_norm": 1391.6519775390625, - "learning_rate": 1.924e-05, - "loss": 185.5637, + "epoch": 0.019433008641830663, + "grad_norm": 763.7293701171875, + "learning_rate": 9.62e-06, + "loss": 233.2488, "step": 4810 }, { - "epoch": 0.03894666246495204, - "grad_norm": 1031.23828125, - "learning_rate": 1.9280000000000002e-05, - "loss": 254.7395, + "epoch": 0.019473409907198293, + "grad_norm": 1342.382568359375, + "learning_rate": 9.640000000000001e-06, + "loss": 275.3923, "step": 4820 }, { - "epoch": 0.03902746466923618, - "grad_norm": 1481.7857666015625, - "learning_rate": 1.932e-05, - "loss": 249.218, + "epoch": 0.019513811172565926, + "grad_norm": 3298.531005859375, + "learning_rate": 9.66e-06, + "loss": 226.5765, "step": 4830 }, { - "epoch": 0.03910826687352031, - "grad_norm": 1569.617919921875, - "learning_rate": 1.936e-05, - "loss": 164.8599, + "epoch": 0.019554212437933555, + "grad_norm": 1122.7933349609375, + "learning_rate": 9.68e-06, + "loss": 178.3835, "step": 4840 }, { - "epoch": 0.03918906907780444, - "grad_norm": 966.9464111328125, - "learning_rate": 1.94e-05, - "loss": 246.2672, + "epoch": 0.01959461370330119, + "grad_norm": 1547.0048828125, + "learning_rate": 9.7e-06, + "loss": 305.318, "step": 4850 }, { - "epoch": 0.03926987128208857, - "grad_norm": 1091.4852294921875, - "learning_rate": 1.944e-05, - "loss": 274.1528, + "epoch": 0.019635014968668818, + "grad_norm": 364.29541015625, + "learning_rate": 9.72e-06, + "loss": 170.9627, "step": 4860 }, { - "epoch": 0.03935067348637271, - "grad_norm": 900.5, - "learning_rate": 1.948e-05, - "loss": 172.6424, + "epoch": 0.01967541623403645, + "grad_norm": 1955.25634765625, + "learning_rate": 9.74e-06, + "loss": 302.9251, "step": 4870 }, { - "epoch": 0.03943147569065684, - "grad_norm": 1368.6942138671875, - "learning_rate": 1.9520000000000003e-05, - "loss": 230.6911, + "epoch": 0.01971581749940408, + "grad_norm": 2048.748291015625, + "learning_rate": 9.760000000000001e-06, + "loss": 202.4392, "step": 4880 }, { - "epoch": 0.039512277894940974, - "grad_norm": 1007.75146484375, - "learning_rate": 1.956e-05, - "loss": 188.4303, + "epoch": 0.019756218764771714, + "grad_norm": 660.9871215820312, + "learning_rate": 9.78e-06, + "loss": 187.9075, "step": 4890 }, { - "epoch": 0.03959308009922511, - "grad_norm": 926.8175659179688, - "learning_rate": 1.9600000000000002e-05, - "loss": 211.3782, + "epoch": 0.019796620030139343, + "grad_norm": 689.9225463867188, + "learning_rate": 9.800000000000001e-06, + "loss": 311.5491, "step": 4900 }, { - "epoch": 0.03967388230350924, - "grad_norm": 1757.5794677734375, - "learning_rate": 1.9640000000000002e-05, - "loss": 231.4304, + "epoch": 0.019837021295506976, + "grad_norm": 953.7089233398438, + "learning_rate": 9.820000000000001e-06, + "loss": 299.3845, "step": 4910 }, { - "epoch": 0.039754684507793375, - "grad_norm": 1936.01953125, - "learning_rate": 1.968e-05, - "loss": 258.8447, + "epoch": 0.019877422560874606, + "grad_norm": 614.9231567382812, + "learning_rate": 9.84e-06, + "loss": 168.2812, "step": 4920 }, { - "epoch": 0.039835486712077504, - "grad_norm": 1309.0240478515625, - "learning_rate": 1.972e-05, - "loss": 219.2068, + "epoch": 0.01991782382624224, + "grad_norm": 1575.7044677734375, + "learning_rate": 9.86e-06, + "loss": 273.8306, "step": 4930 }, { - "epoch": 0.03991628891636164, - "grad_norm": 1204.9296875, - "learning_rate": 1.976e-05, - "loss": 207.5814, + "epoch": 0.01995822509160987, + "grad_norm": 826.9859619140625, + "learning_rate": 9.88e-06, + "loss": 277.1096, "step": 4940 }, { - "epoch": 0.03999709112064577, - "grad_norm": 1366.7415771484375, - "learning_rate": 1.9800000000000004e-05, - "loss": 194.2691, + "epoch": 0.0199986263569775, + "grad_norm": 934.8515625, + "learning_rate": 9.900000000000002e-06, + "loss": 230.0742, "step": 4950 }, { - "epoch": 0.040077893324929906, - "grad_norm": 1462.8206787109375, - "learning_rate": 1.984e-05, - "loss": 266.8531, + "epoch": 0.02003902762234513, + "grad_norm": 2514.587646484375, + "learning_rate": 9.92e-06, + "loss": 143.4857, "step": 4960 }, { - "epoch": 0.040158695529214035, - "grad_norm": 876.000244140625, - "learning_rate": 1.9880000000000003e-05, - "loss": 201.3296, + "epoch": 0.020079428887712764, + "grad_norm": 0.0, + "learning_rate": 9.940000000000001e-06, + "loss": 292.2021, "step": 4970 }, { - "epoch": 0.04023949773349817, - "grad_norm": 1129.036865234375, - "learning_rate": 1.992e-05, - "loss": 193.2419, + "epoch": 0.020119830153080394, + "grad_norm": 1051.95068359375, + "learning_rate": 9.96e-06, + "loss": 243.0733, "step": 4980 }, { - "epoch": 0.0403202999377823, - "grad_norm": 1737.701904296875, - "learning_rate": 1.9960000000000002e-05, - "loss": 256.7378, + "epoch": 0.020160231418448027, + "grad_norm": 522.6783447265625, + "learning_rate": 9.980000000000001e-06, + "loss": 255.3195, "step": 4990 }, { - "epoch": 0.040401102142066436, - "grad_norm": 1281.869140625, - "learning_rate": 2e-05, - "loss": 181.1852, + "epoch": 0.020200632683815656, + "grad_norm": 1430.8314208984375, + "learning_rate": 1e-05, + "loss": 181.9884, "step": 5000 }, { - "epoch": 0.040481904346350565, - "grad_norm": 1347.7447509765625, - "learning_rate": 2.004e-05, - "loss": 209.5422, + "epoch": 0.02024103394918329, + "grad_norm": 2610.226806640625, + "learning_rate": 1.002e-05, + "loss": 240.1806, "step": 5010 }, { - "epoch": 0.0405627065506347, - "grad_norm": 760.0932006835938, - "learning_rate": 2.008e-05, - "loss": 222.7581, + "epoch": 0.02028143521455092, + "grad_norm": 706.251220703125, + "learning_rate": 1.004e-05, + "loss": 154.691, "step": 5020 }, { - "epoch": 0.04064350875491884, - "grad_norm": 2836.815185546875, - "learning_rate": 2.012e-05, - "loss": 182.0209, + "epoch": 0.020321836479918552, + "grad_norm": 1110.47021484375, + "learning_rate": 1.006e-05, + "loss": 270.7392, "step": 5030 }, { - "epoch": 0.04072431095920297, - "grad_norm": 2007.478759765625, - "learning_rate": 2.016e-05, - "loss": 239.4505, + "epoch": 0.020362237745286182, + "grad_norm": 1037.9814453125, + "learning_rate": 1.008e-05, + "loss": 260.6087, "step": 5040 }, { - "epoch": 0.0408051131634871, - "grad_norm": 830.1823120117188, - "learning_rate": 2.0200000000000003e-05, - "loss": 225.4689, + "epoch": 0.020402639010653815, + "grad_norm": 988.474609375, + "learning_rate": 1.0100000000000002e-05, + "loss": 195.7535, "step": 5050 }, { - "epoch": 0.04088591536777123, - "grad_norm": 603.6580200195312, - "learning_rate": 2.024e-05, - "loss": 198.7501, + "epoch": 0.020443040276021444, + "grad_norm": 0.0, + "learning_rate": 1.012e-05, + "loss": 292.6674, "step": 5060 }, { - "epoch": 0.04096671757205537, - "grad_norm": 2317.827880859375, - "learning_rate": 2.0280000000000002e-05, - "loss": 219.7897, + "epoch": 0.020483441541389077, + "grad_norm": 1622.9769287109375, + "learning_rate": 1.0140000000000001e-05, + "loss": 226.0767, "step": 5070 }, { - "epoch": 0.0410475197763395, - "grad_norm": 1382.4962158203125, - "learning_rate": 2.032e-05, - "loss": 239.3777, + "epoch": 0.020523842806756707, + "grad_norm": 2696.41845703125, + "learning_rate": 1.016e-05, + "loss": 242.0753, "step": 5080 }, { - "epoch": 0.04112832198062363, - "grad_norm": 672.8855590820312, - "learning_rate": 2.036e-05, - "loss": 221.3797, + "epoch": 0.02056424407212434, + "grad_norm": 2494.242431640625, + "learning_rate": 1.018e-05, + "loss": 310.918, "step": 5090 }, { - "epoch": 0.04120912418490776, - "grad_norm": 970.1513671875, - "learning_rate": 2.04e-05, - "loss": 242.433, + "epoch": 0.02060464533749197, + "grad_norm": 549.554443359375, + "learning_rate": 1.02e-05, + "loss": 238.1347, "step": 5100 }, { - "epoch": 0.0412899263891919, - "grad_norm": 1200.8358154296875, - "learning_rate": 2.044e-05, - "loss": 212.5112, + "epoch": 0.020645046602859603, + "grad_norm": 753.8233642578125, + "learning_rate": 1.022e-05, + "loss": 271.2417, "step": 5110 }, { - "epoch": 0.04137072859347603, - "grad_norm": 1295.498291015625, - "learning_rate": 2.048e-05, - "loss": 185.9661, + "epoch": 0.020685447868227232, + "grad_norm": 905.9545288085938, + "learning_rate": 1.024e-05, + "loss": 204.41, "step": 5120 }, { - "epoch": 0.041451530797760164, - "grad_norm": 1166.29296875, - "learning_rate": 2.052e-05, - "loss": 247.6106, + "epoch": 0.020725849133594865, + "grad_norm": 1900.184326171875, + "learning_rate": 1.026e-05, + "loss": 203.9288, "step": 5130 }, { - "epoch": 0.04153233300204429, - "grad_norm": 3098.2197265625, - "learning_rate": 2.0560000000000003e-05, - "loss": 221.3217, + "epoch": 0.020766250398962495, + "grad_norm": 533.8387451171875, + "learning_rate": 1.0280000000000002e-05, + "loss": 186.7773, "step": 5140 }, { - "epoch": 0.04161313520632843, - "grad_norm": 1139.6650390625, - "learning_rate": 2.06e-05, - "loss": 186.2231, + "epoch": 0.020806651664330128, + "grad_norm": 671.30322265625, + "learning_rate": 1.03e-05, + "loss": 164.2021, "step": 5150 }, { - "epoch": 0.04169393741061256, - "grad_norm": 807.0874633789062, - "learning_rate": 2.0640000000000002e-05, - "loss": 248.8766, + "epoch": 0.020847052929697758, + "grad_norm": 764.4326171875, + "learning_rate": 1.0320000000000001e-05, + "loss": 234.4929, "step": 5160 }, { - "epoch": 0.041774739614896694, - "grad_norm": 1678.1353759765625, - "learning_rate": 2.0680000000000002e-05, - "loss": 166.2287, + "epoch": 0.02088745419506539, + "grad_norm": 921.5126953125, + "learning_rate": 1.0340000000000001e-05, + "loss": 257.7942, "step": 5170 }, { - "epoch": 0.04185554181918083, - "grad_norm": 1944.0137939453125, - "learning_rate": 2.072e-05, - "loss": 219.6341, + "epoch": 0.02092785546043302, + "grad_norm": 1028.72412109375, + "learning_rate": 1.036e-05, + "loss": 219.407, "step": 5180 }, { - "epoch": 0.04193634402346496, - "grad_norm": 1544.1195068359375, - "learning_rate": 2.076e-05, - "loss": 277.2531, + "epoch": 0.020968256725800653, + "grad_norm": 857.1285400390625, + "learning_rate": 1.038e-05, + "loss": 267.7374, "step": 5190 }, { - "epoch": 0.042017146227749096, - "grad_norm": 1029.3516845703125, - "learning_rate": 2.08e-05, - "loss": 191.8421, + "epoch": 0.021008657991168283, + "grad_norm": 733.449951171875, + "learning_rate": 1.04e-05, + "loss": 259.0882, "step": 5200 }, { - "epoch": 0.042097948432033225, - "grad_norm": 976.2647705078125, - "learning_rate": 2.084e-05, - "loss": 183.5917, + "epoch": 0.021049059256535916, + "grad_norm": 853.8319091796875, + "learning_rate": 1.042e-05, + "loss": 236.0859, "step": 5210 }, { - "epoch": 0.04217875063631736, - "grad_norm": 1440.924560546875, - "learning_rate": 2.0880000000000003e-05, - "loss": 222.6888, + "epoch": 0.021089460521903546, + "grad_norm": 780.4057006835938, + "learning_rate": 1.0440000000000002e-05, + "loss": 235.1703, "step": 5220 }, { - "epoch": 0.04225955284060149, - "grad_norm": 1549.0025634765625, - "learning_rate": 2.092e-05, - "loss": 207.1, + "epoch": 0.02112986178727118, + "grad_norm": 1369.0264892578125, + "learning_rate": 1.046e-05, + "loss": 262.7009, "step": 5230 }, { - "epoch": 0.042340355044885626, - "grad_norm": 779.8838500976562, - "learning_rate": 2.0960000000000003e-05, - "loss": 157.6226, + "epoch": 0.021170263052638808, + "grad_norm": 802.2042236328125, + "learning_rate": 1.0480000000000001e-05, + "loss": 317.5121, "step": 5240 }, { - "epoch": 0.042421157249169755, - "grad_norm": 1479.7491455078125, - "learning_rate": 2.1e-05, - "loss": 222.0956, + "epoch": 0.02121066431800644, + "grad_norm": 3069.8564453125, + "learning_rate": 1.05e-05, + "loss": 266.7954, "step": 5250 }, { - "epoch": 0.04250195945345389, - "grad_norm": 1877.4952392578125, - "learning_rate": 2.1040000000000002e-05, - "loss": 200.6935, + "epoch": 0.02125106558337407, + "grad_norm": 1437.6041259765625, + "learning_rate": 1.0520000000000001e-05, + "loss": 290.8906, "step": 5260 }, { - "epoch": 0.04258276165773802, - "grad_norm": 1077.92041015625, - "learning_rate": 2.1079999999999998e-05, - "loss": 195.7596, + "epoch": 0.021291466848741704, + "grad_norm": 1116.5682373046875, + "learning_rate": 1.0539999999999999e-05, + "loss": 217.5333, "step": 5270 }, { - "epoch": 0.04266356386202216, - "grad_norm": 2036.39404296875, - "learning_rate": 2.112e-05, - "loss": 171.7718, + "epoch": 0.021331868114109333, + "grad_norm": 674.6525268554688, + "learning_rate": 1.056e-05, + "loss": 262.4274, "step": 5280 }, { - "epoch": 0.042744366066306286, - "grad_norm": 1406.621826171875, - "learning_rate": 2.116e-05, - "loss": 185.7908, + "epoch": 0.021372269379476967, + "grad_norm": 980.0327758789062, + "learning_rate": 1.058e-05, + "loss": 253.3151, "step": 5290 }, { - "epoch": 0.04282516827059042, - "grad_norm": 1543.626220703125, - "learning_rate": 2.12e-05, - "loss": 206.7706, + "epoch": 0.021412670644844596, + "grad_norm": 1546.599609375, + "learning_rate": 1.06e-05, + "loss": 284.0318, "step": 5300 }, { - "epoch": 0.04290597047487456, - "grad_norm": 1605.6107177734375, - "learning_rate": 2.124e-05, - "loss": 255.117, + "epoch": 0.02145307191021223, + "grad_norm": 4072.65478515625, + "learning_rate": 1.062e-05, + "loss": 299.3221, "step": 5310 }, { - "epoch": 0.04298677267915869, - "grad_norm": 1711.4632568359375, - "learning_rate": 2.128e-05, - "loss": 208.9307, + "epoch": 0.02149347317557986, + "grad_norm": 835.0826416015625, + "learning_rate": 1.064e-05, + "loss": 217.7071, "step": 5320 }, { - "epoch": 0.04306757488344282, - "grad_norm": 1183.892578125, - "learning_rate": 2.1320000000000003e-05, - "loss": 227.7505, + "epoch": 0.021533874440947492, + "grad_norm": 825.3660278320312, + "learning_rate": 1.0660000000000001e-05, + "loss": 250.3456, "step": 5330 }, { - "epoch": 0.04314837708772695, - "grad_norm": 722.8084716796875, - "learning_rate": 2.1360000000000002e-05, - "loss": 185.3006, + "epoch": 0.02157427570631512, + "grad_norm": 1196.41455078125, + "learning_rate": 1.0680000000000001e-05, + "loss": 217.1434, "step": 5340 }, { - "epoch": 0.04322917929201109, - "grad_norm": 1443.2947998046875, - "learning_rate": 2.1400000000000002e-05, - "loss": 278.7803, + "epoch": 0.021614676971682754, + "grad_norm": 556.876220703125, + "learning_rate": 1.0700000000000001e-05, + "loss": 239.3144, "step": 5350 }, { - "epoch": 0.04330998149629522, - "grad_norm": 1594.63671875, - "learning_rate": 2.144e-05, - "loss": 249.6102, + "epoch": 0.021655078237050384, + "grad_norm": 602.3602905273438, + "learning_rate": 1.072e-05, + "loss": 308.0275, "step": 5360 }, { - "epoch": 0.043390783700579354, - "grad_norm": 1329.8583984375, - "learning_rate": 2.148e-05, - "loss": 195.4803, + "epoch": 0.021695479502418017, + "grad_norm": 747.0403442382812, + "learning_rate": 1.074e-05, + "loss": 249.5218, "step": 5370 }, { - "epoch": 0.04347158590486348, - "grad_norm": 3100.29248046875, - "learning_rate": 2.152e-05, - "loss": 262.2818, + "epoch": 0.021735880767785647, + "grad_norm": 2065.047607421875, + "learning_rate": 1.076e-05, + "loss": 289.0896, "step": 5380 }, { - "epoch": 0.04355238810914762, - "grad_norm": 962.1038208007812, - "learning_rate": 2.1560000000000004e-05, - "loss": 205.9964, + "epoch": 0.02177628203315328, + "grad_norm": 1931.2476806640625, + "learning_rate": 1.0780000000000002e-05, + "loss": 270.5146, "step": 5390 }, { - "epoch": 0.04363319031343175, - "grad_norm": 955.7030639648438, - "learning_rate": 2.16e-05, - "loss": 202.8865, + "epoch": 0.02181668329852091, + "grad_norm": 5352.14013671875, + "learning_rate": 1.08e-05, + "loss": 330.6276, "step": 5400 }, { - "epoch": 0.043713992517715884, - "grad_norm": 1613.6199951171875, - "learning_rate": 2.1640000000000003e-05, - "loss": 293.355, + "epoch": 0.021857084563888542, + "grad_norm": 750.8521728515625, + "learning_rate": 1.0820000000000001e-05, + "loss": 260.1782, "step": 5410 }, { - "epoch": 0.04379479472200001, - "grad_norm": 1535.706787109375, - "learning_rate": 2.168e-05, - "loss": 237.9405, + "epoch": 0.021897485829256172, + "grad_norm": 1566.5867919921875, + "learning_rate": 1.084e-05, + "loss": 277.7564, "step": 5420 }, { - "epoch": 0.04387559692628415, - "grad_norm": 1049.491455078125, - "learning_rate": 2.1720000000000002e-05, - "loss": 218.4353, + "epoch": 0.021937887094623805, + "grad_norm": 4719.2412109375, + "learning_rate": 1.0860000000000001e-05, + "loss": 270.119, "step": 5430 }, { - "epoch": 0.04395639913056828, - "grad_norm": 1006.45947265625, - "learning_rate": 2.176e-05, - "loss": 231.6826, + "epoch": 0.021978288359991435, + "grad_norm": 886.023681640625, + "learning_rate": 1.088e-05, + "loss": 246.386, "step": 5440 }, { - "epoch": 0.044037201334852415, - "grad_norm": 1085.362548828125, - "learning_rate": 2.18e-05, - "loss": 205.6435, + "epoch": 0.022018689625359068, + "grad_norm": 15289.64453125, + "learning_rate": 1.09e-05, + "loss": 360.9848, "step": 5450 }, { - "epoch": 0.04411800353913655, - "grad_norm": 1096.031494140625, - "learning_rate": 2.184e-05, - "loss": 161.6715, + "epoch": 0.022059090890726697, + "grad_norm": 854.2662353515625, + "learning_rate": 1.092e-05, + "loss": 234.6689, "step": 5460 }, { - "epoch": 0.04419880574342068, - "grad_norm": 2974.1650390625, - "learning_rate": 2.188e-05, - "loss": 198.0445, + "epoch": 0.02209949215609433, + "grad_norm": 1195.251953125, + "learning_rate": 1.094e-05, + "loss": 233.2883, "step": 5470 }, { - "epoch": 0.044279607947704816, - "grad_norm": 1008.624267578125, - "learning_rate": 2.192e-05, - "loss": 141.5528, + "epoch": 0.02213989342146196, + "grad_norm": 2974.2724609375, + "learning_rate": 1.096e-05, + "loss": 256.327, "step": 5480 }, { - "epoch": 0.044360410151988945, - "grad_norm": 1445.2733154296875, - "learning_rate": 2.196e-05, - "loss": 219.4691, + "epoch": 0.022180294686829593, + "grad_norm": 1651.259765625, + "learning_rate": 1.098e-05, + "loss": 258.9452, "step": 5490 }, { - "epoch": 0.04444121235627308, - "grad_norm": 1614.0003662109375, - "learning_rate": 2.2000000000000003e-05, - "loss": 299.5626, + "epoch": 0.022220695952197222, + "grad_norm": 1866.4769287109375, + "learning_rate": 1.1000000000000001e-05, + "loss": 277.225, "step": 5500 }, { - "epoch": 0.04452201456055721, - "grad_norm": 1461.0916748046875, - "learning_rate": 2.2040000000000002e-05, - "loss": 217.8277, + "epoch": 0.022261097217564856, + "grad_norm": 2012.682861328125, + "learning_rate": 1.1020000000000001e-05, + "loss": 213.3877, "step": 5510 }, { - "epoch": 0.044602816764841346, - "grad_norm": 893.2160034179688, - "learning_rate": 2.2080000000000002e-05, - "loss": 229.7792, + "epoch": 0.022301498482932485, + "grad_norm": 787.662353515625, + "learning_rate": 1.1040000000000001e-05, + "loss": 223.892, "step": 5520 }, { - "epoch": 0.044683618969125476, - "grad_norm": 915.8477783203125, - "learning_rate": 2.212e-05, - "loss": 159.9235, + "epoch": 0.022341899748300118, + "grad_norm": 9214.21484375, + "learning_rate": 1.106e-05, + "loss": 217.7184, "step": 5530 }, { - "epoch": 0.04476442117340961, - "grad_norm": 2673.85205078125, - "learning_rate": 2.216e-05, - "loss": 186.4143, + "epoch": 0.022382301013667748, + "grad_norm": 1571.4190673828125, + "learning_rate": 1.108e-05, + "loss": 268.1608, "step": 5540 }, { - "epoch": 0.04484522337769374, - "grad_norm": 1298.6927490234375, - "learning_rate": 2.22e-05, - "loss": 206.1805, + "epoch": 0.02242270227903538, + "grad_norm": 1564.1744384765625, + "learning_rate": 1.11e-05, + "loss": 259.3926, "step": 5550 }, { - "epoch": 0.04492602558197788, - "grad_norm": 1256.119140625, - "learning_rate": 2.224e-05, - "loss": 245.9137, + "epoch": 0.02246310354440301, + "grad_norm": 676.7450561523438, + "learning_rate": 1.112e-05, + "loss": 205.3203, "step": 5560 }, { - "epoch": 0.045006827786262006, - "grad_norm": 3194.141845703125, - "learning_rate": 2.228e-05, - "loss": 195.3358, + "epoch": 0.022503504809770643, + "grad_norm": 1126.58837890625, + "learning_rate": 1.114e-05, + "loss": 127.3151, "step": 5570 }, { - "epoch": 0.04508762999054614, - "grad_norm": 2060.29931640625, - "learning_rate": 2.2320000000000003e-05, - "loss": 356.367, + "epoch": 0.022543906075138273, + "grad_norm": 3336.22607421875, + "learning_rate": 1.1160000000000002e-05, + "loss": 226.2071, "step": 5580 }, { - "epoch": 0.04516843219483028, - "grad_norm": 1192.640869140625, - "learning_rate": 2.236e-05, - "loss": 196.4802, + "epoch": 0.022584307340505906, + "grad_norm": 416.8356018066406, + "learning_rate": 1.118e-05, + "loss": 257.1062, "step": 5590 }, { - "epoch": 0.04524923439911441, - "grad_norm": 1691.9290771484375, - "learning_rate": 2.2400000000000002e-05, - "loss": 259.3116, + "epoch": 0.022624708605873536, + "grad_norm": 653.0436401367188, + "learning_rate": 1.1200000000000001e-05, + "loss": 198.1556, "step": 5600 }, { - "epoch": 0.045330036603398544, - "grad_norm": 1580.9140625, - "learning_rate": 2.244e-05, - "loss": 215.4646, + "epoch": 0.02266510987124117, + "grad_norm": 5374.44091796875, + "learning_rate": 1.122e-05, + "loss": 230.6045, "step": 5610 }, { - "epoch": 0.04541083880768267, - "grad_norm": 1411.131591796875, - "learning_rate": 2.248e-05, - "loss": 237.1604, + "epoch": 0.0227055111366088, + "grad_norm": 1517.1585693359375, + "learning_rate": 1.124e-05, + "loss": 217.2467, "step": 5620 }, { - "epoch": 0.04549164101196681, - "grad_norm": 1059.35205078125, - "learning_rate": 2.252e-05, - "loss": 194.5844, + "epoch": 0.02274591240197643, + "grad_norm": 1149.62646484375, + "learning_rate": 1.126e-05, + "loss": 294.9917, "step": 5630 }, { - "epoch": 0.04557244321625094, - "grad_norm": 2758.700927734375, - "learning_rate": 2.256e-05, - "loss": 236.5781, + "epoch": 0.02278631366734406, + "grad_norm": 1075.8157958984375, + "learning_rate": 1.128e-05, + "loss": 273.2365, "step": 5640 }, { - "epoch": 0.045653245420535074, - "grad_norm": 1209.1309814453125, - "learning_rate": 2.26e-05, - "loss": 224.7173, + "epoch": 0.022826714932711694, + "grad_norm": 1706.23388671875, + "learning_rate": 1.13e-05, + "loss": 239.202, "step": 5650 }, { - "epoch": 0.0457340476248192, - "grad_norm": 975.3015747070312, - "learning_rate": 2.264e-05, - "loss": 239.0923, + "epoch": 0.022867116198079324, + "grad_norm": 939.1124877929688, + "learning_rate": 1.132e-05, + "loss": 262.5748, "step": 5660 }, { - "epoch": 0.04581484982910334, - "grad_norm": 1574.2781982421875, - "learning_rate": 2.268e-05, - "loss": 223.5635, + "epoch": 0.022907517463446957, + "grad_norm": 1178.756591796875, + "learning_rate": 1.134e-05, + "loss": 205.6744, "step": 5670 }, { - "epoch": 0.04589565203338747, - "grad_norm": 1897.4033203125, - "learning_rate": 2.2720000000000003e-05, - "loss": 232.7342, + "epoch": 0.022947918728814586, + "grad_norm": 402.2773132324219, + "learning_rate": 1.1360000000000001e-05, + "loss": 192.2235, "step": 5680 }, { - "epoch": 0.045976454237671605, - "grad_norm": 2824.08642578125, - "learning_rate": 2.2760000000000002e-05, - "loss": 179.1106, + "epoch": 0.02298831999418222, + "grad_norm": 0.0, + "learning_rate": 1.1380000000000001e-05, + "loss": 258.3543, "step": 5690 }, { - "epoch": 0.046057256441955734, - "grad_norm": 1680.9097900390625, - "learning_rate": 2.2800000000000002e-05, - "loss": 195.5614, + "epoch": 0.02302872125954985, + "grad_norm": 1016.2149658203125, + "learning_rate": 1.1400000000000001e-05, + "loss": 217.5345, "step": 5700 }, { - "epoch": 0.04613805864623987, - "grad_norm": 2320.5478515625, - "learning_rate": 2.284e-05, - "loss": 192.4904, + "epoch": 0.023069122524917482, + "grad_norm": 1061.48828125, + "learning_rate": 1.142e-05, + "loss": 227.8594, "step": 5710 }, { - "epoch": 0.046218860850524, - "grad_norm": 1368.416015625, - "learning_rate": 2.288e-05, - "loss": 213.5775, + "epoch": 0.02310952379028511, + "grad_norm": 481.3313293457031, + "learning_rate": 1.144e-05, + "loss": 194.8761, "step": 5720 }, { - "epoch": 0.046299663054808135, - "grad_norm": 1644.1671142578125, - "learning_rate": 2.292e-05, - "loss": 219.5659, + "epoch": 0.023149925055652745, + "grad_norm": 782.8972778320312, + "learning_rate": 1.146e-05, + "loss": 173.4156, "step": 5730 }, { - "epoch": 0.04638046525909227, - "grad_norm": 3038.07177734375, - "learning_rate": 2.296e-05, - "loss": 211.6291, + "epoch": 0.023190326321020374, + "grad_norm": 672.4293212890625, + "learning_rate": 1.148e-05, + "loss": 206.9097, "step": 5740 }, { - "epoch": 0.0464612674633764, - "grad_norm": 841.7382202148438, - "learning_rate": 2.3000000000000003e-05, - "loss": 190.6826, + "epoch": 0.023230727586388007, + "grad_norm": 2217.28369140625, + "learning_rate": 1.1500000000000002e-05, + "loss": 238.6014, "step": 5750 }, { - "epoch": 0.046542069667660536, - "grad_norm": 944.84765625, - "learning_rate": 2.304e-05, - "loss": 207.6021, + "epoch": 0.023271128851755637, + "grad_norm": 3414.3115234375, + "learning_rate": 1.152e-05, + "loss": 324.1321, "step": 5760 }, { - "epoch": 0.046622871871944666, - "grad_norm": 1008.6283569335938, - "learning_rate": 2.3080000000000003e-05, - "loss": 223.9232, + "epoch": 0.02331153011712327, + "grad_norm": 864.6550903320312, + "learning_rate": 1.1540000000000001e-05, + "loss": 213.1406, "step": 5770 }, { - "epoch": 0.0467036740762288, - "grad_norm": 1364.02734375, - "learning_rate": 2.312e-05, - "loss": 305.6256, + "epoch": 0.0233519313824909, + "grad_norm": 752.6487426757812, + "learning_rate": 1.156e-05, + "loss": 136.1037, "step": 5780 }, { - "epoch": 0.04678447628051293, - "grad_norm": 637.5635986328125, - "learning_rate": 2.3160000000000002e-05, - "loss": 173.087, + "epoch": 0.023392332647858533, + "grad_norm": 550.0790405273438, + "learning_rate": 1.1580000000000001e-05, + "loss": 295.7111, "step": 5790 }, { - "epoch": 0.04686527848479707, - "grad_norm": 755.398193359375, - "learning_rate": 2.32e-05, - "loss": 176.3031, + "epoch": 0.023432733913226162, + "grad_norm": 918.3283081054688, + "learning_rate": 1.16e-05, + "loss": 234.3793, "step": 5800 }, { - "epoch": 0.046946080689081196, - "grad_norm": 908.0245971679688, - "learning_rate": 2.324e-05, - "loss": 142.1986, + "epoch": 0.023473135178593795, + "grad_norm": 1468.6207275390625, + "learning_rate": 1.162e-05, + "loss": 191.9201, "step": 5810 }, { - "epoch": 0.04702688289336533, - "grad_norm": 1156.88330078125, - "learning_rate": 2.328e-05, - "loss": 230.5496, + "epoch": 0.023513536443961425, + "grad_norm": 1899.0367431640625, + "learning_rate": 1.164e-05, + "loss": 234.0571, "step": 5820 }, { - "epoch": 0.04710768509764946, - "grad_norm": 1062.1318359375, - "learning_rate": 2.332e-05, - "loss": 168.4109, + "epoch": 0.023553937709329058, + "grad_norm": 684.876953125, + "learning_rate": 1.166e-05, + "loss": 180.286, "step": 5830 }, { - "epoch": 0.0471884873019336, - "grad_norm": 1590.9945068359375, - "learning_rate": 2.336e-05, - "loss": 270.5837, + "epoch": 0.023594338974696687, + "grad_norm": 1083.017578125, + "learning_rate": 1.168e-05, + "loss": 300.8844, "step": 5840 }, { - "epoch": 0.047269289506217727, - "grad_norm": 1591.7802734375, - "learning_rate": 2.3400000000000003e-05, - "loss": 261.2365, + "epoch": 0.02363474024006432, + "grad_norm": 1754.2984619140625, + "learning_rate": 1.1700000000000001e-05, + "loss": 391.4959, "step": 5850 }, { - "epoch": 0.04735009171050186, - "grad_norm": 950.18017578125, - "learning_rate": 2.344e-05, - "loss": 172.519, + "epoch": 0.02367514150543195, + "grad_norm": 945.9032592773438, + "learning_rate": 1.172e-05, + "loss": 272.7161, "step": 5860 }, { - "epoch": 0.047430893914786, - "grad_norm": 1637.9906005859375, - "learning_rate": 2.3480000000000002e-05, - "loss": 186.4084, + "epoch": 0.023715542770799583, + "grad_norm": 1564.7225341796875, + "learning_rate": 1.1740000000000001e-05, + "loss": 233.5665, "step": 5870 }, { - "epoch": 0.04751169611907013, - "grad_norm": 782.1890869140625, - "learning_rate": 2.3520000000000002e-05, - "loss": 208.1517, + "epoch": 0.023755944036167213, + "grad_norm": 523.8766479492188, + "learning_rate": 1.1760000000000001e-05, + "loss": 204.2223, "step": 5880 }, { - "epoch": 0.047592498323354264, - "grad_norm": 1019.51708984375, - "learning_rate": 2.356e-05, - "loss": 189.9457, + "epoch": 0.023796345301534846, + "grad_norm": 1801.8782958984375, + "learning_rate": 1.178e-05, + "loss": 215.9124, "step": 5890 }, { - "epoch": 0.04767330052763839, - "grad_norm": 1477.958740234375, - "learning_rate": 2.36e-05, - "loss": 201.8354, + "epoch": 0.023836746566902475, + "grad_norm": 2132.8076171875, + "learning_rate": 1.18e-05, + "loss": 328.9819, "step": 5900 }, { - "epoch": 0.04775410273192253, - "grad_norm": 912.8268432617188, - "learning_rate": 2.364e-05, - "loss": 201.6409, + "epoch": 0.02387714783227011, + "grad_norm": 1254.7607421875, + "learning_rate": 1.182e-05, + "loss": 249.0257, "step": 5910 }, { - "epoch": 0.04783490493620666, - "grad_norm": 699.692626953125, - "learning_rate": 2.3680000000000004e-05, - "loss": 206.0028, + "epoch": 0.023917549097637738, + "grad_norm": 873.1114501953125, + "learning_rate": 1.1840000000000002e-05, + "loss": 156.6504, "step": 5920 }, { - "epoch": 0.047915707140490794, - "grad_norm": 1725.80908203125, - "learning_rate": 2.372e-05, - "loss": 199.9058, + "epoch": 0.02395795036300537, + "grad_norm": 2675.64892578125, + "learning_rate": 1.186e-05, + "loss": 213.5535, "step": 5930 }, { - "epoch": 0.047996509344774924, - "grad_norm": 2695.423828125, - "learning_rate": 2.3760000000000003e-05, - "loss": 267.4356, + "epoch": 0.023998351628373, + "grad_norm": 744.88671875, + "learning_rate": 1.1880000000000001e-05, + "loss": 189.1653, "step": 5940 }, { - "epoch": 0.04807731154905906, - "grad_norm": 4546.9814453125, - "learning_rate": 2.38e-05, - "loss": 267.5431, + "epoch": 0.024038752893740634, + "grad_norm": 979.5391845703125, + "learning_rate": 1.19e-05, + "loss": 223.2056, "step": 5950 }, { - "epoch": 0.04815811375334319, - "grad_norm": 3411.77880859375, - "learning_rate": 2.3840000000000002e-05, - "loss": 179.9019, + "epoch": 0.024079154159108263, + "grad_norm": 872.666015625, + "learning_rate": 1.1920000000000001e-05, + "loss": 194.5632, "step": 5960 }, { - "epoch": 0.048238915957627325, - "grad_norm": 1259.88232421875, - "learning_rate": 2.3880000000000002e-05, - "loss": 193.8401, + "epoch": 0.024119555424475896, + "grad_norm": 1239.266845703125, + "learning_rate": 1.1940000000000001e-05, + "loss": 221.0386, "step": 5970 }, { - "epoch": 0.048319718161911454, - "grad_norm": 949.0712280273438, - "learning_rate": 2.392e-05, - "loss": 253.2978, + "epoch": 0.024159956689843526, + "grad_norm": 1718.065673828125, + "learning_rate": 1.196e-05, + "loss": 237.8773, "step": 5980 }, { - "epoch": 0.04840052036619559, - "grad_norm": 1587.951904296875, - "learning_rate": 2.396e-05, - "loss": 175.6583, + "epoch": 0.02420035795521116, + "grad_norm": 1780.784423828125, + "learning_rate": 1.198e-05, + "loss": 119.0116, "step": 5990 }, { - "epoch": 0.04848132257047972, - "grad_norm": 559.6181030273438, - "learning_rate": 2.4e-05, - "loss": 237.3928, + "epoch": 0.02424075922057879, + "grad_norm": 921.1470947265625, + "learning_rate": 1.2e-05, + "loss": 225.7212, "step": 6000 }, { - "epoch": 0.048562124774763855, - "grad_norm": 3816.6708984375, - "learning_rate": 2.404e-05, - "loss": 200.5205, + "epoch": 0.02428116048594642, + "grad_norm": 2161.641845703125, + "learning_rate": 1.202e-05, + "loss": 181.6732, "step": 6010 }, { - "epoch": 0.04864292697904799, - "grad_norm": 1783.058837890625, - "learning_rate": 2.408e-05, - "loss": 252.9585, + "epoch": 0.02432156175131405, + "grad_norm": 1104.2542724609375, + "learning_rate": 1.204e-05, + "loss": 187.8781, "step": 6020 }, { - "epoch": 0.04872372918333212, - "grad_norm": 820.045654296875, - "learning_rate": 2.412e-05, - "loss": 205.7776, + "epoch": 0.024361963016681684, + "grad_norm": 1214.7061767578125, + "learning_rate": 1.206e-05, + "loss": 221.4846, "step": 6030 }, { - "epoch": 0.04880453138761626, - "grad_norm": 2375.05078125, - "learning_rate": 2.4160000000000002e-05, - "loss": 180.077, + "epoch": 0.024402364282049314, + "grad_norm": 1222.162841796875, + "learning_rate": 1.2080000000000001e-05, + "loss": 172.2742, "step": 6040 }, { - "epoch": 0.048885333591900386, - "grad_norm": 953.7632446289062, - "learning_rate": 2.4200000000000002e-05, - "loss": 263.9052, + "epoch": 0.024442765547416947, + "grad_norm": 828.8873291015625, + "learning_rate": 1.2100000000000001e-05, + "loss": 227.2261, "step": 6050 }, { - "epoch": 0.04896613579618452, - "grad_norm": 1122.314208984375, - "learning_rate": 2.4240000000000002e-05, - "loss": 252.3065, + "epoch": 0.024483166812784576, + "grad_norm": 621.658447265625, + "learning_rate": 1.2120000000000001e-05, + "loss": 236.3511, "step": 6060 }, { - "epoch": 0.04904693800046865, - "grad_norm": 939.9464111328125, - "learning_rate": 2.428e-05, - "loss": 215.8685, + "epoch": 0.02452356807815221, + "grad_norm": 1036.8626708984375, + "learning_rate": 1.214e-05, + "loss": 233.8868, "step": 6070 }, { - "epoch": 0.04912774020475279, - "grad_norm": 2931.0908203125, - "learning_rate": 2.432e-05, - "loss": 249.0549, + "epoch": 0.02456396934351984, + "grad_norm": 1811.7554931640625, + "learning_rate": 1.216e-05, + "loss": 238.5441, "step": 6080 }, { - "epoch": 0.049208542409036916, - "grad_norm": 1206.418212890625, - "learning_rate": 2.4360000000000004e-05, - "loss": 240.7815, + "epoch": 0.02460437060888747, + "grad_norm": 1012.0717163085938, + "learning_rate": 1.2180000000000002e-05, + "loss": 234.7135, "step": 6090 }, { - "epoch": 0.04928934461332105, - "grad_norm": 1684.1275634765625, - "learning_rate": 2.44e-05, - "loss": 205.4823, + "epoch": 0.0246447718742551, + "grad_norm": 1480.242919921875, + "learning_rate": 1.22e-05, + "loss": 275.5092, "step": 6100 }, { - "epoch": 0.04937014681760518, - "grad_norm": 1188.1837158203125, - "learning_rate": 2.4440000000000003e-05, - "loss": 164.9968, + "epoch": 0.02468517313962273, + "grad_norm": 1884.02001953125, + "learning_rate": 1.2220000000000002e-05, + "loss": 242.7474, "step": 6110 }, { - "epoch": 0.04945094902188932, - "grad_norm": 1337.2325439453125, - "learning_rate": 2.448e-05, - "loss": 252.8773, + "epoch": 0.024725574404990364, + "grad_norm": 1144.38916015625, + "learning_rate": 1.224e-05, + "loss": 368.5085, "step": 6120 }, { - "epoch": 0.04953175122617345, - "grad_norm": 881.7661743164062, - "learning_rate": 2.4520000000000002e-05, - "loss": 261.2885, + "epoch": 0.024765975670357994, + "grad_norm": 1017.46875, + "learning_rate": 1.2260000000000001e-05, + "loss": 224.2549, "step": 6130 }, { - "epoch": 0.04961255343045758, - "grad_norm": 4394.3662109375, - "learning_rate": 2.4560000000000002e-05, - "loss": 202.9664, + "epoch": 0.024806376935725627, + "grad_norm": 1646.821044921875, + "learning_rate": 1.2280000000000001e-05, + "loss": 221.9995, "step": 6140 }, { - "epoch": 0.04969335563474172, - "grad_norm": 814.0568237304688, - "learning_rate": 2.46e-05, - "loss": 176.6503, + "epoch": 0.024846778201093257, + "grad_norm": 719.86376953125, + "learning_rate": 1.23e-05, + "loss": 221.8839, "step": 6150 }, { - "epoch": 0.04977415783902585, - "grad_norm": 1262.113525390625, - "learning_rate": 2.464e-05, - "loss": 175.006, + "epoch": 0.02488717946646089, + "grad_norm": 865.7079467773438, + "learning_rate": 1.232e-05, + "loss": 216.4899, "step": 6160 }, { - "epoch": 0.049854960043309984, - "grad_norm": 1184.9598388671875, - "learning_rate": 2.468e-05, - "loss": 263.4592, + "epoch": 0.02492758073182852, + "grad_norm": 837.6893310546875, + "learning_rate": 1.234e-05, + "loss": 212.6082, "step": 6170 }, { - "epoch": 0.049935762247594113, - "grad_norm": 1579.47802734375, - "learning_rate": 2.472e-05, - "loss": 220.0892, + "epoch": 0.024967981997196152, + "grad_norm": 619.9283447265625, + "learning_rate": 1.236e-05, + "loss": 231.417, "step": 6180 }, { - "epoch": 0.05001656445187825, - "grad_norm": 1813.8853759765625, - "learning_rate": 2.476e-05, - "loss": 228.5739, + "epoch": 0.025008383262563782, + "grad_norm": 938.736328125, + "learning_rate": 1.238e-05, + "loss": 222.0059, "step": 6190 }, { - "epoch": 0.05009736665616238, - "grad_norm": 4214.5849609375, - "learning_rate": 2.48e-05, - "loss": 308.5187, + "epoch": 0.025048784527931415, + "grad_norm": 768.3204956054688, + "learning_rate": 1.24e-05, + "loss": 269.6617, "step": 6200 }, { - "epoch": 0.050178168860446515, - "grad_norm": 1495.0234375, - "learning_rate": 2.4840000000000003e-05, - "loss": 281.6624, + "epoch": 0.025089185793299044, + "grad_norm": 25211.0859375, + "learning_rate": 1.2420000000000001e-05, + "loss": 283.9776, "step": 6210 }, { - "epoch": 0.050258971064730644, - "grad_norm": 3596.14990234375, - "learning_rate": 2.488e-05, - "loss": 293.9031, + "epoch": 0.025129587058666678, + "grad_norm": 1000.73046875, + "learning_rate": 1.244e-05, + "loss": 237.4733, "step": 6220 }, { - "epoch": 0.05033977326901478, - "grad_norm": 966.3616943359375, - "learning_rate": 2.4920000000000002e-05, - "loss": 165.7466, + "epoch": 0.025169988324034307, + "grad_norm": 1383.859375, + "learning_rate": 1.2460000000000001e-05, + "loss": 207.5625, "step": 6230 }, { - "epoch": 0.05042057547329891, - "grad_norm": 1551.99951171875, - "learning_rate": 2.496e-05, - "loss": 214.3451, + "epoch": 0.02521038958940194, + "grad_norm": 1157.95654296875, + "learning_rate": 1.248e-05, + "loss": 188.4448, "step": 6240 }, { - "epoch": 0.050501377677583045, - "grad_norm": 790.653076171875, - "learning_rate": 2.5e-05, - "loss": 191.0055, + "epoch": 0.02525079085476957, + "grad_norm": 437.2555847167969, + "learning_rate": 1.25e-05, + "loss": 253.4308, "step": 6250 }, { - "epoch": 0.050582179881867174, - "grad_norm": 3222.141357421875, - "learning_rate": 2.504e-05, - "loss": 225.3768, + "epoch": 0.025291192120137203, + "grad_norm": 735.6442260742188, + "learning_rate": 1.252e-05, + "loss": 239.4786, "step": 6260 }, { - "epoch": 0.05066298208615131, - "grad_norm": 2753.572998046875, - "learning_rate": 2.5080000000000004e-05, - "loss": 215.5454, + "epoch": 0.025331593385504832, + "grad_norm": 3905.33935546875, + "learning_rate": 1.2540000000000002e-05, + "loss": 340.7768, "step": 6270 }, { - "epoch": 0.05074378429043544, - "grad_norm": 1500.980224609375, - "learning_rate": 2.512e-05, - "loss": 201.8538, + "epoch": 0.025371994650872465, + "grad_norm": 1644.240234375, + "learning_rate": 1.256e-05, + "loss": 221.2143, "step": 6280 }, { - "epoch": 0.050824586494719576, - "grad_norm": 578.8954467773438, - "learning_rate": 2.516e-05, - "loss": 283.2478, + "epoch": 0.025412395916240095, + "grad_norm": 745.5984497070312, + "learning_rate": 1.258e-05, + "loss": 154.9017, "step": 6290 }, { - "epoch": 0.05090538869900371, - "grad_norm": 917.5217895507812, - "learning_rate": 2.5200000000000003e-05, - "loss": 196.4676, + "epoch": 0.025452797181607728, + "grad_norm": 2822.624267578125, + "learning_rate": 1.2600000000000001e-05, + "loss": 193.9914, "step": 6300 }, { - "epoch": 0.05098619090328784, - "grad_norm": 1330.8582763671875, - "learning_rate": 2.5240000000000002e-05, - "loss": 242.8309, + "epoch": 0.025493198446975358, + "grad_norm": 415.230712890625, + "learning_rate": 1.2620000000000001e-05, + "loss": 150.345, "step": 6310 }, { - "epoch": 0.05106699310757198, - "grad_norm": 879.3396606445312, - "learning_rate": 2.5280000000000005e-05, - "loss": 220.3776, + "epoch": 0.02553359971234299, + "grad_norm": 2347.072021484375, + "learning_rate": 1.2640000000000003e-05, + "loss": 239.6525, "step": 6320 }, { - "epoch": 0.051147795311856106, - "grad_norm": 1127.9619140625, - "learning_rate": 2.5319999999999998e-05, - "loss": 236.0791, + "epoch": 0.02557400097771062, + "grad_norm": 1561.8658447265625, + "learning_rate": 1.2659999999999999e-05, + "loss": 288.6605, "step": 6330 }, { - "epoch": 0.05122859751614024, - "grad_norm": 1192.15283203125, - "learning_rate": 2.536e-05, - "loss": 202.5739, + "epoch": 0.025614402243078253, + "grad_norm": 651.1387329101562, + "learning_rate": 1.268e-05, + "loss": 266.1772, "step": 6340 }, { - "epoch": 0.05130939972042437, - "grad_norm": 750.57763671875, - "learning_rate": 2.54e-05, - "loss": 202.7816, + "epoch": 0.025654803508445883, + "grad_norm": 544.3804931640625, + "learning_rate": 1.27e-05, + "loss": 273.7383, "step": 6350 }, { - "epoch": 0.05139020192470851, - "grad_norm": 1037.020263671875, - "learning_rate": 2.5440000000000004e-05, - "loss": 151.2522, + "epoch": 0.025695204773813516, + "grad_norm": 1525.947021484375, + "learning_rate": 1.2720000000000002e-05, + "loss": 251.1667, "step": 6360 }, { - "epoch": 0.05147100412899264, - "grad_norm": 1189.6025390625, - "learning_rate": 2.5480000000000003e-05, - "loss": 200.954, + "epoch": 0.025735606039181146, + "grad_norm": 1018.3317260742188, + "learning_rate": 1.2740000000000002e-05, + "loss": 210.4547, "step": 6370 }, { - "epoch": 0.05155180633327677, - "grad_norm": 641.6224365234375, - "learning_rate": 2.552e-05, - "loss": 193.1741, + "epoch": 0.02577600730454878, + "grad_norm": 911.9342651367188, + "learning_rate": 1.276e-05, + "loss": 278.7565, "step": 6380 }, { - "epoch": 0.0516326085375609, - "grad_norm": 1870.927978515625, - "learning_rate": 2.556e-05, - "loss": 248.4198, + "epoch": 0.025816408569916408, + "grad_norm": 3710.921630859375, + "learning_rate": 1.278e-05, + "loss": 229.9252, "step": 6390 }, { - "epoch": 0.05171341074184504, - "grad_norm": 937.3013916015625, - "learning_rate": 2.5600000000000002e-05, - "loss": 246.1624, + "epoch": 0.02585680983528404, + "grad_norm": 709.6052856445312, + "learning_rate": 1.2800000000000001e-05, + "loss": 197.896, "step": 6400 }, { - "epoch": 0.05179421294612917, - "grad_norm": 1138.7061767578125, - "learning_rate": 2.5640000000000002e-05, - "loss": 238.6282, + "epoch": 0.02589721110065167, + "grad_norm": 688.3367919921875, + "learning_rate": 1.2820000000000001e-05, + "loss": 205.0635, "step": 6410 }, { - "epoch": 0.0518750151504133, - "grad_norm": 884.419677734375, - "learning_rate": 2.5679999999999998e-05, - "loss": 230.1365, + "epoch": 0.025937612366019304, + "grad_norm": 1232.222412109375, + "learning_rate": 1.2839999999999999e-05, + "loss": 196.4835, "step": 6420 }, { - "epoch": 0.05195581735469744, - "grad_norm": 1117.8089599609375, - "learning_rate": 2.572e-05, - "loss": 167.4644, + "epoch": 0.025978013631386934, + "grad_norm": 665.0555419921875, + "learning_rate": 1.286e-05, + "loss": 211.4123, "step": 6430 }, { - "epoch": 0.05203661955898157, - "grad_norm": 1291.2059326171875, - "learning_rate": 2.576e-05, - "loss": 211.082, + "epoch": 0.026018414896754567, + "grad_norm": 2699.49755859375, + "learning_rate": 1.288e-05, + "loss": 266.4242, "step": 6440 }, { - "epoch": 0.052117421763265705, - "grad_norm": 1122.94287109375, - "learning_rate": 2.58e-05, - "loss": 197.854, + "epoch": 0.026058816162122196, + "grad_norm": 1464.1005859375, + "learning_rate": 1.29e-05, + "loss": 271.645, "step": 6450 }, { - "epoch": 0.052198223967549834, - "grad_norm": 1405.987060546875, - "learning_rate": 2.5840000000000003e-05, - "loss": 212.8458, + "epoch": 0.02609921742748983, + "grad_norm": 605.3104858398438, + "learning_rate": 1.2920000000000002e-05, + "loss": 158.6754, "step": 6460 }, { - "epoch": 0.05227902617183397, - "grad_norm": 2955.275390625, - "learning_rate": 2.588e-05, - "loss": 222.567, + "epoch": 0.02613961869285746, + "grad_norm": 643.6430053710938, + "learning_rate": 1.294e-05, + "loss": 246.8146, "step": 6470 }, { - "epoch": 0.0523598283761181, - "grad_norm": 1544.647216796875, - "learning_rate": 2.592e-05, - "loss": 257.0431, + "epoch": 0.026180019958225092, + "grad_norm": 780.9293823242188, + "learning_rate": 1.296e-05, + "loss": 270.3225, "step": 6480 }, { - "epoch": 0.052440630580402235, - "grad_norm": 1595.875732421875, - "learning_rate": 2.5960000000000002e-05, - "loss": 170.6415, + "epoch": 0.02622042122359272, + "grad_norm": 2428.1328125, + "learning_rate": 1.2980000000000001e-05, + "loss": 218.6574, "step": 6490 }, { - "epoch": 0.052521432784686364, - "grad_norm": 1119.1539306640625, - "learning_rate": 2.6000000000000002e-05, - "loss": 192.6813, + "epoch": 0.026260822488960354, + "grad_norm": 737.8549194335938, + "learning_rate": 1.3000000000000001e-05, + "loss": 241.2201, "step": 6500 }, { - "epoch": 0.0526022349889705, - "grad_norm": 878.3660278320312, - "learning_rate": 2.6040000000000005e-05, - "loss": 238.7794, + "epoch": 0.026301223754327984, + "grad_norm": 690.761474609375, + "learning_rate": 1.3020000000000002e-05, + "loss": 215.8646, "step": 6510 }, { - "epoch": 0.05268303719325463, - "grad_norm": 1567.8265380859375, - "learning_rate": 2.6079999999999998e-05, - "loss": 190.1196, + "epoch": 0.026341625019695617, + "grad_norm": 693.7470092773438, + "learning_rate": 1.3039999999999999e-05, + "loss": 226.2056, "step": 6520 }, { - "epoch": 0.052763839397538766, - "grad_norm": 932.3654174804688, - "learning_rate": 2.612e-05, - "loss": 169.4979, + "epoch": 0.026382026285063247, + "grad_norm": 1125.502197265625, + "learning_rate": 1.306e-05, + "loss": 250.0323, "step": 6530 }, { - "epoch": 0.052844641601822895, - "grad_norm": 1552.8302001953125, - "learning_rate": 2.616e-05, - "loss": 263.506, + "epoch": 0.02642242755043088, + "grad_norm": 3172.219482421875, + "learning_rate": 1.308e-05, + "loss": 247.7785, "step": 6540 }, { - "epoch": 0.05292544380610703, - "grad_norm": 1410.755859375, - "learning_rate": 2.6200000000000003e-05, - "loss": 155.1898, + "epoch": 0.02646282881579851, + "grad_norm": 1940.4234619140625, + "learning_rate": 1.3100000000000002e-05, + "loss": 178.4448, "step": 6550 }, { - "epoch": 0.05300624601039116, - "grad_norm": 1333.462890625, - "learning_rate": 2.6240000000000003e-05, - "loss": 191.1265, + "epoch": 0.026503230081166142, + "grad_norm": 632.4578857421875, + "learning_rate": 1.3120000000000001e-05, + "loss": 206.7619, "step": 6560 }, { - "epoch": 0.053087048214675296, - "grad_norm": 1367.94970703125, - "learning_rate": 2.628e-05, - "loss": 237.1708, + "epoch": 0.026543631346533772, + "grad_norm": 1491.89306640625, + "learning_rate": 1.314e-05, + "loss": 164.1459, "step": 6570 }, { - "epoch": 0.05316785041895943, - "grad_norm": 1676.188720703125, - "learning_rate": 2.632e-05, - "loss": 242.8356, + "epoch": 0.026584032611901405, + "grad_norm": 831.048828125, + "learning_rate": 1.316e-05, + "loss": 202.0544, "step": 6580 }, { - "epoch": 0.05324865262324356, - "grad_norm": 1407.1265869140625, - "learning_rate": 2.6360000000000002e-05, - "loss": 260.0956, + "epoch": 0.026624433877269035, + "grad_norm": 812.6109619140625, + "learning_rate": 1.3180000000000001e-05, + "loss": 127.2249, "step": 6590 }, { - "epoch": 0.0533294548275277, - "grad_norm": 1252.400634765625, - "learning_rate": 2.64e-05, - "loss": 151.9722, + "epoch": 0.026664835142636668, + "grad_norm": 678.7022094726562, + "learning_rate": 1.32e-05, + "loss": 262.2538, "step": 6600 }, { - "epoch": 0.05341025703181183, - "grad_norm": 955.1424560546875, - "learning_rate": 2.6440000000000004e-05, - "loss": 236.3884, + "epoch": 0.026705236408004297, + "grad_norm": 1193.4014892578125, + "learning_rate": 1.3220000000000002e-05, + "loss": 258.1862, "step": 6610 }, { - "epoch": 0.05349105923609596, - "grad_norm": 798.6595458984375, - "learning_rate": 2.648e-05, - "loss": 210.5857, + "epoch": 0.02674563767337193, + "grad_norm": 881.2777099609375, + "learning_rate": 1.324e-05, + "loss": 181.1106, "step": 6620 }, { - "epoch": 0.05357186144038009, - "grad_norm": 829.872802734375, - "learning_rate": 2.652e-05, - "loss": 203.776, + "epoch": 0.02678603893873956, + "grad_norm": 751.0634765625, + "learning_rate": 1.326e-05, + "loss": 328.8563, "step": 6630 }, { - "epoch": 0.05365266364466423, - "grad_norm": 1118.2623291015625, - "learning_rate": 2.6560000000000003e-05, - "loss": 179.6126, + "epoch": 0.026826440204107193, + "grad_norm": 810.8790893554688, + "learning_rate": 1.3280000000000002e-05, + "loss": 175.6053, "step": 6640 }, { - "epoch": 0.05373346584894836, - "grad_norm": 2747.0791015625, - "learning_rate": 2.6600000000000003e-05, - "loss": 203.2282, + "epoch": 0.026866841469474823, + "grad_norm": 544.2944946289062, + "learning_rate": 1.3300000000000001e-05, + "loss": 172.9304, "step": 6650 }, { - "epoch": 0.05381426805323249, - "grad_norm": 736.2688598632812, - "learning_rate": 2.6640000000000002e-05, - "loss": 153.964, + "epoch": 0.026907242734842456, + "grad_norm": 1003.13818359375, + "learning_rate": 1.3320000000000001e-05, + "loss": 180.5678, "step": 6660 }, { - "epoch": 0.05389507025751662, - "grad_norm": 3286.93798828125, - "learning_rate": 2.668e-05, - "loss": 260.0093, + "epoch": 0.026947644000210085, + "grad_norm": 1979.7833251953125, + "learning_rate": 1.334e-05, + "loss": 212.5934, "step": 6670 }, { - "epoch": 0.05397587246180076, - "grad_norm": 1144.1749267578125, - "learning_rate": 2.672e-05, - "loss": 242.2994, + "epoch": 0.026988045265577718, + "grad_norm": 660.824462890625, + "learning_rate": 1.336e-05, + "loss": 170.068, "step": 6680 }, { - "epoch": 0.05405667466608489, - "grad_norm": 1726.44921875, - "learning_rate": 2.676e-05, - "loss": 198.0787, + "epoch": 0.027028446530945348, + "grad_norm": 753.3848876953125, + "learning_rate": 1.338e-05, + "loss": 226.8918, "step": 6690 }, { - "epoch": 0.054137476870369024, - "grad_norm": 991.6690063476562, - "learning_rate": 2.6800000000000004e-05, - "loss": 161.0064, + "epoch": 0.02706884779631298, + "grad_norm": 1572.1107177734375, + "learning_rate": 1.3400000000000002e-05, + "loss": 285.1505, "step": 6700 }, { - "epoch": 0.05421827907465316, - "grad_norm": 861.3410034179688, - "learning_rate": 2.6840000000000004e-05, - "loss": 220.6545, + "epoch": 0.02710924906168061, + "grad_norm": 727.326416015625, + "learning_rate": 1.3420000000000002e-05, + "loss": 259.9044, "step": 6710 }, { - "epoch": 0.05429908127893729, - "grad_norm": 1375.5999755859375, - "learning_rate": 2.688e-05, - "loss": 183.2657, + "epoch": 0.027149650327048244, + "grad_norm": 1423.973388671875, + "learning_rate": 1.344e-05, + "loss": 208.3222, "step": 6720 }, { - "epoch": 0.054379883483221425, - "grad_norm": 1730.3709716796875, - "learning_rate": 2.692e-05, - "loss": 198.2674, + "epoch": 0.027190051592415873, + "grad_norm": 3651.573974609375, + "learning_rate": 1.346e-05, + "loss": 276.9352, "step": 6730 }, { - "epoch": 0.054460685687505554, - "grad_norm": 1114.51123046875, - "learning_rate": 2.6960000000000003e-05, - "loss": 222.0974, + "epoch": 0.027230452857783506, + "grad_norm": 986.3583984375, + "learning_rate": 1.3480000000000001e-05, + "loss": 233.0887, "step": 6740 }, { - "epoch": 0.05454148789178969, - "grad_norm": 1189.2464599609375, - "learning_rate": 2.7000000000000002e-05, - "loss": 234.9144, + "epoch": 0.027270854123151136, + "grad_norm": 1490.3424072265625, + "learning_rate": 1.3500000000000001e-05, + "loss": 308.2051, "step": 6750 }, { - "epoch": 0.05462229009607382, - "grad_norm": 722.0228271484375, - "learning_rate": 2.704e-05, - "loss": 171.5676, + "epoch": 0.02731125538851877, + "grad_norm": 1850.4970703125, + "learning_rate": 1.352e-05, + "loss": 220.4273, "step": 6760 }, { - "epoch": 0.054703092300357956, - "grad_norm": 1166.3243408203125, - "learning_rate": 2.7079999999999998e-05, - "loss": 181.6437, + "epoch": 0.0273516566538864, + "grad_norm": 558.9097900390625, + "learning_rate": 1.3539999999999999e-05, + "loss": 192.5242, "step": 6770 }, { - "epoch": 0.054783894504642085, - "grad_norm": 1161.74365234375, - "learning_rate": 2.712e-05, - "loss": 204.9618, + "epoch": 0.02739205791925403, + "grad_norm": 1742.455322265625, + "learning_rate": 1.356e-05, + "loss": 228.9638, "step": 6780 }, { - "epoch": 0.05486469670892622, - "grad_norm": 1748.9061279296875, - "learning_rate": 2.716e-05, - "loss": 194.0629, + "epoch": 0.02743245918462166, + "grad_norm": 6109.70947265625, + "learning_rate": 1.358e-05, + "loss": 218.1945, "step": 6790 }, { - "epoch": 0.05494549891321035, - "grad_norm": 1267.5760498046875, - "learning_rate": 2.7200000000000004e-05, - "loss": 183.3097, + "epoch": 0.027472860449989294, + "grad_norm": 2294.8359375, + "learning_rate": 1.3600000000000002e-05, + "loss": 259.6226, "step": 6800 }, { - "epoch": 0.055026301117494486, - "grad_norm": 648.74072265625, - "learning_rate": 2.724e-05, - "loss": 236.883, + "epoch": 0.027513261715356924, + "grad_norm": 1345.371826171875, + "learning_rate": 1.362e-05, + "loss": 208.2849, "step": 6810 }, { - "epoch": 0.055107103321778615, - "grad_norm": 1128.281005859375, - "learning_rate": 2.728e-05, - "loss": 198.3027, + "epoch": 0.027553662980724557, + "grad_norm": 631.337158203125, + "learning_rate": 1.364e-05, + "loss": 174.1924, "step": 6820 }, { - "epoch": 0.05518790552606275, - "grad_norm": 4471.0068359375, - "learning_rate": 2.7320000000000003e-05, - "loss": 230.4627, + "epoch": 0.027594064246092186, + "grad_norm": 2324.395751953125, + "learning_rate": 1.3660000000000001e-05, + "loss": 217.651, "step": 6830 }, { - "epoch": 0.05526870773034688, - "grad_norm": 1089.693115234375, - "learning_rate": 2.7360000000000002e-05, - "loss": 153.7902, + "epoch": 0.02763446551145982, + "grad_norm": 1169.88916015625, + "learning_rate": 1.3680000000000001e-05, + "loss": 152.5202, "step": 6840 }, { - "epoch": 0.05534950993463102, - "grad_norm": 1391.3704833984375, - "learning_rate": 2.7400000000000002e-05, - "loss": 198.1104, + "epoch": 0.02767486677682745, + "grad_norm": 580.0879516601562, + "learning_rate": 1.3700000000000001e-05, + "loss": 260.7849, "step": 6850 }, { - "epoch": 0.05543031213891515, - "grad_norm": 1094.4219970703125, - "learning_rate": 2.7439999999999998e-05, - "loss": 199.6773, + "epoch": 0.027715268042195082, + "grad_norm": 1336.279052734375, + "learning_rate": 1.3719999999999999e-05, + "loss": 184.8877, "step": 6860 }, { - "epoch": 0.05551111434319928, - "grad_norm": 1793.90283203125, - "learning_rate": 2.748e-05, - "loss": 205.4606, + "epoch": 0.02775566930756271, + "grad_norm": 1506.610107421875, + "learning_rate": 1.374e-05, + "loss": 236.7283, "step": 6870 }, { - "epoch": 0.05559191654748342, - "grad_norm": 1508.5858154296875, - "learning_rate": 2.752e-05, - "loss": 189.9783, + "epoch": 0.027796070572930345, + "grad_norm": 1100.15234375, + "learning_rate": 1.376e-05, + "loss": 263.8396, "step": 6880 }, { - "epoch": 0.05567271875176755, - "grad_norm": 983.4999389648438, - "learning_rate": 2.7560000000000004e-05, - "loss": 170.2493, + "epoch": 0.027836471838297974, + "grad_norm": 794.3699340820312, + "learning_rate": 1.3780000000000002e-05, + "loss": 290.786, "step": 6890 }, { - "epoch": 0.05575352095605168, - "grad_norm": 1173.949951171875, - "learning_rate": 2.7600000000000003e-05, - "loss": 202.6379, + "epoch": 0.027876873103665607, + "grad_norm": 2009.521240234375, + "learning_rate": 1.3800000000000002e-05, + "loss": 173.6581, "step": 6900 }, { - "epoch": 0.05583432316033581, - "grad_norm": 1271.8878173828125, - "learning_rate": 2.764e-05, - "loss": 173.9615, + "epoch": 0.027917274369033237, + "grad_norm": 1658.059814453125, + "learning_rate": 1.382e-05, + "loss": 210.0458, "step": 6910 }, { - "epoch": 0.05591512536461995, - "grad_norm": 1763.315185546875, - "learning_rate": 2.768e-05, - "loss": 204.1505, + "epoch": 0.02795767563440087, + "grad_norm": 632.90869140625, + "learning_rate": 1.384e-05, + "loss": 179.4226, "step": 6920 }, { - "epoch": 0.05599592756890408, - "grad_norm": 1689.142333984375, - "learning_rate": 2.7720000000000002e-05, - "loss": 201.5777, + "epoch": 0.0279980768997685, + "grad_norm": 1309.6959228515625, + "learning_rate": 1.3860000000000001e-05, + "loss": 168.1927, "step": 6930 }, { - "epoch": 0.056076729773188214, - "grad_norm": 1182.48193359375, - "learning_rate": 2.7760000000000002e-05, - "loss": 220.7724, + "epoch": 0.028038478165136133, + "grad_norm": 1135.08935546875, + "learning_rate": 1.3880000000000001e-05, + "loss": 189.0599, "step": 6940 }, { - "epoch": 0.05615753197747234, - "grad_norm": 1887.9561767578125, - "learning_rate": 2.7800000000000005e-05, - "loss": 126.6208, + "epoch": 0.028078879430503762, + "grad_norm": 642.1088256835938, + "learning_rate": 1.3900000000000002e-05, + "loss": 191.8304, "step": 6950 }, { - "epoch": 0.05623833418175648, - "grad_norm": 2466.075927734375, - "learning_rate": 2.7839999999999998e-05, - "loss": 182.4701, + "epoch": 0.028119280695871395, + "grad_norm": 1067.7398681640625, + "learning_rate": 1.3919999999999999e-05, + "loss": 153.804, "step": 6960 }, { - "epoch": 0.05631913638604061, - "grad_norm": 1341.1826171875, - "learning_rate": 2.788e-05, - "loss": 168.0293, + "epoch": 0.028159681961239025, + "grad_norm": 0.0, + "learning_rate": 1.394e-05, + "loss": 131.2499, "step": 6970 }, { - "epoch": 0.056399938590324744, - "grad_norm": 876.8120727539062, - "learning_rate": 2.792e-05, - "loss": 230.6344, + "epoch": 0.028200083226606658, + "grad_norm": 4239.3876953125, + "learning_rate": 1.396e-05, + "loss": 219.0053, "step": 6980 }, { - "epoch": 0.05648074079460888, - "grad_norm": 1114.42041015625, - "learning_rate": 2.7960000000000003e-05, - "loss": 182.8752, + "epoch": 0.028240484491974287, + "grad_norm": 1219.2093505859375, + "learning_rate": 1.3980000000000002e-05, + "loss": 216.1493, "step": 6990 }, { - "epoch": 0.05656154299889301, - "grad_norm": 1265.07861328125, - "learning_rate": 2.8000000000000003e-05, - "loss": 184.1162, + "epoch": 0.02828088575734192, + "grad_norm": 829.5226440429688, + "learning_rate": 1.4000000000000001e-05, + "loss": 197.1629, "step": 7000 }, { - "epoch": 0.056642345203177145, - "grad_norm": 983.197509765625, - "learning_rate": 2.804e-05, - "loss": 224.6468, + "epoch": 0.02832128702270955, + "grad_norm": 783.1107177734375, + "learning_rate": 1.402e-05, + "loss": 197.8331, "step": 7010 }, { - "epoch": 0.056723147407461275, - "grad_norm": 1237.772216796875, - "learning_rate": 2.8080000000000002e-05, - "loss": 186.0648, + "epoch": 0.028361688288077183, + "grad_norm": 1040.990966796875, + "learning_rate": 1.4040000000000001e-05, + "loss": 234.4111, "step": 7020 }, { - "epoch": 0.05680394961174541, - "grad_norm": 1407.0535888671875, - "learning_rate": 2.8120000000000002e-05, - "loss": 214.665, + "epoch": 0.028402089553444813, + "grad_norm": 952.650390625, + "learning_rate": 1.4060000000000001e-05, + "loss": 138.1043, "step": 7030 }, { - "epoch": 0.05688475181602954, - "grad_norm": 831.7734985351562, - "learning_rate": 2.816e-05, - "loss": 201.0985, + "epoch": 0.028442490818812446, + "grad_norm": 2120.21337890625, + "learning_rate": 1.408e-05, + "loss": 190.9683, "step": 7040 }, { - "epoch": 0.056965554020313676, - "grad_norm": 2256.924560546875, - "learning_rate": 2.8199999999999998e-05, - "loss": 203.6, + "epoch": 0.028482892084180075, + "grad_norm": 1032.8970947265625, + "learning_rate": 1.4099999999999999e-05, + "loss": 211.108, "step": 7050 }, { - "epoch": 0.057046356224597805, - "grad_norm": 1117.63134765625, - "learning_rate": 2.824e-05, - "loss": 169.7859, + "epoch": 0.02852329334954771, + "grad_norm": 845.619384765625, + "learning_rate": 1.412e-05, + "loss": 249.2893, "step": 7060 }, { - "epoch": 0.05712715842888194, - "grad_norm": 1881.6373291015625, - "learning_rate": 2.828e-05, - "loss": 253.7371, + "epoch": 0.028563694614915338, + "grad_norm": 2072.714599609375, + "learning_rate": 1.414e-05, + "loss": 148.9613, "step": 7070 }, { - "epoch": 0.05720796063316607, - "grad_norm": 641.18896484375, - "learning_rate": 2.8320000000000003e-05, - "loss": 166.879, + "epoch": 0.02860409588028297, + "grad_norm": 1205.255615234375, + "learning_rate": 1.4160000000000002e-05, + "loss": 208.2003, "step": 7080 }, { - "epoch": 0.057288762837450206, - "grad_norm": 1205.0986328125, - "learning_rate": 2.8360000000000003e-05, - "loss": 176.0333, + "epoch": 0.0286444971456506, + "grad_norm": 1514.6177978515625, + "learning_rate": 1.4180000000000001e-05, + "loss": 211.4245, "step": 7090 }, { - "epoch": 0.057369565041734336, - "grad_norm": 760.34228515625, - "learning_rate": 2.84e-05, - "loss": 158.4413, + "epoch": 0.028684898411018234, + "grad_norm": 599.4893188476562, + "learning_rate": 1.42e-05, + "loss": 270.9207, "step": 7100 }, { - "epoch": 0.05745036724601847, - "grad_norm": 813.41015625, - "learning_rate": 2.844e-05, - "loss": 150.5683, + "epoch": 0.028725299676385863, + "grad_norm": 622.9778442382812, + "learning_rate": 1.422e-05, + "loss": 248.9024, "step": 7110 }, { - "epoch": 0.0575311694503026, - "grad_norm": 3718.865234375, - "learning_rate": 2.8480000000000002e-05, - "loss": 205.5308, + "epoch": 0.028765700941753496, + "grad_norm": 827.0371704101562, + "learning_rate": 1.4240000000000001e-05, + "loss": 228.1889, "step": 7120 }, { - "epoch": 0.05761197165458674, - "grad_norm": 1168.968505859375, - "learning_rate": 2.852e-05, - "loss": 218.9724, + "epoch": 0.028806102207121126, + "grad_norm": 1157.1502685546875, + "learning_rate": 1.426e-05, + "loss": 181.2518, "step": 7130 }, { - "epoch": 0.05769277385887087, - "grad_norm": 1637.9498291015625, - "learning_rate": 2.8560000000000004e-05, - "loss": 197.8596, + "epoch": 0.02884650347248876, + "grad_norm": 1035.8681640625, + "learning_rate": 1.4280000000000002e-05, + "loss": 223.2644, "step": 7140 }, { - "epoch": 0.057773576063155, - "grad_norm": 1739.265380859375, - "learning_rate": 2.86e-05, - "loss": 300.0065, + "epoch": 0.02888690473785639, + "grad_norm": 1198.443359375, + "learning_rate": 1.43e-05, + "loss": 193.8474, "step": 7150 }, { - "epoch": 0.05785437826743914, - "grad_norm": 1788.8892822265625, - "learning_rate": 2.864e-05, - "loss": 211.1129, + "epoch": 0.02892730600322402, + "grad_norm": 1005.276611328125, + "learning_rate": 1.432e-05, + "loss": 250.6531, "step": 7160 }, { - "epoch": 0.05793518047172327, - "grad_norm": 1520.8428955078125, - "learning_rate": 2.868e-05, - "loss": 197.0524, + "epoch": 0.02896770726859165, + "grad_norm": 2052.37158203125, + "learning_rate": 1.434e-05, + "loss": 184.0699, "step": 7170 }, { - "epoch": 0.058015982676007403, - "grad_norm": 1793.5377197265625, - "learning_rate": 2.8720000000000003e-05, - "loss": 229.5992, + "epoch": 0.029008108533959284, + "grad_norm": 22271.3046875, + "learning_rate": 1.4360000000000001e-05, + "loss": 298.0969, "step": 7180 }, { - "epoch": 0.05809678488029153, - "grad_norm": 1648.1058349609375, - "learning_rate": 2.8760000000000002e-05, - "loss": 196.0669, + "epoch": 0.029048509799326914, + "grad_norm": 1663.1221923828125, + "learning_rate": 1.4380000000000001e-05, + "loss": 256.0283, "step": 7190 }, { - "epoch": 0.05817758708457567, - "grad_norm": 1060.8009033203125, - "learning_rate": 2.88e-05, - "loss": 183.6431, + "epoch": 0.029088911064694547, + "grad_norm": 5215.501953125, + "learning_rate": 1.44e-05, + "loss": 208.372, "step": 7200 }, { - "epoch": 0.0582583892888598, - "grad_norm": 1374.04296875, - "learning_rate": 2.8840000000000002e-05, - "loss": 245.48, + "epoch": 0.029129312330062176, + "grad_norm": 1427.092529296875, + "learning_rate": 1.4420000000000001e-05, + "loss": 196.7174, "step": 7210 }, { - "epoch": 0.058339191493143934, - "grad_norm": 1245.9501953125, - "learning_rate": 2.888e-05, - "loss": 156.4156, + "epoch": 0.02916971359542981, + "grad_norm": 755.31494140625, + "learning_rate": 1.444e-05, + "loss": 207.5049, "step": 7220 }, { - "epoch": 0.05841999369742806, - "grad_norm": 1350.1478271484375, - "learning_rate": 2.8920000000000004e-05, - "loss": 204.4906, + "epoch": 0.02921011486079744, + "grad_norm": 1840.75927734375, + "learning_rate": 1.4460000000000002e-05, + "loss": 246.7446, "step": 7230 }, { - "epoch": 0.0585007959017122, - "grad_norm": 1081.7962646484375, - "learning_rate": 2.8960000000000004e-05, - "loss": 200.9338, + "epoch": 0.029250516126165072, + "grad_norm": 1352.86767578125, + "learning_rate": 1.4480000000000002e-05, + "loss": 193.7286, "step": 7240 }, { - "epoch": 0.05858159810599633, - "grad_norm": 664.3955078125, - "learning_rate": 2.9e-05, - "loss": 179.7652, + "epoch": 0.029290917391532702, + "grad_norm": 1088.9520263671875, + "learning_rate": 1.45e-05, + "loss": 213.7931, "step": 7250 }, { - "epoch": 0.058662400310280464, - "grad_norm": 1070.4627685546875, - "learning_rate": 2.904e-05, - "loss": 168.4515, + "epoch": 0.029331318656900335, + "grad_norm": 2126.0322265625, + "learning_rate": 1.452e-05, + "loss": 227.5255, "step": 7260 }, { - "epoch": 0.0587432025145646, - "grad_norm": 1206.1871337890625, - "learning_rate": 2.9080000000000003e-05, - "loss": 171.9546, + "epoch": 0.029371719922267964, + "grad_norm": 0.0, + "learning_rate": 1.4540000000000001e-05, + "loss": 149.3132, "step": 7270 }, { - "epoch": 0.05882400471884873, - "grad_norm": 1310.056884765625, - "learning_rate": 2.9120000000000002e-05, - "loss": 177.9742, + "epoch": 0.029412121187635597, + "grad_norm": 582.3840942382812, + "learning_rate": 1.4560000000000001e-05, + "loss": 219.7635, "step": 7280 }, { - "epoch": 0.058904806923132866, - "grad_norm": 1036.6202392578125, - "learning_rate": 2.9160000000000005e-05, - "loss": 144.7034, + "epoch": 0.029452522453003227, + "grad_norm": 935.360107421875, + "learning_rate": 1.4580000000000003e-05, + "loss": 219.2881, "step": 7290 }, { - "epoch": 0.058985609127416995, - "grad_norm": 1294.3916015625, - "learning_rate": 2.9199999999999998e-05, - "loss": 204.9181, + "epoch": 0.02949292371837086, + "grad_norm": 2865.860595703125, + "learning_rate": 1.4599999999999999e-05, + "loss": 170.7084, "step": 7300 }, { - "epoch": 0.05906641133170113, - "grad_norm": 1057.136474609375, - "learning_rate": 2.924e-05, - "loss": 177.6536, + "epoch": 0.02953332498373849, + "grad_norm": 1290.148681640625, + "learning_rate": 1.462e-05, + "loss": 273.1594, "step": 7310 }, { - "epoch": 0.05914721353598526, - "grad_norm": 1183.7764892578125, - "learning_rate": 2.928e-05, - "loss": 172.6393, + "epoch": 0.029573726249106123, + "grad_norm": 1073.5101318359375, + "learning_rate": 1.464e-05, + "loss": 276.2752, "step": 7320 }, { - "epoch": 0.059228015740269396, - "grad_norm": 981.899169921875, - "learning_rate": 2.9320000000000004e-05, - "loss": 162.266, + "epoch": 0.029614127514473752, + "grad_norm": 2523.898681640625, + "learning_rate": 1.4660000000000002e-05, + "loss": 229.5607, "step": 7330 }, { - "epoch": 0.059308817944553525, - "grad_norm": 1877.4185791015625, - "learning_rate": 2.9360000000000003e-05, - "loss": 185.176, + "epoch": 0.029654528779841385, + "grad_norm": 2364.947021484375, + "learning_rate": 1.4680000000000002e-05, + "loss": 194.6274, "step": 7340 }, { - "epoch": 0.05938962014883766, - "grad_norm": 543.1023559570312, - "learning_rate": 2.94e-05, - "loss": 194.0723, + "epoch": 0.029694930045209015, + "grad_norm": 1139.0018310546875, + "learning_rate": 1.47e-05, + "loss": 143.1356, "step": 7350 }, { - "epoch": 0.05947042235312179, - "grad_norm": 1260.056640625, - "learning_rate": 2.944e-05, - "loss": 201.4721, + "epoch": 0.029735331310576648, + "grad_norm": 1392.792236328125, + "learning_rate": 1.472e-05, + "loss": 220.7791, "step": 7360 }, { - "epoch": 0.05955122455740593, - "grad_norm": 1433.0521240234375, - "learning_rate": 2.9480000000000002e-05, - "loss": 228.2963, + "epoch": 0.029775732575944278, + "grad_norm": 1605.3038330078125, + "learning_rate": 1.4740000000000001e-05, + "loss": 159.6861, "step": 7370 }, { - "epoch": 0.059632026761690056, - "grad_norm": 887.9909057617188, - "learning_rate": 2.9520000000000002e-05, - "loss": 191.3625, + "epoch": 0.02981613384131191, + "grad_norm": 1095.6702880859375, + "learning_rate": 1.4760000000000001e-05, + "loss": 190.0395, "step": 7380 }, { - "epoch": 0.05971282896597419, - "grad_norm": 738.1744384765625, - "learning_rate": 2.9559999999999998e-05, - "loss": 171.2313, + "epoch": 0.02985653510667954, + "grad_norm": 3128.165283203125, + "learning_rate": 1.4779999999999999e-05, + "loss": 217.6996, "step": 7390 }, { - "epoch": 0.05979363117025832, - "grad_norm": 1805.227294921875, - "learning_rate": 2.96e-05, - "loss": 200.2166, + "epoch": 0.029896936372047173, + "grad_norm": 868.65966796875, + "learning_rate": 1.48e-05, + "loss": 303.5258, "step": 7400 }, { - "epoch": 0.05987443337454246, - "grad_norm": 721.8179321289062, - "learning_rate": 2.964e-05, - "loss": 227.3146, + "epoch": 0.029937337637414803, + "grad_norm": 1097.5875244140625, + "learning_rate": 1.482e-05, + "loss": 208.767, "step": 7410 }, { - "epoch": 0.05995523557882659, - "grad_norm": 908.9849243164062, - "learning_rate": 2.9680000000000004e-05, - "loss": 149.9561, + "epoch": 0.029977738902782436, + "grad_norm": 1184.8807373046875, + "learning_rate": 1.4840000000000002e-05, + "loss": 212.3098, "step": 7420 }, { - "epoch": 0.06003603778311072, - "grad_norm": 1412.0394287109375, - "learning_rate": 2.9720000000000003e-05, - "loss": 208.3763, + "epoch": 0.030018140168150065, + "grad_norm": 1545.9532470703125, + "learning_rate": 1.4860000000000002e-05, + "loss": 142.4259, "step": 7430 }, { - "epoch": 0.06011683998739486, - "grad_norm": 1072.092041015625, - "learning_rate": 2.976e-05, - "loss": 191.9874, + "epoch": 0.0300585414335177, + "grad_norm": 1245.4107666015625, + "learning_rate": 1.488e-05, + "loss": 164.8042, "step": 7440 }, { - "epoch": 0.06019764219167899, - "grad_norm": 951.6849365234375, - "learning_rate": 2.98e-05, - "loss": 182.6925, + "epoch": 0.030098942698885328, + "grad_norm": 577.9031982421875, + "learning_rate": 1.49e-05, + "loss": 216.6332, "step": 7450 }, { - "epoch": 0.060278444395963124, - "grad_norm": 2270.572998046875, - "learning_rate": 2.9840000000000002e-05, - "loss": 209.876, + "epoch": 0.03013934396425296, + "grad_norm": 1043.52392578125, + "learning_rate": 1.4920000000000001e-05, + "loss": 247.0811, "step": 7460 }, { - "epoch": 0.06035924660024725, - "grad_norm": 1244.6778564453125, - "learning_rate": 2.9880000000000002e-05, - "loss": 214.1003, + "epoch": 0.03017974522962059, + "grad_norm": 1351.240966796875, + "learning_rate": 1.4940000000000001e-05, + "loss": 198.2244, "step": 7470 }, { - "epoch": 0.06044004880453139, - "grad_norm": 676.9662475585938, - "learning_rate": 2.9920000000000005e-05, - "loss": 158.1553, + "epoch": 0.030220146494988224, + "grad_norm": 1484.0284423828125, + "learning_rate": 1.4960000000000002e-05, + "loss": 197.885, "step": 7480 }, { - "epoch": 0.06052085100881552, - "grad_norm": 781.7400512695312, - "learning_rate": 2.9959999999999998e-05, - "loss": 228.7864, + "epoch": 0.030260547760355853, + "grad_norm": 1378.754638671875, + "learning_rate": 1.4979999999999999e-05, + "loss": 201.7562, "step": 7490 }, { - "epoch": 0.060601653213099654, - "grad_norm": 1596.61669921875, - "learning_rate": 3e-05, - "loss": 162.8416, + "epoch": 0.030300949025723486, + "grad_norm": 7344.61767578125, + "learning_rate": 1.5e-05, + "loss": 231.1991, "step": 7500 }, { - "epoch": 0.060682455417383784, - "grad_norm": 1502.6162109375, - "learning_rate": 3.004e-05, - "loss": 178.6196, + "epoch": 0.030341350291091116, + "grad_norm": 3804.617919921875, + "learning_rate": 1.502e-05, + "loss": 310.0097, "step": 7510 }, { - "epoch": 0.06076325762166792, - "grad_norm": 1480.6331787109375, - "learning_rate": 3.0080000000000003e-05, - "loss": 178.9377, + "epoch": 0.03038175155645875, + "grad_norm": 1033.6324462890625, + "learning_rate": 1.5040000000000002e-05, + "loss": 198.1659, "step": 7520 }, { - "epoch": 0.06084405982595205, - "grad_norm": 1437.5445556640625, - "learning_rate": 3.0120000000000003e-05, - "loss": 181.7419, + "epoch": 0.03042215282182638, + "grad_norm": 653.5598754882812, + "learning_rate": 1.5060000000000001e-05, + "loss": 178.0301, "step": 7530 }, { - "epoch": 0.060924862030236185, - "grad_norm": 1499.4638671875, - "learning_rate": 3.016e-05, - "loss": 226.4455, + "epoch": 0.030462554087194012, + "grad_norm": 1495.2060546875, + "learning_rate": 1.508e-05, + "loss": 250.4168, "step": 7540 }, { - "epoch": 0.06100566423452032, - "grad_norm": 1206.754150390625, - "learning_rate": 3.02e-05, - "loss": 200.7514, + "epoch": 0.03050295535256164, + "grad_norm": 3768.185791015625, + "learning_rate": 1.51e-05, + "loss": 161.5591, "step": 7550 }, { - "epoch": 0.06108646643880445, - "grad_norm": 1492.4632568359375, - "learning_rate": 3.0240000000000002e-05, - "loss": 172.3147, + "epoch": 0.030543356617929274, + "grad_norm": 966.1246337890625, + "learning_rate": 1.5120000000000001e-05, + "loss": 173.9351, "step": 7560 }, { - "epoch": 0.061167268643088586, - "grad_norm": 2935.53564453125, - "learning_rate": 3.028e-05, - "loss": 159.8056, + "epoch": 0.030583757883296904, + "grad_norm": 1045.052490234375, + "learning_rate": 1.514e-05, + "loss": 140.5521, "step": 7570 }, { - "epoch": 0.061248070847372715, - "grad_norm": 4353.76904296875, - "learning_rate": 3.0320000000000004e-05, - "loss": 319.3878, + "epoch": 0.030624159148664537, + "grad_norm": 1348.0595703125, + "learning_rate": 1.5160000000000002e-05, + "loss": 318.5935, "step": 7580 }, { - "epoch": 0.06132887305165685, - "grad_norm": 2135.029052734375, - "learning_rate": 3.036e-05, - "loss": 225.5889, + "epoch": 0.030664560414032167, + "grad_norm": 798.9227294921875, + "learning_rate": 1.518e-05, + "loss": 269.7896, "step": 7590 }, { - "epoch": 0.06140967525594098, - "grad_norm": 1746.432373046875, - "learning_rate": 3.04e-05, - "loss": 191.4569, + "epoch": 0.0307049616793998, + "grad_norm": 1470.73583984375, + "learning_rate": 1.52e-05, + "loss": 282.1007, "step": 7600 }, { - "epoch": 0.06149047746022512, - "grad_norm": 1307.22998046875, - "learning_rate": 3.0440000000000003e-05, - "loss": 228.3166, + "epoch": 0.03074536294476743, + "grad_norm": 770.0698852539062, + "learning_rate": 1.5220000000000002e-05, + "loss": 262.2653, "step": 7610 }, { - "epoch": 0.061571279664509246, - "grad_norm": 957.3939208984375, - "learning_rate": 3.0480000000000003e-05, - "loss": 135.6873, + "epoch": 0.030785764210135062, + "grad_norm": 571.9855346679688, + "learning_rate": 1.5240000000000001e-05, + "loss": 192.3983, "step": 7620 }, { - "epoch": 0.06165208186879338, - "grad_norm": 931.1332397460938, - "learning_rate": 3.0520000000000006e-05, - "loss": 207.387, + "epoch": 0.030826165475502692, + "grad_norm": 1054.919921875, + "learning_rate": 1.5260000000000003e-05, + "loss": 219.8108, "step": 7630 }, { - "epoch": 0.06173288407307751, - "grad_norm": 1239.0928955078125, - "learning_rate": 3.056e-05, - "loss": 232.6531, + "epoch": 0.030866566740870325, + "grad_norm": 556.494873046875, + "learning_rate": 1.528e-05, + "loss": 165.5094, "step": 7640 }, { - "epoch": 0.06181368627736165, - "grad_norm": 1364.455322265625, - "learning_rate": 3.06e-05, - "loss": 203.0215, + "epoch": 0.030906968006237955, + "grad_norm": 1616.6512451171875, + "learning_rate": 1.53e-05, + "loss": 183.0181, "step": 7650 }, { - "epoch": 0.061894488481645776, - "grad_norm": 1292.3468017578125, - "learning_rate": 3.0640000000000005e-05, - "loss": 198.0368, + "epoch": 0.030947369271605588, + "grad_norm": 678.8358764648438, + "learning_rate": 1.5320000000000002e-05, + "loss": 138.8231, "step": 7660 }, { - "epoch": 0.06197529068592991, - "grad_norm": 2630.169677734375, - "learning_rate": 3.0680000000000004e-05, - "loss": 182.3269, + "epoch": 0.030987770536973217, + "grad_norm": 817.9168090820312, + "learning_rate": 1.5340000000000002e-05, + "loss": 166.3831, "step": 7670 }, { - "epoch": 0.06205609289021405, - "grad_norm": 850.5173950195312, - "learning_rate": 3.072e-05, - "loss": 158.4843, + "epoch": 0.03102817180234085, + "grad_norm": 1391.115966796875, + "learning_rate": 1.536e-05, + "loss": 265.5596, "step": 7680 }, { - "epoch": 0.06213689509449818, - "grad_norm": 1229.681640625, - "learning_rate": 3.076e-05, - "loss": 168.2838, + "epoch": 0.03106857306770848, + "grad_norm": 2713.052490234375, + "learning_rate": 1.538e-05, + "loss": 221.1131, "step": 7690 }, { - "epoch": 0.062217697298782314, - "grad_norm": 1539.3856201171875, - "learning_rate": 3.08e-05, - "loss": 181.4302, + "epoch": 0.031108974333076113, + "grad_norm": 1156.0615234375, + "learning_rate": 1.54e-05, + "loss": 188.7715, "step": 7700 }, { - "epoch": 0.06229849950306644, - "grad_norm": 1268.3309326171875, - "learning_rate": 3.084e-05, - "loss": 192.6453, + "epoch": 0.031149375598443742, + "grad_norm": 1744.91845703125, + "learning_rate": 1.542e-05, + "loss": 203.3742, "step": 7710 }, { - "epoch": 0.06237930170735058, - "grad_norm": 1633.4764404296875, - "learning_rate": 3.088e-05, - "loss": 215.7682, + "epoch": 0.031189776863811376, + "grad_norm": 1208.4666748046875, + "learning_rate": 1.544e-05, + "loss": 146.0077, "step": 7720 }, { - "epoch": 0.06246010391163471, - "grad_norm": 934.0255126953125, - "learning_rate": 3.092e-05, - "loss": 156.6078, + "epoch": 0.031230178129179005, + "grad_norm": 643.0628662109375, + "learning_rate": 1.546e-05, + "loss": 195.1551, "step": 7730 }, { - "epoch": 0.06254090611591884, - "grad_norm": 1026.5306396484375, - "learning_rate": 3.096e-05, - "loss": 185.7123, + "epoch": 0.03127057939454664, + "grad_norm": 1256.745849609375, + "learning_rate": 1.548e-05, + "loss": 128.5509, "step": 7740 }, { - "epoch": 0.06262170832020297, - "grad_norm": 1446.604248046875, - "learning_rate": 3.1e-05, - "loss": 245.7826, + "epoch": 0.03131098065991427, + "grad_norm": 1408.4251708984375, + "learning_rate": 1.55e-05, + "loss": 202.1482, "step": 7750 }, { - "epoch": 0.06270251052448711, - "grad_norm": 1183.824462890625, - "learning_rate": 3.104e-05, - "loss": 166.5928, + "epoch": 0.0313513819252819, + "grad_norm": 2253.969970703125, + "learning_rate": 1.552e-05, + "loss": 258.4842, "step": 7760 }, { - "epoch": 0.06278331272877125, - "grad_norm": 1205.846435546875, - "learning_rate": 3.108e-05, - "loss": 163.2229, + "epoch": 0.03139178319064953, + "grad_norm": 1037.827392578125, + "learning_rate": 1.554e-05, + "loss": 167.7397, "step": 7770 }, { - "epoch": 0.06286411493305537, - "grad_norm": 1170.0755615234375, - "learning_rate": 3.112e-05, - "loss": 178.1934, + "epoch": 0.03143218445601716, + "grad_norm": 1066.5501708984375, + "learning_rate": 1.556e-05, + "loss": 218.4733, "step": 7780 }, { - "epoch": 0.0629449171373395, - "grad_norm": 843.529541015625, - "learning_rate": 3.116e-05, - "loss": 211.1167, + "epoch": 0.031472585721384796, + "grad_norm": 1018.4628295898438, + "learning_rate": 1.558e-05, + "loss": 196.4911, "step": 7790 }, { - "epoch": 0.06302571934162364, - "grad_norm": 1392.8565673828125, - "learning_rate": 3.12e-05, - "loss": 247.1108, + "epoch": 0.03151298698675242, + "grad_norm": 1526.3201904296875, + "learning_rate": 1.56e-05, + "loss": 214.0186, "step": 7800 }, { - "epoch": 0.06310652154590778, - "grad_norm": 1649.6248779296875, - "learning_rate": 3.1240000000000006e-05, - "loss": 198.9281, + "epoch": 0.031553388252120056, + "grad_norm": 829.8909301757812, + "learning_rate": 1.5620000000000003e-05, + "loss": 150.7465, "step": 7810 }, { - "epoch": 0.06318732375019191, - "grad_norm": 898.980712890625, - "learning_rate": 3.1280000000000005e-05, - "loss": 208.4491, + "epoch": 0.03159378951748769, + "grad_norm": 1091.463623046875, + "learning_rate": 1.5640000000000003e-05, + "loss": 240.4906, "step": 7820 }, { - "epoch": 0.06326812595447603, - "grad_norm": 1707.0943603515625, - "learning_rate": 3.132e-05, - "loss": 238.487, + "epoch": 0.03163419078285532, + "grad_norm": 470.00250244140625, + "learning_rate": 1.566e-05, + "loss": 159.8959, "step": 7830 }, { - "epoch": 0.06334892815876017, - "grad_norm": 2001.99853515625, - "learning_rate": 3.136e-05, - "loss": 197.1288, + "epoch": 0.03167459204822295, + "grad_norm": 1311.1846923828125, + "learning_rate": 1.568e-05, + "loss": 205.6226, "step": 7840 }, { - "epoch": 0.0634297303630443, - "grad_norm": 1150.8634033203125, - "learning_rate": 3.1400000000000004e-05, - "loss": 186.8793, + "epoch": 0.03171499331359058, + "grad_norm": 1698.6754150390625, + "learning_rate": 1.5700000000000002e-05, + "loss": 217.6704, "step": 7850 }, { - "epoch": 0.06351053256732844, - "grad_norm": 1406.9329833984375, - "learning_rate": 3.1440000000000004e-05, - "loss": 189.1392, + "epoch": 0.031755394578958214, + "grad_norm": 1376.8931884765625, + "learning_rate": 1.5720000000000002e-05, + "loss": 172.5748, "step": 7860 }, { - "epoch": 0.06359133477161256, - "grad_norm": 1370.693115234375, - "learning_rate": 3.1480000000000004e-05, - "loss": 179.2577, + "epoch": 0.03179579584432585, + "grad_norm": 1433.7232666015625, + "learning_rate": 1.5740000000000002e-05, + "loss": 185.2836, "step": 7870 }, { - "epoch": 0.0636721369758967, - "grad_norm": 1279.3988037109375, - "learning_rate": 3.1519999999999996e-05, - "loss": 205.281, + "epoch": 0.03183619710969347, + "grad_norm": 4025.76220703125, + "learning_rate": 1.5759999999999998e-05, + "loss": 315.4204, "step": 7880 }, { - "epoch": 0.06375293918018084, - "grad_norm": 1133.031494140625, - "learning_rate": 3.156e-05, - "loss": 187.7616, + "epoch": 0.031876598375061106, + "grad_norm": 853.8228759765625, + "learning_rate": 1.578e-05, + "loss": 153.3957, "step": 7890 }, { - "epoch": 0.06383374138446497, - "grad_norm": 722.7349243164062, - "learning_rate": 3.16e-05, - "loss": 224.9383, + "epoch": 0.03191699964042874, + "grad_norm": 944.7000732421875, + "learning_rate": 1.58e-05, + "loss": 128.8845, "step": 7900 }, { - "epoch": 0.0639145435887491, - "grad_norm": 3694.966552734375, - "learning_rate": 3.164e-05, - "loss": 166.4175, + "epoch": 0.03195740090579637, + "grad_norm": 643.9895629882812, + "learning_rate": 1.582e-05, + "loss": 223.8146, "step": 7910 }, { - "epoch": 0.06399534579303323, - "grad_norm": 1465.952880859375, - "learning_rate": 3.168e-05, - "loss": 144.1198, + "epoch": 0.031997802171164, + "grad_norm": 793.1214599609375, + "learning_rate": 1.584e-05, + "loss": 231.9548, "step": 7920 }, { - "epoch": 0.06407614799731737, - "grad_norm": 1090.0264892578125, - "learning_rate": 3.172e-05, - "loss": 198.0994, + "epoch": 0.03203820343653163, + "grad_norm": 1593.3084716796875, + "learning_rate": 1.586e-05, + "loss": 222.1474, "step": 7930 }, { - "epoch": 0.0641569502016015, - "grad_norm": 575.564453125, - "learning_rate": 3.176e-05, - "loss": 152.3298, + "epoch": 0.032078604701899265, + "grad_norm": 0.0, + "learning_rate": 1.588e-05, + "loss": 156.401, "step": 7940 }, { - "epoch": 0.06423775240588563, - "grad_norm": 2499.767822265625, - "learning_rate": 3.18e-05, - "loss": 231.9078, + "epoch": 0.0321190059672669, + "grad_norm": 941.607177734375, + "learning_rate": 1.59e-05, + "loss": 239.5231, "step": 7950 }, { - "epoch": 0.06431855461016976, - "grad_norm": 705.5277709960938, - "learning_rate": 3.184e-05, - "loss": 183.8972, + "epoch": 0.032159407232634524, + "grad_norm": 1561.1961669921875, + "learning_rate": 1.592e-05, + "loss": 269.0299, "step": 7960 }, { - "epoch": 0.0643993568144539, - "grad_norm": 759.2621459960938, - "learning_rate": 3.188e-05, - "loss": 193.2608, + "epoch": 0.03219980849800216, + "grad_norm": 4084.514892578125, + "learning_rate": 1.594e-05, + "loss": 269.6239, "step": 7970 }, { - "epoch": 0.06448015901873803, - "grad_norm": 1009.6481323242188, - "learning_rate": 3.192e-05, - "loss": 163.6445, + "epoch": 0.03224020976336979, + "grad_norm": 1185.47314453125, + "learning_rate": 1.596e-05, + "loss": 217.7421, "step": 7980 }, { - "epoch": 0.06456096122302217, - "grad_norm": 1949.9471435546875, - "learning_rate": 3.196e-05, - "loss": 228.4885, + "epoch": 0.03228061102873742, + "grad_norm": 1212.916748046875, + "learning_rate": 1.598e-05, + "loss": 187.7411, "step": 7990 }, { - "epoch": 0.06464176342730629, - "grad_norm": 1189.819091796875, - "learning_rate": 3.2000000000000005e-05, - "loss": 223.6213, + "epoch": 0.03232101229410505, + "grad_norm": 658.2976684570312, + "learning_rate": 1.6000000000000003e-05, + "loss": 230.9428, "step": 8000 }, { - "epoch": 0.06472256563159043, - "grad_norm": 1335.6827392578125, - "learning_rate": 3.2040000000000005e-05, - "loss": 251.1337, + "epoch": 0.03236141355947268, + "grad_norm": 1249.109130859375, + "learning_rate": 1.6020000000000002e-05, + "loss": 222.3688, "step": 8010 }, { - "epoch": 0.06480336783587456, - "grad_norm": 1116.73583984375, - "learning_rate": 3.208e-05, - "loss": 225.0393, + "epoch": 0.032401814824840315, + "grad_norm": 3268.414306640625, + "learning_rate": 1.604e-05, + "loss": 330.1671, "step": 8020 }, { - "epoch": 0.0648841700401587, - "grad_norm": 1205.75146484375, - "learning_rate": 3.212e-05, - "loss": 154.7728, + "epoch": 0.03244221609020795, + "grad_norm": 2235.10595703125, + "learning_rate": 1.606e-05, + "loss": 239.3141, "step": 8030 }, { - "epoch": 0.06496497224444282, - "grad_norm": 2140.3408203125, - "learning_rate": 3.2160000000000004e-05, - "loss": 200.3413, + "epoch": 0.032482617355575574, + "grad_norm": 461.5077819824219, + "learning_rate": 1.6080000000000002e-05, + "loss": 210.1967, "step": 8040 }, { - "epoch": 0.06504577444872696, - "grad_norm": 2502.311279296875, - "learning_rate": 3.2200000000000003e-05, - "loss": 192.7282, + "epoch": 0.03252301862094321, + "grad_norm": 754.9512939453125, + "learning_rate": 1.6100000000000002e-05, + "loss": 196.5837, "step": 8050 }, { - "epoch": 0.0651265766530111, - "grad_norm": 2250.6474609375, - "learning_rate": 3.224e-05, - "loss": 154.933, + "epoch": 0.03256341988631084, + "grad_norm": 1378.9425048828125, + "learning_rate": 1.612e-05, + "loss": 261.1862, "step": 8060 }, { - "epoch": 0.06520737885729523, - "grad_norm": 1413.3193359375, - "learning_rate": 3.2279999999999996e-05, - "loss": 243.9269, + "epoch": 0.03260382115167847, + "grad_norm": 12839.7548828125, + "learning_rate": 1.6139999999999998e-05, + "loss": 188.6295, "step": 8070 }, { - "epoch": 0.06528818106157935, - "grad_norm": 1259.7381591796875, - "learning_rate": 3.232e-05, - "loss": 177.5336, + "epoch": 0.0326442224170461, + "grad_norm": 1415.6925048828125, + "learning_rate": 1.616e-05, + "loss": 212.514, "step": 8080 }, { - "epoch": 0.06536898326586349, - "grad_norm": 1828.6173095703125, - "learning_rate": 3.236e-05, - "loss": 189.4108, + "epoch": 0.03268462368241373, + "grad_norm": 633.0122680664062, + "learning_rate": 1.618e-05, + "loss": 167.5946, "step": 8090 }, { - "epoch": 0.06544978547014763, - "grad_norm": 723.5848999023438, - "learning_rate": 3.24e-05, - "loss": 187.8053, + "epoch": 0.032725024947781366, + "grad_norm": 1421.470947265625, + "learning_rate": 1.62e-05, + "loss": 141.4309, "step": 8100 }, { - "epoch": 0.06553058767443176, - "grad_norm": 1322.9984130859375, - "learning_rate": 3.244e-05, - "loss": 202.4878, + "epoch": 0.032765426213149, + "grad_norm": 7646.71875, + "learning_rate": 1.622e-05, + "loss": 228.34, "step": 8110 }, { - "epoch": 0.0656113898787159, - "grad_norm": 1194.4041748046875, - "learning_rate": 3.248e-05, - "loss": 200.6401, + "epoch": 0.032805827478516625, + "grad_norm": 1138.2156982421875, + "learning_rate": 1.624e-05, + "loss": 209.1626, "step": 8120 }, { - "epoch": 0.06569219208300002, - "grad_norm": 1226.958984375, - "learning_rate": 3.252e-05, - "loss": 193.835, + "epoch": 0.03284622874388426, + "grad_norm": 649.6055908203125, + "learning_rate": 1.626e-05, + "loss": 204.4883, "step": 8130 }, { - "epoch": 0.06577299428728416, - "grad_norm": 1839.5345458984375, - "learning_rate": 3.256e-05, - "loss": 247.2544, + "epoch": 0.03288663000925189, + "grad_norm": 646.7942504882812, + "learning_rate": 1.628e-05, + "loss": 155.1816, "step": 8140 }, { - "epoch": 0.06585379649156829, - "grad_norm": 906.6901245117188, - "learning_rate": 3.26e-05, - "loss": 211.7479, + "epoch": 0.032927031274619524, + "grad_norm": 632.5813598632812, + "learning_rate": 1.63e-05, + "loss": 144.9363, "step": 8150 }, { - "epoch": 0.06593459869585243, - "grad_norm": 1207.205810546875, - "learning_rate": 3.2640000000000006e-05, - "loss": 166.2223, + "epoch": 0.03296743253998715, + "grad_norm": 859.72900390625, + "learning_rate": 1.6320000000000003e-05, + "loss": 207.8656, "step": 8160 }, { - "epoch": 0.06601540090013655, - "grad_norm": 1547.962158203125, - "learning_rate": 3.268e-05, - "loss": 205.9062, + "epoch": 0.03300783380535478, + "grad_norm": 661.2268676757812, + "learning_rate": 1.634e-05, + "loss": 156.5535, "step": 8170 }, { - "epoch": 0.06609620310442069, - "grad_norm": 787.65234375, - "learning_rate": 3.272e-05, - "loss": 200.8066, + "epoch": 0.033048235070722416, + "grad_norm": 1062.429931640625, + "learning_rate": 1.636e-05, + "loss": 206.9411, "step": 8180 }, { - "epoch": 0.06617700530870482, - "grad_norm": 1378.317138671875, - "learning_rate": 3.2760000000000005e-05, - "loss": 166.9708, + "epoch": 0.03308863633609005, + "grad_norm": 1048.27294921875, + "learning_rate": 1.6380000000000002e-05, + "loss": 186.6454, "step": 8190 }, { - "epoch": 0.06625780751298896, - "grad_norm": 2580.17822265625, - "learning_rate": 3.2800000000000004e-05, - "loss": 153.1489, + "epoch": 0.033129037601457675, + "grad_norm": 1131.9554443359375, + "learning_rate": 1.6400000000000002e-05, + "loss": 255.3936, "step": 8200 }, { - "epoch": 0.06633860971727308, - "grad_norm": 820.1066284179688, - "learning_rate": 3.2840000000000004e-05, - "loss": 159.0604, + "epoch": 0.03316943886682531, + "grad_norm": 1163.3974609375, + "learning_rate": 1.6420000000000002e-05, + "loss": 188.8816, "step": 8210 }, { - "epoch": 0.06641941192155722, - "grad_norm": 1294.5841064453125, - "learning_rate": 3.288e-05, - "loss": 144.4908, + "epoch": 0.03320984013219294, + "grad_norm": 555.4834594726562, + "learning_rate": 1.644e-05, + "loss": 134.8741, "step": 8220 }, { - "epoch": 0.06650021412584135, - "grad_norm": 1178.8956298828125, - "learning_rate": 3.292e-05, - "loss": 162.4389, + "epoch": 0.033250241397560575, + "grad_norm": 837.699951171875, + "learning_rate": 1.646e-05, + "loss": 183.4674, "step": 8230 }, { - "epoch": 0.06658101633012549, - "grad_norm": 1132.5394287109375, - "learning_rate": 3.296e-05, - "loss": 160.9665, + "epoch": 0.0332906426629282, + "grad_norm": 731.1207275390625, + "learning_rate": 1.648e-05, + "loss": 117.9002, "step": 8240 }, { - "epoch": 0.06666181853440963, - "grad_norm": 934.4537353515625, - "learning_rate": 3.3e-05, - "loss": 179.4576, + "epoch": 0.033331043928295834, + "grad_norm": 747.77880859375, + "learning_rate": 1.65e-05, + "loss": 222.1833, "step": 8250 }, { - "epoch": 0.06674262073869375, - "grad_norm": 699.999267578125, - "learning_rate": 3.304e-05, - "loss": 179.1677, + "epoch": 0.03337144519366347, + "grad_norm": 1067.3509521484375, + "learning_rate": 1.652e-05, + "loss": 186.3794, "step": 8260 }, { - "epoch": 0.06682342294297788, - "grad_norm": 2357.673095703125, - "learning_rate": 3.308e-05, - "loss": 181.2575, + "epoch": 0.0334118464590311, + "grad_norm": 1084.0224609375, + "learning_rate": 1.654e-05, + "loss": 236.8639, "step": 8270 }, { - "epoch": 0.06690422514726202, - "grad_norm": 1398.561767578125, - "learning_rate": 3.312e-05, - "loss": 188.7493, + "epoch": 0.033452247724398726, + "grad_norm": 1279.375732421875, + "learning_rate": 1.656e-05, + "loss": 259.652, "step": 8280 }, { - "epoch": 0.06698502735154616, - "grad_norm": 1150.1177978515625, - "learning_rate": 3.316e-05, - "loss": 179.0108, + "epoch": 0.03349264898976636, + "grad_norm": 947.9122314453125, + "learning_rate": 1.658e-05, + "loss": 233.7726, "step": 8290 }, { - "epoch": 0.06706582955583028, - "grad_norm": 3013.264404296875, - "learning_rate": 3.32e-05, - "loss": 227.943, + "epoch": 0.03353305025513399, + "grad_norm": 439.6305236816406, + "learning_rate": 1.66e-05, + "loss": 177.2815, "step": 8300 }, { - "epoch": 0.06714663176011441, - "grad_norm": 1476.740966796875, - "learning_rate": 3.324e-05, - "loss": 189.2425, + "epoch": 0.033573451520501625, + "grad_norm": 815.047119140625, + "learning_rate": 1.662e-05, + "loss": 176.6726, "step": 8310 }, { - "epoch": 0.06722743396439855, - "grad_norm": 2047.337646484375, - "learning_rate": 3.328e-05, - "loss": 290.5052, + "epoch": 0.03361385278586925, + "grad_norm": 6470.5361328125, + "learning_rate": 1.664e-05, + "loss": 222.3292, "step": 8320 }, { - "epoch": 0.06730823616868269, - "grad_norm": 1783.235107421875, - "learning_rate": 3.332e-05, - "loss": 166.3931, + "epoch": 0.033654254051236884, + "grad_norm": 790.7086791992188, + "learning_rate": 1.666e-05, + "loss": 194.9121, "step": 8330 }, { - "epoch": 0.06738903837296681, - "grad_norm": 755.54052734375, - "learning_rate": 3.336e-05, - "loss": 173.6512, + "epoch": 0.03369465531660452, + "grad_norm": 794.2681884765625, + "learning_rate": 1.668e-05, + "loss": 175.7233, "step": 8340 }, { - "epoch": 0.06746984057725094, - "grad_norm": 1432.845703125, - "learning_rate": 3.3400000000000005e-05, - "loss": 166.9066, + "epoch": 0.03373505658197215, + "grad_norm": 1229.4151611328125, + "learning_rate": 1.6700000000000003e-05, + "loss": 227.8069, "step": 8350 }, { - "epoch": 0.06755064278153508, - "grad_norm": 1066.4442138671875, - "learning_rate": 3.344e-05, - "loss": 183.3727, + "epoch": 0.033775457847339777, + "grad_norm": 792.4901123046875, + "learning_rate": 1.672e-05, + "loss": 198.4519, "step": 8360 }, { - "epoch": 0.06763144498581922, - "grad_norm": 955.55810546875, - "learning_rate": 3.348e-05, - "loss": 161.1286, + "epoch": 0.03381585911270741, + "grad_norm": 1083.63623046875, + "learning_rate": 1.674e-05, + "loss": 164.7306, "step": 8370 }, { - "epoch": 0.06771224719010335, - "grad_norm": 1308.668701171875, - "learning_rate": 3.3520000000000004e-05, - "loss": 150.0731, + "epoch": 0.03385626037807504, + "grad_norm": 1077.84765625, + "learning_rate": 1.6760000000000002e-05, + "loss": 154.6505, "step": 8380 }, { - "epoch": 0.06779304939438748, - "grad_norm": 1002.2681274414062, - "learning_rate": 3.3560000000000004e-05, - "loss": 178.5217, + "epoch": 0.033896661643442676, + "grad_norm": 2413.804931640625, + "learning_rate": 1.6780000000000002e-05, + "loss": 247.3373, "step": 8390 }, { - "epoch": 0.06787385159867161, - "grad_norm": 1684.12353515625, - "learning_rate": 3.3600000000000004e-05, - "loss": 202.3591, + "epoch": 0.0339370629088103, + "grad_norm": 2946.898681640625, + "learning_rate": 1.6800000000000002e-05, + "loss": 161.9533, "step": 8400 }, { - "epoch": 0.06795465380295575, - "grad_norm": 1875.5465087890625, - "learning_rate": 3.3639999999999996e-05, - "loss": 203.2144, + "epoch": 0.033977464174177935, + "grad_norm": 1092.52392578125, + "learning_rate": 1.6819999999999998e-05, + "loss": 216.0875, "step": 8410 }, { - "epoch": 0.06803545600723988, - "grad_norm": 2445.532470703125, - "learning_rate": 3.368e-05, - "loss": 215.2096, + "epoch": 0.03401786543954557, + "grad_norm": 930.1837158203125, + "learning_rate": 1.684e-05, + "loss": 169.9454, "step": 8420 }, { - "epoch": 0.068116258211524, - "grad_norm": 3386.977294921875, - "learning_rate": 3.372e-05, - "loss": 198.287, + "epoch": 0.0340582667049132, + "grad_norm": 514.6612548828125, + "learning_rate": 1.686e-05, + "loss": 202.2469, "step": 8430 }, { - "epoch": 0.06819706041580814, - "grad_norm": 1535.1038818359375, - "learning_rate": 3.376e-05, - "loss": 217.1945, + "epoch": 0.03409866797028083, + "grad_norm": 747.2769775390625, + "learning_rate": 1.688e-05, + "loss": 176.8761, "step": 8440 }, { - "epoch": 0.06827786262009228, - "grad_norm": 1626.080810546875, - "learning_rate": 3.38e-05, - "loss": 208.5945, + "epoch": 0.03413906923564846, + "grad_norm": 1148.2232666015625, + "learning_rate": 1.69e-05, + "loss": 124.8942, "step": 8450 }, { - "epoch": 0.06835866482437641, - "grad_norm": 2029.3245849609375, - "learning_rate": 3.384e-05, - "loss": 211.3623, + "epoch": 0.03417947050101609, + "grad_norm": 1272.0814208984375, + "learning_rate": 1.692e-05, + "loss": 201.3207, "step": 8460 }, { - "epoch": 0.06843946702866054, - "grad_norm": 1024.6494140625, - "learning_rate": 3.388e-05, - "loss": 184.3272, + "epoch": 0.034219871766383726, + "grad_norm": 658.9476318359375, + "learning_rate": 1.694e-05, + "loss": 193.7989, "step": 8470 }, { - "epoch": 0.06852026923294467, - "grad_norm": 1310.7005615234375, - "learning_rate": 3.392e-05, - "loss": 180.3935, + "epoch": 0.03426027303175135, + "grad_norm": 1220.1839599609375, + "learning_rate": 1.696e-05, + "loss": 204.7586, "step": 8480 }, { - "epoch": 0.06860107143722881, - "grad_norm": 1609.6656494140625, - "learning_rate": 3.396e-05, - "loss": 244.0554, + "epoch": 0.034300674297118985, + "grad_norm": 650.1453857421875, + "learning_rate": 1.698e-05, + "loss": 243.6956, "step": 8490 }, { - "epoch": 0.06868187364151294, - "grad_norm": 756.1630249023438, - "learning_rate": 3.4000000000000007e-05, - "loss": 288.3515, + "epoch": 0.03434107556248662, + "grad_norm": 654.3552856445312, + "learning_rate": 1.7000000000000003e-05, + "loss": 200.6542, "step": 8500 }, { - "epoch": 0.06876267584579707, - "grad_norm": 2786.549560546875, - "learning_rate": 3.404e-05, - "loss": 193.1405, + "epoch": 0.03438147682785425, + "grad_norm": 831.0885009765625, + "learning_rate": 1.702e-05, + "loss": 245.5201, "step": 8510 }, { - "epoch": 0.0688434780500812, - "grad_norm": 1552.839599609375, - "learning_rate": 3.408e-05, - "loss": 212.5238, + "epoch": 0.03442187809322188, + "grad_norm": 945.2589111328125, + "learning_rate": 1.704e-05, + "loss": 192.208, "step": 8520 }, { - "epoch": 0.06892428025436534, - "grad_norm": 971.9280395507812, - "learning_rate": 3.412e-05, - "loss": 182.2518, + "epoch": 0.03446227935858951, + "grad_norm": 929.2937622070312, + "learning_rate": 1.706e-05, + "loss": 195.5855, "step": 8530 }, { - "epoch": 0.06900508245864947, - "grad_norm": 1222.0596923828125, - "learning_rate": 3.4160000000000005e-05, - "loss": 149.9633, + "epoch": 0.034502680623957144, + "grad_norm": 1585.8245849609375, + "learning_rate": 1.7080000000000002e-05, + "loss": 161.8802, "step": 8540 }, { - "epoch": 0.06908588466293361, - "grad_norm": 1192.5916748046875, - "learning_rate": 3.4200000000000005e-05, - "loss": 153.3915, + "epoch": 0.03454308188932478, + "grad_norm": 1783.3887939453125, + "learning_rate": 1.7100000000000002e-05, + "loss": 259.6359, "step": 8550 }, { - "epoch": 0.06916668686721773, - "grad_norm": 1012.8413696289062, - "learning_rate": 3.424e-05, - "loss": 214.9546, + "epoch": 0.0345834831546924, + "grad_norm": 1169.6343994140625, + "learning_rate": 1.712e-05, + "loss": 126.7247, "step": 8560 }, { - "epoch": 0.06924748907150187, - "grad_norm": 1312.1915283203125, - "learning_rate": 3.4280000000000004e-05, - "loss": 182.429, + "epoch": 0.034623884420060036, + "grad_norm": 967.0234375, + "learning_rate": 1.7140000000000002e-05, + "loss": 228.0373, "step": 8570 }, { - "epoch": 0.069328291275786, - "grad_norm": 1570.241455078125, - "learning_rate": 3.4320000000000003e-05, - "loss": 186.6674, + "epoch": 0.03466428568542767, + "grad_norm": 1204.86865234375, + "learning_rate": 1.7160000000000002e-05, + "loss": 183.3604, "step": 8580 }, { - "epoch": 0.06940909348007014, - "grad_norm": 3148.201416015625, - "learning_rate": 3.436e-05, - "loss": 201.0193, + "epoch": 0.0347046869507953, + "grad_norm": 3491.818603515625, + "learning_rate": 1.718e-05, + "loss": 151.8515, "step": 8590 }, { - "epoch": 0.06948989568435426, - "grad_norm": 546.072998046875, - "learning_rate": 3.4399999999999996e-05, - "loss": 232.1858, + "epoch": 0.03474508821616293, + "grad_norm": 698.922607421875, + "learning_rate": 1.7199999999999998e-05, + "loss": 364.246, "step": 8600 }, { - "epoch": 0.0695706978886384, - "grad_norm": 2019.9246826171875, - "learning_rate": 3.444e-05, - "loss": 175.6072, + "epoch": 0.03478548948153056, + "grad_norm": 754.37646484375, + "learning_rate": 1.722e-05, + "loss": 214.1979, "step": 8610 }, { - "epoch": 0.06965150009292254, - "grad_norm": 1518.28955078125, - "learning_rate": 3.448e-05, - "loss": 159.8877, + "epoch": 0.034825890746898194, + "grad_norm": 1443.7174072265625, + "learning_rate": 1.724e-05, + "loss": 146.2507, "step": 8620 }, { - "epoch": 0.06973230229720667, - "grad_norm": 891.6912841796875, - "learning_rate": 3.452e-05, - "loss": 145.3052, + "epoch": 0.03486629201226583, + "grad_norm": 1486.6044921875, + "learning_rate": 1.726e-05, + "loss": 209.7959, "step": 8630 }, { - "epoch": 0.0698131045014908, - "grad_norm": 3085.961669921875, - "learning_rate": 3.456e-05, - "loss": 229.9006, + "epoch": 0.034906693277633453, + "grad_norm": 1869.3212890625, + "learning_rate": 1.728e-05, + "loss": 216.6505, "step": 8640 }, { - "epoch": 0.06989390670577493, - "grad_norm": 2942.58837890625, - "learning_rate": 3.46e-05, - "loss": 217.8281, + "epoch": 0.03494709454300109, + "grad_norm": 1274.484130859375, + "learning_rate": 1.73e-05, + "loss": 175.598, "step": 8650 }, { - "epoch": 0.06997470891005907, - "grad_norm": 1092.8861083984375, - "learning_rate": 3.464e-05, - "loss": 175.1439, + "epoch": 0.03498749580836872, + "grad_norm": 906.1784057617188, + "learning_rate": 1.732e-05, + "loss": 194.2832, "step": 8660 }, { - "epoch": 0.0700555111143432, - "grad_norm": 2018.2930908203125, - "learning_rate": 3.468e-05, - "loss": 161.8401, + "epoch": 0.03502789707373635, + "grad_norm": 897.7321166992188, + "learning_rate": 1.734e-05, + "loss": 167.8126, "step": 8670 }, { - "epoch": 0.07013631331862734, - "grad_norm": 893.0232543945312, - "learning_rate": 3.472e-05, - "loss": 200.8942, + "epoch": 0.03506829833910398, + "grad_norm": 1329.6224365234375, + "learning_rate": 1.736e-05, + "loss": 100.7456, "step": 8680 }, { - "epoch": 0.07021711552291146, - "grad_norm": 1257.9110107421875, - "learning_rate": 3.4760000000000006e-05, - "loss": 244.2999, + "epoch": 0.03510869960447161, + "grad_norm": 2856.0009765625, + "learning_rate": 1.7380000000000003e-05, + "loss": 136.0745, "step": 8690 }, { - "epoch": 0.0702979177271956, - "grad_norm": 875.2976684570312, - "learning_rate": 3.48e-05, - "loss": 135.7402, + "epoch": 0.035149100869839245, + "grad_norm": 851.291748046875, + "learning_rate": 1.74e-05, + "loss": 276.2555, "step": 8700 }, { - "epoch": 0.07037871993147973, - "grad_norm": 1306.94287109375, - "learning_rate": 3.484e-05, - "loss": 261.4927, + "epoch": 0.03518950213520688, + "grad_norm": 1677.5799560546875, + "learning_rate": 1.742e-05, + "loss": 209.6706, "step": 8710 }, { - "epoch": 0.07045952213576387, - "grad_norm": 932.8497314453125, - "learning_rate": 3.4880000000000005e-05, - "loss": 160.9935, + "epoch": 0.035229903400574504, + "grad_norm": 640.1288452148438, + "learning_rate": 1.7440000000000002e-05, + "loss": 223.2772, "step": 8720 }, { - "epoch": 0.07054032434004799, - "grad_norm": 1358.204345703125, - "learning_rate": 3.4920000000000004e-05, - "loss": 158.5028, + "epoch": 0.03527030466594214, + "grad_norm": 4403.36962890625, + "learning_rate": 1.7460000000000002e-05, + "loss": 219.3949, "step": 8730 }, { - "epoch": 0.07062112654433213, - "grad_norm": 4013.16455078125, - "learning_rate": 3.4960000000000004e-05, - "loss": 189.8207, + "epoch": 0.03531070593130977, + "grad_norm": 2118.526123046875, + "learning_rate": 1.7480000000000002e-05, + "loss": 264.3927, "step": 8740 }, { - "epoch": 0.07070192874861626, - "grad_norm": 808.0092163085938, - "learning_rate": 3.5e-05, - "loss": 185.6832, + "epoch": 0.0353511071966774, + "grad_norm": 1355.910400390625, + "learning_rate": 1.75e-05, + "loss": 172.0311, "step": 8750 }, { - "epoch": 0.0707827309529004, - "grad_norm": 956.4999389648438, - "learning_rate": 3.504e-05, - "loss": 174.4473, + "epoch": 0.03539150846204503, + "grad_norm": 1693.513671875, + "learning_rate": 1.752e-05, + "loss": 185.7292, "step": 8760 }, { - "epoch": 0.07086353315718452, - "grad_norm": 1209.3671875, - "learning_rate": 3.508e-05, - "loss": 172.7735, + "epoch": 0.03543190972741266, + "grad_norm": 1187.20263671875, + "learning_rate": 1.754e-05, + "loss": 157.557, "step": 8770 }, { - "epoch": 0.07094433536146866, - "grad_norm": 1574.357666015625, - "learning_rate": 3.512e-05, - "loss": 259.2428, + "epoch": 0.035472310992780295, + "grad_norm": 893.8792114257812, + "learning_rate": 1.756e-05, + "loss": 191.817, "step": 8780 }, { - "epoch": 0.0710251375657528, - "grad_norm": 1376.3720703125, - "learning_rate": 3.516e-05, - "loss": 154.0107, + "epoch": 0.03551271225814793, + "grad_norm": 1077.1021728515625, + "learning_rate": 1.758e-05, + "loss": 189.9844, "step": 8790 }, { - "epoch": 0.07110593977003693, - "grad_norm": 1948.1181640625, - "learning_rate": 3.52e-05, - "loss": 204.8118, + "epoch": 0.035553113523515555, + "grad_norm": 929.8483276367188, + "learning_rate": 1.76e-05, + "loss": 197.5466, "step": 8800 }, { - "epoch": 0.07118674197432107, - "grad_norm": 3535.336669921875, - "learning_rate": 3.524e-05, - "loss": 206.1783, + "epoch": 0.03559351478888319, + "grad_norm": 2271.410400390625, + "learning_rate": 1.762e-05, + "loss": 228.7887, "step": 8810 }, { - "epoch": 0.07126754417860519, - "grad_norm": 865.1713256835938, - "learning_rate": 3.528e-05, - "loss": 145.2324, + "epoch": 0.03563391605425082, + "grad_norm": 2570.052001953125, + "learning_rate": 1.764e-05, + "loss": 155.6589, "step": 8820 }, { - "epoch": 0.07134834638288932, - "grad_norm": 973.4431762695312, - "learning_rate": 3.532e-05, - "loss": 202.3396, + "epoch": 0.035674317319618454, + "grad_norm": 657.0897216796875, + "learning_rate": 1.766e-05, + "loss": 225.2949, "step": 8830 }, { - "epoch": 0.07142914858717346, - "grad_norm": 1085.9337158203125, - "learning_rate": 3.536000000000001e-05, - "loss": 256.6032, + "epoch": 0.03571471858498608, + "grad_norm": 1940.9866943359375, + "learning_rate": 1.7680000000000004e-05, + "loss": 162.1829, "step": 8840 }, { - "epoch": 0.0715099507914576, - "grad_norm": 3106.70556640625, - "learning_rate": 3.54e-05, - "loss": 188.9828, + "epoch": 0.03575511985035371, + "grad_norm": 1256.1148681640625, + "learning_rate": 1.77e-05, + "loss": 174.6637, "step": 8850 }, { - "epoch": 0.07159075299574172, - "grad_norm": 978.59765625, - "learning_rate": 3.544e-05, - "loss": 161.3262, + "epoch": 0.035795521115721346, + "grad_norm": 394.8336181640625, + "learning_rate": 1.772e-05, + "loss": 173.8255, "step": 8860 }, { - "epoch": 0.07167155520002585, - "grad_norm": 813.19140625, - "learning_rate": 3.548e-05, - "loss": 184.0631, + "epoch": 0.03583592238108898, + "grad_norm": 2153.534423828125, + "learning_rate": 1.774e-05, + "loss": 171.3043, "step": 8870 }, { - "epoch": 0.07175235740430999, - "grad_norm": 1006.8140258789062, - "learning_rate": 3.5520000000000006e-05, - "loss": 193.3974, + "epoch": 0.035876323646456605, + "grad_norm": 760.2359619140625, + "learning_rate": 1.7760000000000003e-05, + "loss": 200.2851, "step": 8880 }, { - "epoch": 0.07183315960859413, - "grad_norm": 1306.429443359375, - "learning_rate": 3.5560000000000005e-05, - "loss": 225.1129, + "epoch": 0.03591672491182424, + "grad_norm": 1478.953369140625, + "learning_rate": 1.7780000000000003e-05, + "loss": 225.8184, "step": 8890 }, { - "epoch": 0.07191396181287825, - "grad_norm": 905.6278686523438, - "learning_rate": 3.56e-05, - "loss": 168.6888, + "epoch": 0.03595712617719187, + "grad_norm": 1087.418212890625, + "learning_rate": 1.78e-05, + "loss": 185.8715, "step": 8900 }, { - "epoch": 0.07199476401716239, - "grad_norm": 1611.2620849609375, - "learning_rate": 3.5640000000000004e-05, - "loss": 159.7826, + "epoch": 0.035997527442559504, + "grad_norm": 2434.63623046875, + "learning_rate": 1.7820000000000002e-05, + "loss": 178.0469, "step": 8910 }, { - "epoch": 0.07207556622144652, - "grad_norm": 1513.336669921875, - "learning_rate": 3.5680000000000004e-05, - "loss": 164.5545, + "epoch": 0.03603792870792713, + "grad_norm": 1043.3377685546875, + "learning_rate": 1.7840000000000002e-05, + "loss": 179.4526, "step": 8920 }, { - "epoch": 0.07215636842573066, - "grad_norm": 928.5970458984375, - "learning_rate": 3.5720000000000004e-05, - "loss": 169.191, + "epoch": 0.036078329973294763, + "grad_norm": 1111.5899658203125, + "learning_rate": 1.7860000000000002e-05, + "loss": 210.2127, "step": 8930 }, { - "epoch": 0.0722371706300148, - "grad_norm": 1008.218505859375, - "learning_rate": 3.5759999999999996e-05, - "loss": 185.4698, + "epoch": 0.0361187312386624, + "grad_norm": 688.8521728515625, + "learning_rate": 1.7879999999999998e-05, + "loss": 126.2401, "step": 8940 }, { - "epoch": 0.07231797283429892, - "grad_norm": 1244.755615234375, - "learning_rate": 3.58e-05, - "loss": 192.6423, + "epoch": 0.03615913250403003, + "grad_norm": 958.8832397460938, + "learning_rate": 1.79e-05, + "loss": 221.2132, "step": 8950 }, { - "epoch": 0.07239877503858305, - "grad_norm": 1162.1192626953125, - "learning_rate": 3.584e-05, - "loss": 133.6257, + "epoch": 0.036199533769397656, + "grad_norm": 1401.521728515625, + "learning_rate": 1.792e-05, + "loss": 236.2084, "step": 8960 }, { - "epoch": 0.07247957724286719, - "grad_norm": 2176.612548828125, - "learning_rate": 3.588e-05, - "loss": 171.5323, + "epoch": 0.03623993503476529, + "grad_norm": 10308.7021484375, + "learning_rate": 1.794e-05, + "loss": 186.4272, "step": 8970 }, { - "epoch": 0.07256037944715132, - "grad_norm": 2865.153564453125, - "learning_rate": 3.592e-05, - "loss": 202.3073, + "epoch": 0.03628033630013292, + "grad_norm": 1097.2364501953125, + "learning_rate": 1.796e-05, + "loss": 178.1187, "step": 8980 }, { - "epoch": 0.07264118165143545, - "grad_norm": 1172.5172119140625, - "learning_rate": 3.596e-05, - "loss": 149.6037, + "epoch": 0.036320737565500555, + "grad_norm": 1978.429931640625, + "learning_rate": 1.798e-05, + "loss": 196.0737, "step": 8990 }, { - "epoch": 0.07272198385571958, - "grad_norm": 2171.366455078125, - "learning_rate": 3.6e-05, - "loss": 159.9915, + "epoch": 0.03636113883086818, + "grad_norm": 3581.975341796875, + "learning_rate": 1.8e-05, + "loss": 209.1971, "step": 9000 }, { - "epoch": 0.07280278606000372, - "grad_norm": 1030.8807373046875, - "learning_rate": 3.604e-05, - "loss": 175.5365, + "epoch": 0.036401540096235814, + "grad_norm": 1180.078125, + "learning_rate": 1.802e-05, + "loss": 220.5676, "step": 9010 }, { - "epoch": 0.07288358826428785, - "grad_norm": 1232.118408203125, - "learning_rate": 3.608e-05, - "loss": 115.2722, + "epoch": 0.03644194136160345, + "grad_norm": 1235.55859375, + "learning_rate": 1.804e-05, + "loss": 124.504, "step": 9020 }, { - "epoch": 0.07296439046857198, - "grad_norm": 2816.69287109375, - "learning_rate": 3.6120000000000007e-05, - "loss": 194.1796, + "epoch": 0.03648234262697108, + "grad_norm": 1020.11865234375, + "learning_rate": 1.8060000000000003e-05, + "loss": 99.5129, "step": 9030 }, { - "epoch": 0.07304519267285611, - "grad_norm": 1613.893798828125, - "learning_rate": 3.616e-05, - "loss": 144.0771, + "epoch": 0.036522743892338706, + "grad_norm": 962.7813110351562, + "learning_rate": 1.808e-05, + "loss": 178.8402, "step": 9040 }, { - "epoch": 0.07312599487714025, - "grad_norm": 735.4164428710938, - "learning_rate": 3.62e-05, - "loss": 136.124, + "epoch": 0.03656314515770634, + "grad_norm": 1318.480224609375, + "learning_rate": 1.81e-05, + "loss": 180.0119, "step": 9050 }, { - "epoch": 0.07320679708142439, - "grad_norm": 704.7960815429688, - "learning_rate": 3.624e-05, - "loss": 150.2144, + "epoch": 0.03660354642307397, + "grad_norm": 2261.136962890625, + "learning_rate": 1.812e-05, + "loss": 329.5452, "step": 9060 }, { - "epoch": 0.07328759928570851, - "grad_norm": 1374.904052734375, - "learning_rate": 3.6280000000000005e-05, - "loss": 200.4284, + "epoch": 0.036643947688441605, + "grad_norm": 3996.27197265625, + "learning_rate": 1.8140000000000003e-05, + "loss": 224.336, "step": 9070 }, { - "epoch": 0.07336840148999264, - "grad_norm": 489.0487365722656, - "learning_rate": 3.6320000000000005e-05, - "loss": 166.5231, + "epoch": 0.03668434895380923, + "grad_norm": 724.8762817382812, + "learning_rate": 1.8160000000000002e-05, + "loss": 148.4422, "step": 9080 }, { - "epoch": 0.07344920369427678, - "grad_norm": 859.1505737304688, - "learning_rate": 3.636e-05, - "loss": 174.3586, + "epoch": 0.036724750219176865, + "grad_norm": 838.4283447265625, + "learning_rate": 1.818e-05, + "loss": 160.2637, "step": 9090 }, { - "epoch": 0.07353000589856092, - "grad_norm": 1292.6456298828125, - "learning_rate": 3.6400000000000004e-05, - "loss": 189.9267, + "epoch": 0.0367651514845445, + "grad_norm": 1999.3927001953125, + "learning_rate": 1.8200000000000002e-05, + "loss": 252.0707, "step": 9100 }, { - "epoch": 0.07361080810284505, - "grad_norm": 811.70263671875, - "learning_rate": 3.6440000000000003e-05, - "loss": 197.9917, + "epoch": 0.03680555274991213, + "grad_norm": 1008.3331298828125, + "learning_rate": 1.8220000000000002e-05, + "loss": 112.7752, "step": 9110 }, { - "epoch": 0.07369161030712917, - "grad_norm": 1170.6190185546875, - "learning_rate": 3.648e-05, - "loss": 186.6553, + "epoch": 0.03684595401527976, + "grad_norm": 1373.5531005859375, + "learning_rate": 1.824e-05, + "loss": 140.497, "step": 9120 }, { - "epoch": 0.07377241251141331, - "grad_norm": 993.7378540039062, - "learning_rate": 3.652e-05, - "loss": 168.9459, + "epoch": 0.03688635528064739, + "grad_norm": 698.5148315429688, + "learning_rate": 1.826e-05, + "loss": 132.642, "step": 9130 }, { - "epoch": 0.07385321471569745, - "grad_norm": 1279.6517333984375, - "learning_rate": 3.656e-05, - "loss": 214.9513, + "epoch": 0.03692675654601502, + "grad_norm": 1003.8239135742188, + "learning_rate": 1.828e-05, + "loss": 172.7703, "step": 9140 }, { - "epoch": 0.07393401691998158, - "grad_norm": 1972.90283203125, - "learning_rate": 3.66e-05, - "loss": 218.6674, + "epoch": 0.03696715781138265, + "grad_norm": 695.0194702148438, + "learning_rate": 1.83e-05, + "loss": 188.6919, "step": 9150 }, { - "epoch": 0.0740148191242657, - "grad_norm": 1044.6314697265625, - "learning_rate": 3.664e-05, - "loss": 196.509, + "epoch": 0.03700755907675028, + "grad_norm": 844.4266967773438, + "learning_rate": 1.832e-05, + "loss": 253.677, "step": 9160 }, { - "epoch": 0.07409562132854984, - "grad_norm": 1271.7125244140625, - "learning_rate": 3.668e-05, - "loss": 214.8085, + "epoch": 0.037047960342117915, + "grad_norm": 1122.217041015625, + "learning_rate": 1.834e-05, + "loss": 197.4021, "step": 9170 }, { - "epoch": 0.07417642353283398, - "grad_norm": 728.4358520507812, - "learning_rate": 3.672000000000001e-05, - "loss": 138.856, + "epoch": 0.03708836160748555, + "grad_norm": 1105.47314453125, + "learning_rate": 1.8360000000000004e-05, + "loss": 146.558, "step": 9180 }, { - "epoch": 0.07425722573711811, - "grad_norm": 2291.1845703125, - "learning_rate": 3.676e-05, - "loss": 222.0645, + "epoch": 0.037128762872853174, + "grad_norm": 1299.706298828125, + "learning_rate": 1.838e-05, + "loss": 196.9577, "step": 9190 }, { - "epoch": 0.07433802794140223, - "grad_norm": 2109.933349609375, - "learning_rate": 3.68e-05, - "loss": 187.4435, + "epoch": 0.03716916413822081, + "grad_norm": 4088.533447265625, + "learning_rate": 1.84e-05, + "loss": 178.9004, "step": 9200 }, { - "epoch": 0.07441883014568637, - "grad_norm": 808.6648559570312, - "learning_rate": 3.684e-05, - "loss": 170.4844, + "epoch": 0.03720956540358844, + "grad_norm": 2833.98388671875, + "learning_rate": 1.842e-05, + "loss": 220.8664, "step": 9210 }, { - "epoch": 0.07449963234997051, - "grad_norm": 1456.9173583984375, - "learning_rate": 3.6880000000000006e-05, - "loss": 141.3195, + "epoch": 0.037249966668956074, + "grad_norm": 649.87548828125, + "learning_rate": 1.8440000000000003e-05, + "loss": 163.9712, "step": 9220 }, { - "epoch": 0.07458043455425464, - "grad_norm": 3270.64501953125, - "learning_rate": 3.692e-05, - "loss": 165.7313, + "epoch": 0.0372903679343237, + "grad_norm": 1362.6846923828125, + "learning_rate": 1.846e-05, + "loss": 186.3454, "step": 9230 }, { - "epoch": 0.07466123675853878, - "grad_norm": 1216.39306640625, - "learning_rate": 3.696e-05, - "loss": 161.4625, + "epoch": 0.03733076919969133, + "grad_norm": 4132.52099609375, + "learning_rate": 1.848e-05, + "loss": 205.0476, "step": 9240 }, { - "epoch": 0.0747420389628229, - "grad_norm": 1753.88427734375, - "learning_rate": 3.7e-05, - "loss": 171.3236, + "epoch": 0.037371170465058966, + "grad_norm": 1643.118896484375, + "learning_rate": 1.85e-05, + "loss": 209.0955, "step": 9250 }, { - "epoch": 0.07482284116710704, - "grad_norm": 794.2777099609375, - "learning_rate": 3.7040000000000005e-05, - "loss": 162.1762, + "epoch": 0.0374115717304266, + "grad_norm": 5062.59716796875, + "learning_rate": 1.8520000000000002e-05, + "loss": 145.1507, "step": 9260 }, { - "epoch": 0.07490364337139117, - "grad_norm": 3876.9150390625, - "learning_rate": 3.7080000000000004e-05, - "loss": 199.3948, + "epoch": 0.037451972995794225, + "grad_norm": 390.54925537109375, + "learning_rate": 1.8540000000000002e-05, + "loss": 221.6778, "step": 9270 }, { - "epoch": 0.07498444557567531, - "grad_norm": 960.2236328125, - "learning_rate": 3.712e-05, - "loss": 139.2535, + "epoch": 0.03749237426116186, + "grad_norm": 593.5599975585938, + "learning_rate": 1.856e-05, + "loss": 188.7002, "step": 9280 }, { - "epoch": 0.07506524777995943, - "grad_norm": 1083.4541015625, - "learning_rate": 3.716e-05, - "loss": 214.9501, + "epoch": 0.03753277552652949, + "grad_norm": 1883.616455078125, + "learning_rate": 1.858e-05, + "loss": 189.2378, "step": 9290 }, { - "epoch": 0.07514604998424357, - "grad_norm": 1267.2135009765625, - "learning_rate": 3.72e-05, - "loss": 201.7698, + "epoch": 0.037573176791897124, + "grad_norm": 1161.8848876953125, + "learning_rate": 1.86e-05, + "loss": 197.2501, "step": 9300 }, { - "epoch": 0.0752268521885277, - "grad_norm": 791.3404541015625, - "learning_rate": 3.724e-05, - "loss": 197.6358, + "epoch": 0.03761357805726475, + "grad_norm": 692.7471313476562, + "learning_rate": 1.862e-05, + "loss": 171.7747, "step": 9310 }, { - "epoch": 0.07530765439281184, - "grad_norm": 1108.4683837890625, - "learning_rate": 3.728e-05, - "loss": 188.061, + "epoch": 0.03765397932263238, + "grad_norm": 644.8750610351562, + "learning_rate": 1.864e-05, + "loss": 243.3304, "step": 9320 }, { - "epoch": 0.07538845659709596, - "grad_norm": 1755.4454345703125, - "learning_rate": 3.732e-05, - "loss": 151.4291, + "epoch": 0.037694380588000016, + "grad_norm": 1095.04443359375, + "learning_rate": 1.866e-05, + "loss": 262.2983, "step": 9330 }, { - "epoch": 0.0754692588013801, - "grad_norm": 1762.6278076171875, - "learning_rate": 3.736e-05, - "loss": 171.227, + "epoch": 0.03773478185336765, + "grad_norm": 1455.4674072265625, + "learning_rate": 1.868e-05, + "loss": 179.6782, "step": 9340 }, { - "epoch": 0.07555006100566423, - "grad_norm": 1045.48388671875, - "learning_rate": 3.74e-05, - "loss": 167.8686, + "epoch": 0.037775183118735275, + "grad_norm": 788.5756225585938, + "learning_rate": 1.87e-05, + "loss": 108.4792, "step": 9350 }, { - "epoch": 0.07563086320994837, - "grad_norm": 1305.0751953125, - "learning_rate": 3.744e-05, - "loss": 235.7066, + "epoch": 0.03781558438410291, + "grad_norm": 1122.2181396484375, + "learning_rate": 1.872e-05, + "loss": 261.3737, "step": 9360 }, { - "epoch": 0.0757116654142325, - "grad_norm": 1210.6595458984375, - "learning_rate": 3.748000000000001e-05, - "loss": 177.2374, + "epoch": 0.03785598564947054, + "grad_norm": 1429.47607421875, + "learning_rate": 1.8740000000000004e-05, + "loss": 143.9239, "step": 9370 }, { - "epoch": 0.07579246761851663, - "grad_norm": 1649.41552734375, - "learning_rate": 3.752e-05, - "loss": 152.2463, + "epoch": 0.037896386914838175, + "grad_norm": 1072.0924072265625, + "learning_rate": 1.876e-05, + "loss": 217.753, "step": 9380 }, { - "epoch": 0.07587326982280077, - "grad_norm": 1223.367431640625, - "learning_rate": 3.756e-05, - "loss": 167.6342, + "epoch": 0.0379367881802058, + "grad_norm": 2434.5927734375, + "learning_rate": 1.878e-05, + "loss": 144.4152, "step": 9390 }, { - "epoch": 0.0759540720270849, - "grad_norm": 2955.2724609375, - "learning_rate": 3.76e-05, - "loss": 160.5796, + "epoch": 0.037977189445573434, + "grad_norm": 803.2135620117188, + "learning_rate": 1.88e-05, + "loss": 167.2345, "step": 9400 }, { - "epoch": 0.07603487423136904, - "grad_norm": 1036.196533203125, - "learning_rate": 3.7640000000000006e-05, - "loss": 227.5626, + "epoch": 0.03801759071094107, + "grad_norm": 1207.8824462890625, + "learning_rate": 1.8820000000000003e-05, + "loss": 211.6839, "step": 9410 }, { - "epoch": 0.07611567643565316, - "grad_norm": 1944.9627685546875, - "learning_rate": 3.7680000000000005e-05, - "loss": 164.2506, + "epoch": 0.0380579919763087, + "grad_norm": 1132.8394775390625, + "learning_rate": 1.8840000000000003e-05, + "loss": 147.7339, "step": 9420 }, { - "epoch": 0.0761964786399373, - "grad_norm": 3351.102294921875, - "learning_rate": 3.772e-05, - "loss": 227.037, + "epoch": 0.038098393241676326, + "grad_norm": 802.6298828125, + "learning_rate": 1.886e-05, + "loss": 139.8525, "step": 9430 }, { - "epoch": 0.07627728084422143, - "grad_norm": 1530.412353515625, - "learning_rate": 3.776e-05, - "loss": 161.8873, + "epoch": 0.03813879450704396, + "grad_norm": 3992.421875, + "learning_rate": 1.888e-05, + "loss": 199.5186, "step": 9440 }, { - "epoch": 0.07635808304850557, - "grad_norm": 967.5376586914062, - "learning_rate": 3.7800000000000004e-05, - "loss": 194.8877, + "epoch": 0.03817919577241159, + "grad_norm": 1528.7784423828125, + "learning_rate": 1.8900000000000002e-05, + "loss": 186.0713, "step": 9450 }, { - "epoch": 0.07643888525278969, - "grad_norm": 1061.1539306640625, - "learning_rate": 3.7840000000000004e-05, - "loss": 247.2234, + "epoch": 0.038219597037779225, + "grad_norm": 853.1768798828125, + "learning_rate": 1.8920000000000002e-05, + "loss": 222.1405, "step": 9460 }, { - "epoch": 0.07651968745707383, - "grad_norm": 1047.8619384765625, - "learning_rate": 3.788e-05, - "loss": 162.1735, + "epoch": 0.03825999830314685, + "grad_norm": 991.3099975585938, + "learning_rate": 1.894e-05, + "loss": 219.1491, "step": 9470 }, { - "epoch": 0.07660048966135796, - "grad_norm": 981.178466796875, - "learning_rate": 3.792e-05, - "loss": 169.8079, + "epoch": 0.038300399568514484, + "grad_norm": 2823.091064453125, + "learning_rate": 1.896e-05, + "loss": 216.1606, "step": 9480 }, { - "epoch": 0.0766812918656421, - "grad_norm": 1065.6182861328125, - "learning_rate": 3.796e-05, - "loss": 161.1064, + "epoch": 0.03834080083388212, + "grad_norm": 792.9014892578125, + "learning_rate": 1.898e-05, + "loss": 185.3038, "step": 9490 }, { - "epoch": 0.07676209406992623, - "grad_norm": 1711.6788330078125, - "learning_rate": 3.8e-05, - "loss": 262.615, + "epoch": 0.03838120209924975, + "grad_norm": 760.1093139648438, + "learning_rate": 1.9e-05, + "loss": 213.5252, "step": 9500 }, { - "epoch": 0.07684289627421036, - "grad_norm": 1656.35009765625, - "learning_rate": 3.804e-05, - "loss": 133.9092, + "epoch": 0.03842160336461738, + "grad_norm": 1229.9395751953125, + "learning_rate": 1.902e-05, + "loss": 99.1262, "step": 9510 }, { - "epoch": 0.07692369847849449, - "grad_norm": 1534.111572265625, - "learning_rate": 3.808e-05, - "loss": 169.9845, + "epoch": 0.03846200462998501, + "grad_norm": 1547.040771484375, + "learning_rate": 1.904e-05, + "loss": 193.0997, "step": 9520 }, { - "epoch": 0.07700450068277863, - "grad_norm": 1244.1246337890625, - "learning_rate": 3.812e-05, - "loss": 248.8472, + "epoch": 0.03850240589535264, + "grad_norm": 5546.650390625, + "learning_rate": 1.906e-05, + "loss": 191.2586, "step": 9530 }, { - "epoch": 0.07708530288706276, - "grad_norm": 573.146484375, - "learning_rate": 3.816e-05, - "loss": 155.0069, + "epoch": 0.038542807160720276, + "grad_norm": 1049.4068603515625, + "learning_rate": 1.908e-05, + "loss": 126.0632, "step": 9540 }, { - "epoch": 0.07716610509134689, - "grad_norm": 1121.0283203125, - "learning_rate": 3.82e-05, - "loss": 177.2353, + "epoch": 0.0385832084260879, + "grad_norm": 670.5220947265625, + "learning_rate": 1.91e-05, + "loss": 152.6221, "step": 9550 }, { - "epoch": 0.07724690729563102, - "grad_norm": 897.5433349609375, - "learning_rate": 3.8240000000000007e-05, - "loss": 153.0734, + "epoch": 0.038623609691455535, + "grad_norm": 1974.0662841796875, + "learning_rate": 1.9120000000000003e-05, + "loss": 175.8063, "step": 9560 }, { - "epoch": 0.07732770949991516, - "grad_norm": 1504.4290771484375, - "learning_rate": 3.828e-05, - "loss": 171.3821, + "epoch": 0.03866401095682317, + "grad_norm": 4551.0517578125, + "learning_rate": 1.914e-05, + "loss": 188.1946, "step": 9570 }, { - "epoch": 0.0774085117041993, - "grad_norm": 674.1407470703125, - "learning_rate": 3.832e-05, - "loss": 106.9927, + "epoch": 0.0387044122221908, + "grad_norm": 806.855712890625, + "learning_rate": 1.916e-05, + "loss": 166.843, "step": 9580 }, { - "epoch": 0.07748931390848342, - "grad_norm": 1398.94921875, - "learning_rate": 3.836e-05, - "loss": 125.6668, + "epoch": 0.03874481348755843, + "grad_norm": 531.7847290039062, + "learning_rate": 1.918e-05, + "loss": 141.4506, "step": 9590 }, { - "epoch": 0.07757011611276755, - "grad_norm": 1347.630859375, - "learning_rate": 3.8400000000000005e-05, - "loss": 199.499, + "epoch": 0.03878521475292606, + "grad_norm": 931.3214111328125, + "learning_rate": 1.9200000000000003e-05, + "loss": 184.4449, "step": 9600 }, { - "epoch": 0.07765091831705169, - "grad_norm": 501.4110107421875, - "learning_rate": 3.8440000000000005e-05, - "loss": 216.1979, + "epoch": 0.03882561601829369, + "grad_norm": 728.2243041992188, + "learning_rate": 1.9220000000000002e-05, + "loss": 144.1032, "step": 9610 }, { - "epoch": 0.07773172052133583, - "grad_norm": 2298.113525390625, - "learning_rate": 3.848e-05, - "loss": 186.5605, + "epoch": 0.038866017283661326, + "grad_norm": 1086.5455322265625, + "learning_rate": 1.924e-05, + "loss": 166.8239, "step": 9620 }, { - "epoch": 0.07781252272561995, - "grad_norm": 1575.307861328125, - "learning_rate": 3.8520000000000004e-05, - "loss": 200.3646, + "epoch": 0.03890641854902895, + "grad_norm": 0.0, + "learning_rate": 1.9260000000000002e-05, + "loss": 195.737, "step": 9630 }, { - "epoch": 0.07789332492990408, - "grad_norm": 923.5650634765625, - "learning_rate": 3.8560000000000004e-05, - "loss": 152.6383, + "epoch": 0.038946819814396585, + "grad_norm": 1560.2904052734375, + "learning_rate": 1.9280000000000002e-05, + "loss": 165.3053, "step": 9640 }, { - "epoch": 0.07797412713418822, - "grad_norm": 1438.6524658203125, - "learning_rate": 3.86e-05, - "loss": 148.3616, + "epoch": 0.03898722107976422, + "grad_norm": 1141.6810302734375, + "learning_rate": 1.93e-05, + "loss": 170.9885, "step": 9650 }, { - "epoch": 0.07805492933847236, - "grad_norm": 2303.528564453125, - "learning_rate": 3.864e-05, - "loss": 202.378, + "epoch": 0.03902762234513185, + "grad_norm": 0.0, + "learning_rate": 1.932e-05, + "loss": 154.8165, "step": 9660 }, { - "epoch": 0.07813573154275649, - "grad_norm": 2530.14013671875, - "learning_rate": 3.868e-05, - "loss": 224.129, + "epoch": 0.03906802361049948, + "grad_norm": 1502.9287109375, + "learning_rate": 1.934e-05, + "loss": 161.04, "step": 9670 }, { - "epoch": 0.07821653374704061, - "grad_norm": 4952.0341796875, - "learning_rate": 3.872e-05, - "loss": 198.1282, + "epoch": 0.03910842487586711, + "grad_norm": 1250.09619140625, + "learning_rate": 1.936e-05, + "loss": 140.4067, "step": 9680 }, { - "epoch": 0.07829733595132475, - "grad_norm": 1609.444580078125, - "learning_rate": 3.876e-05, - "loss": 203.0807, + "epoch": 0.039148826141234744, + "grad_norm": 1319.486328125, + "learning_rate": 1.938e-05, + "loss": 183.2919, "step": 9690 }, { - "epoch": 0.07837813815560889, - "grad_norm": 1734.220947265625, - "learning_rate": 3.88e-05, - "loss": 229.1168, + "epoch": 0.03918922740660238, + "grad_norm": 1285.4862060546875, + "learning_rate": 1.94e-05, + "loss": 200.0876, "step": 9700 }, { - "epoch": 0.07845894035989302, - "grad_norm": 1470.4158935546875, - "learning_rate": 3.884e-05, - "loss": 205.234, + "epoch": 0.03922962867197, + "grad_norm": 935.0762939453125, + "learning_rate": 1.942e-05, + "loss": 135.3171, "step": 9710 }, { - "epoch": 0.07853974256417715, - "grad_norm": 841.0286254882812, - "learning_rate": 3.888e-05, - "loss": 179.4924, + "epoch": 0.039270029937337636, + "grad_norm": 1619.843017578125, + "learning_rate": 1.944e-05, + "loss": 193.6001, "step": 9720 }, { - "epoch": 0.07862054476846128, - "grad_norm": 785.2080078125, - "learning_rate": 3.892e-05, - "loss": 174.6788, + "epoch": 0.03931043120270527, + "grad_norm": 1271.864990234375, + "learning_rate": 1.946e-05, + "loss": 115.0868, "step": 9730 }, { - "epoch": 0.07870134697274542, - "grad_norm": 2138.3388671875, - "learning_rate": 3.896e-05, - "loss": 170.9055, + "epoch": 0.0393508324680729, + "grad_norm": 724.2216186523438, + "learning_rate": 1.948e-05, + "loss": 184.4641, "step": 9740 }, { - "epoch": 0.07878214917702955, - "grad_norm": 891.0426635742188, - "learning_rate": 3.9000000000000006e-05, - "loss": 212.9628, + "epoch": 0.03939123373344053, + "grad_norm": 1635.4261474609375, + "learning_rate": 1.9500000000000003e-05, + "loss": 208.8505, "step": 9750 }, { - "epoch": 0.07886295138131368, - "grad_norm": 1529.48193359375, - "learning_rate": 3.9040000000000006e-05, - "loss": 140.431, + "epoch": 0.03943163499880816, + "grad_norm": 2965.97265625, + "learning_rate": 1.9520000000000003e-05, + "loss": 172.8497, "step": 9760 }, { - "epoch": 0.07894375358559781, - "grad_norm": 1415.4608154296875, - "learning_rate": 3.908e-05, - "loss": 168.8636, + "epoch": 0.039472036264175794, + "grad_norm": 2287.69287109375, + "learning_rate": 1.954e-05, + "loss": 182.5987, "step": 9770 }, { - "epoch": 0.07902455578988195, - "grad_norm": 1225.661865234375, - "learning_rate": 3.912e-05, - "loss": 176.6606, + "epoch": 0.03951243752954343, + "grad_norm": 812.2120971679688, + "learning_rate": 1.956e-05, + "loss": 125.8975, "step": 9780 }, { - "epoch": 0.07910535799416608, - "grad_norm": 789.5761108398438, - "learning_rate": 3.9160000000000005e-05, - "loss": 148.1701, + "epoch": 0.039552838794911054, + "grad_norm": 2255.82275390625, + "learning_rate": 1.9580000000000002e-05, + "loss": 205.0292, "step": 9790 }, { - "epoch": 0.07918616019845022, - "grad_norm": 980.8698120117188, - "learning_rate": 3.9200000000000004e-05, - "loss": 182.0633, + "epoch": 0.03959324006027869, + "grad_norm": 794.7918701171875, + "learning_rate": 1.9600000000000002e-05, + "loss": 140.7012, "step": 9800 }, { - "epoch": 0.07926696240273434, - "grad_norm": 951.6787109375, - "learning_rate": 3.9240000000000004e-05, - "loss": 157.9205, + "epoch": 0.03963364132564632, + "grad_norm": 1928.4591064453125, + "learning_rate": 1.9620000000000002e-05, + "loss": 230.6109, "step": 9810 }, { - "epoch": 0.07934776460701848, - "grad_norm": 1432.989501953125, - "learning_rate": 3.9280000000000003e-05, - "loss": 185.7759, + "epoch": 0.03967404259101395, + "grad_norm": 497.63433837890625, + "learning_rate": 1.9640000000000002e-05, + "loss": 175.5, "step": 9820 }, { - "epoch": 0.07942856681130261, - "grad_norm": 1328.513916015625, - "learning_rate": 3.932e-05, - "loss": 174.0459, + "epoch": 0.03971444385638158, + "grad_norm": 605.4525146484375, + "learning_rate": 1.966e-05, + "loss": 146.0165, "step": 9830 }, { - "epoch": 0.07950936901558675, - "grad_norm": 1071.6217041015625, - "learning_rate": 3.936e-05, - "loss": 178.2446, + "epoch": 0.03975484512174921, + "grad_norm": 12817.353515625, + "learning_rate": 1.968e-05, + "loss": 223.3047, "step": 9840 }, { - "epoch": 0.07959017121987087, - "grad_norm": 566.2764282226562, - "learning_rate": 3.94e-05, - "loss": 201.6997, + "epoch": 0.039795246387116845, + "grad_norm": 1107.6016845703125, + "learning_rate": 1.97e-05, + "loss": 164.6401, "step": 9850 }, { - "epoch": 0.07967097342415501, - "grad_norm": 1115.84130859375, - "learning_rate": 3.944e-05, - "loss": 157.878, + "epoch": 0.03983564765248448, + "grad_norm": 1794.5758056640625, + "learning_rate": 1.972e-05, + "loss": 211.4972, "step": 9860 }, { - "epoch": 0.07975177562843914, - "grad_norm": 1618.052978515625, - "learning_rate": 3.948e-05, - "loss": 177.7351, + "epoch": 0.039876048917852104, + "grad_norm": 1120.5679931640625, + "learning_rate": 1.974e-05, + "loss": 198.396, "step": 9870 }, { - "epoch": 0.07983257783272328, - "grad_norm": 3017.021728515625, - "learning_rate": 3.952e-05, - "loss": 160.3948, + "epoch": 0.03991645018321974, + "grad_norm": 627.087646484375, + "learning_rate": 1.976e-05, + "loss": 142.4417, "step": 9880 }, { - "epoch": 0.0799133800370074, - "grad_norm": 1273.435302734375, - "learning_rate": 3.956e-05, - "loss": 176.9263, + "epoch": 0.03995685144858737, + "grad_norm": 415.39337158203125, + "learning_rate": 1.978e-05, + "loss": 109.121, "step": 9890 }, { - "epoch": 0.07999418224129154, - "grad_norm": 1910.0296630859375, - "learning_rate": 3.960000000000001e-05, - "loss": 165.2272, + "epoch": 0.039997252713955, + "grad_norm": 1138.5850830078125, + "learning_rate": 1.9800000000000004e-05, + "loss": 190.1185, "step": 9900 }, { - "epoch": 0.08007498444557568, - "grad_norm": 2431.107666015625, - "learning_rate": 3.964e-05, - "loss": 193.6506, + "epoch": 0.04003765397932263, + "grad_norm": 506.6181945800781, + "learning_rate": 1.982e-05, + "loss": 152.6702, "step": 9910 }, { - "epoch": 0.08015578664985981, - "grad_norm": 5130.7294921875, - "learning_rate": 3.968e-05, - "loss": 266.5739, + "epoch": 0.04007805524469026, + "grad_norm": 1317.2239990234375, + "learning_rate": 1.984e-05, + "loss": 210.7801, "step": 9920 }, { - "epoch": 0.08023658885414395, - "grad_norm": 1193.6356201171875, - "learning_rate": 3.972e-05, - "loss": 157.0664, + "epoch": 0.040118456510057895, + "grad_norm": 1474.5733642578125, + "learning_rate": 1.986e-05, + "loss": 224.4348, "step": 9930 }, { - "epoch": 0.08031739105842807, - "grad_norm": 1334.3779296875, - "learning_rate": 3.9760000000000006e-05, - "loss": 161.1673, + "epoch": 0.04015885777542553, + "grad_norm": 1309.1876220703125, + "learning_rate": 1.9880000000000003e-05, + "loss": 87.4433, "step": 9940 }, { - "epoch": 0.0803981932627122, - "grad_norm": 1916.085693359375, - "learning_rate": 3.9800000000000005e-05, - "loss": 174.8383, + "epoch": 0.040199259040793155, + "grad_norm": 861.6743774414062, + "learning_rate": 1.9900000000000003e-05, + "loss": 136.5576, "step": 9950 }, { - "epoch": 0.08047899546699634, - "grad_norm": 1039.6949462890625, - "learning_rate": 3.984e-05, - "loss": 237.1551, + "epoch": 0.04023966030616079, + "grad_norm": 847.05517578125, + "learning_rate": 1.992e-05, + "loss": 182.9425, "step": 9960 }, { - "epoch": 0.08055979767128048, - "grad_norm": 693.6524047851562, - "learning_rate": 3.988e-05, - "loss": 127.6087, + "epoch": 0.04028006157152842, + "grad_norm": 1133.978515625, + "learning_rate": 1.994e-05, + "loss": 205.3185, "step": 9970 }, { - "epoch": 0.0806405998755646, - "grad_norm": 845.566650390625, - "learning_rate": 3.9920000000000004e-05, - "loss": 172.3226, + "epoch": 0.040320462836896054, + "grad_norm": 670.1548461914062, + "learning_rate": 1.9960000000000002e-05, + "loss": 190.8365, "step": 9980 }, { - "epoch": 0.08072140207984874, - "grad_norm": 921.083984375, - "learning_rate": 3.9960000000000004e-05, - "loss": 244.0432, + "epoch": 0.04036086410226368, + "grad_norm": 887.6602783203125, + "learning_rate": 1.9980000000000002e-05, + "loss": 180.067, "step": 9990 }, { - "epoch": 0.08080220428413287, - "grad_norm": 972.2029418945312, - "learning_rate": 4e-05, - "loss": 151.5975, + "epoch": 0.04040126536763131, + "grad_norm": 687.7095336914062, + "learning_rate": 2e-05, + "loss": 127.6409, "step": 10000 }, { - "epoch": 0.08088300648841701, - "grad_norm": 1146.177734375, - "learning_rate": 4.004e-05, - "loss": 198.049, + "epoch": 0.040441666632998946, + "grad_norm": 385.455810546875, + "learning_rate": 2.002e-05, + "loss": 161.4435, "step": 10010 }, { - "epoch": 0.08096380869270113, - "grad_norm": 1662.4725341796875, - "learning_rate": 4.008e-05, - "loss": 183.5892, + "epoch": 0.04048206789836658, + "grad_norm": 1409.2718505859375, + "learning_rate": 2.004e-05, + "loss": 182.3646, "step": 10020 }, { - "epoch": 0.08104461089698527, - "grad_norm": 2366.826171875, - "learning_rate": 4.012e-05, - "loss": 173.7599, + "epoch": 0.040522469163734205, + "grad_norm": 803.884765625, + "learning_rate": 2.006e-05, + "loss": 210.5829, "step": 10030 }, { - "epoch": 0.0811254131012694, - "grad_norm": 1399.5177001953125, - "learning_rate": 4.016e-05, - "loss": 279.5705, + "epoch": 0.04056287042910184, + "grad_norm": 1560.3782958984375, + "learning_rate": 2.008e-05, + "loss": 164.0962, "step": 10040 }, { - "epoch": 0.08120621530555354, - "grad_norm": 1353.117431640625, - "learning_rate": 4.02e-05, - "loss": 161.5938, + "epoch": 0.04060327169446947, + "grad_norm": 2029.135009765625, + "learning_rate": 2.01e-05, + "loss": 140.0395, "step": 10050 }, { - "epoch": 0.08128701750983768, - "grad_norm": 3179.9501953125, - "learning_rate": 4.024e-05, - "loss": 182.7751, + "epoch": 0.040643672959837104, + "grad_norm": 0.0, + "learning_rate": 2.012e-05, + "loss": 144.4245, "step": 10060 }, { - "epoch": 0.0813678197141218, - "grad_norm": 1770.83740234375, - "learning_rate": 4.028e-05, - "loss": 213.9104, + "epoch": 0.04068407422520473, + "grad_norm": 1140.849853515625, + "learning_rate": 2.014e-05, + "loss": 179.6542, "step": 10070 }, { - "epoch": 0.08144862191840593, - "grad_norm": 643.4440307617188, - "learning_rate": 4.032e-05, - "loss": 169.6355, + "epoch": 0.040724475490572364, + "grad_norm": 1746.8056640625, + "learning_rate": 2.016e-05, + "loss": 222.9251, "step": 10080 }, { - "epoch": 0.08152942412269007, - "grad_norm": 976.877197265625, - "learning_rate": 4.0360000000000007e-05, - "loss": 184.5578, + "epoch": 0.04076487675594, + "grad_norm": 722.617919921875, + "learning_rate": 2.0180000000000003e-05, + "loss": 175.7524, "step": 10090 }, { - "epoch": 0.0816102263269742, - "grad_norm": 1427.906494140625, - "learning_rate": 4.0400000000000006e-05, - "loss": 172.6602, + "epoch": 0.04080527802130763, + "grad_norm": 837.6021118164062, + "learning_rate": 2.0200000000000003e-05, + "loss": 180.1091, "step": 10100 }, { - "epoch": 0.08169102853125833, - "grad_norm": 825.6599731445312, - "learning_rate": 4.044e-05, - "loss": 174.3504, + "epoch": 0.040845679286675256, + "grad_norm": 1460.4134521484375, + "learning_rate": 2.022e-05, + "loss": 188.8666, "step": 10110 }, { - "epoch": 0.08177183073554246, - "grad_norm": 2436.170166015625, - "learning_rate": 4.048e-05, - "loss": 235.238, + "epoch": 0.04088608055204289, + "grad_norm": 770.96484375, + "learning_rate": 2.024e-05, + "loss": 164.6192, "step": 10120 }, { - "epoch": 0.0818526329398266, - "grad_norm": 948.8191528320312, - "learning_rate": 4.0520000000000005e-05, - "loss": 169.0186, + "epoch": 0.04092648181741052, + "grad_norm": 887.3283081054688, + "learning_rate": 2.0260000000000003e-05, + "loss": 181.0503, "step": 10130 }, { - "epoch": 0.08193343514411074, - "grad_norm": 1173.0087890625, - "learning_rate": 4.0560000000000005e-05, - "loss": 182.7577, + "epoch": 0.040966883082778155, + "grad_norm": 1267.9124755859375, + "learning_rate": 2.0280000000000002e-05, + "loss": 191.1377, "step": 10140 }, { - "epoch": 0.08201423734839486, - "grad_norm": 1082.241943359375, - "learning_rate": 4.0600000000000004e-05, - "loss": 152.1746, + "epoch": 0.04100728434814578, + "grad_norm": 785.875244140625, + "learning_rate": 2.0300000000000002e-05, + "loss": 175.139, "step": 10150 }, { - "epoch": 0.082095039552679, - "grad_norm": 878.6609497070312, - "learning_rate": 4.064e-05, - "loss": 183.4489, + "epoch": 0.041047685613513414, + "grad_norm": 3146.860107421875, + "learning_rate": 2.032e-05, + "loss": 189.9724, "step": 10160 }, { - "epoch": 0.08217584175696313, - "grad_norm": 903.724609375, - "learning_rate": 4.0680000000000004e-05, - "loss": 177.1646, + "epoch": 0.04108808687888105, + "grad_norm": 1048.8988037109375, + "learning_rate": 2.0340000000000002e-05, + "loss": 219.648, "step": 10170 }, { - "epoch": 0.08225664396124727, - "grad_norm": 1044.388427734375, - "learning_rate": 4.072e-05, - "loss": 162.3992, + "epoch": 0.04112848814424868, + "grad_norm": 534.6448364257812, + "learning_rate": 2.036e-05, + "loss": 165.6375, "step": 10180 }, { - "epoch": 0.08233744616553139, - "grad_norm": 1552.9786376953125, - "learning_rate": 4.076e-05, - "loss": 193.026, + "epoch": 0.041168889409616306, + "grad_norm": 2196.583984375, + "learning_rate": 2.038e-05, + "loss": 219.21, "step": 10190 }, { - "epoch": 0.08241824836981552, - "grad_norm": 973.9620361328125, - "learning_rate": 4.08e-05, - "loss": 178.0761, + "epoch": 0.04120929067498394, + "grad_norm": 868.2059326171875, + "learning_rate": 2.04e-05, + "loss": 186.5126, "step": 10200 }, { - "epoch": 0.08249905057409966, - "grad_norm": 1457.1915283203125, - "learning_rate": 4.084e-05, - "loss": 178.3972, + "epoch": 0.04124969194035157, + "grad_norm": 1177.8323974609375, + "learning_rate": 2.042e-05, + "loss": 173.2809, "step": 10210 }, { - "epoch": 0.0825798527783838, - "grad_norm": 1393.6435546875, - "learning_rate": 4.088e-05, - "loss": 169.8833, + "epoch": 0.041290093205719205, + "grad_norm": 1213.236572265625, + "learning_rate": 2.044e-05, + "loss": 195.4992, "step": 10220 }, { - "epoch": 0.08266065498266793, - "grad_norm": 548.5450439453125, - "learning_rate": 4.092e-05, - "loss": 170.2038, + "epoch": 0.04133049447108683, + "grad_norm": 712.7450561523438, + "learning_rate": 2.046e-05, + "loss": 123.3338, "step": 10230 }, { - "epoch": 0.08274145718695206, - "grad_norm": 1042.5880126953125, - "learning_rate": 4.096e-05, - "loss": 183.4634, + "epoch": 0.041370895736454465, + "grad_norm": 1610.07373046875, + "learning_rate": 2.048e-05, + "loss": 182.489, "step": 10240 }, { - "epoch": 0.08282225939123619, - "grad_norm": 1946.1444091796875, - "learning_rate": 4.1e-05, - "loss": 246.7109, + "epoch": 0.0414112970018221, + "grad_norm": 1756.5374755859375, + "learning_rate": 2.05e-05, + "loss": 271.8728, "step": 10250 }, { - "epoch": 0.08290306159552033, - "grad_norm": 1744.5115966796875, - "learning_rate": 4.104e-05, - "loss": 194.5518, + "epoch": 0.04145169826718973, + "grad_norm": 1240.2843017578125, + "learning_rate": 2.052e-05, + "loss": 166.9299, "step": 10260 }, { - "epoch": 0.08298386379980446, - "grad_norm": 843.8541870117188, - "learning_rate": 4.108e-05, - "loss": 181.7394, + "epoch": 0.04149209953255736, + "grad_norm": 803.8770141601562, + "learning_rate": 2.054e-05, + "loss": 155.5844, "step": 10270 }, { - "epoch": 0.08306466600408859, - "grad_norm": 1452.23046875, - "learning_rate": 4.1120000000000006e-05, - "loss": 172.7989, + "epoch": 0.04153250079792499, + "grad_norm": 954.6376953125, + "learning_rate": 2.0560000000000003e-05, + "loss": 199.4898, "step": 10280 }, { - "epoch": 0.08314546820837272, - "grad_norm": 1428.465576171875, - "learning_rate": 4.1160000000000006e-05, - "loss": 150.7257, + "epoch": 0.04157290206329262, + "grad_norm": 667.2396240234375, + "learning_rate": 2.0580000000000003e-05, + "loss": 112.1381, "step": 10290 }, { - "epoch": 0.08322627041265686, - "grad_norm": 1098.4039306640625, - "learning_rate": 4.12e-05, - "loss": 210.3694, + "epoch": 0.041613303328660256, + "grad_norm": 1343.771240234375, + "learning_rate": 2.06e-05, + "loss": 200.9802, "step": 10300 }, { - "epoch": 0.083307072616941, - "grad_norm": 1203.095703125, - "learning_rate": 4.124e-05, - "loss": 211.7454, + "epoch": 0.04165370459402788, + "grad_norm": 1224.6522216796875, + "learning_rate": 2.062e-05, + "loss": 128.1033, "step": 10310 }, { - "epoch": 0.08338787482122512, - "grad_norm": 784.8167724609375, - "learning_rate": 4.1280000000000005e-05, - "loss": 195.1169, + "epoch": 0.041694105859395515, + "grad_norm": 845.0104370117188, + "learning_rate": 2.0640000000000002e-05, + "loss": 165.8742, "step": 10320 }, { - "epoch": 0.08346867702550925, - "grad_norm": 2486.029052734375, - "learning_rate": 4.1320000000000004e-05, - "loss": 191.3834, + "epoch": 0.04173450712476315, + "grad_norm": 532.292236328125, + "learning_rate": 2.0660000000000002e-05, + "loss": 131.5288, "step": 10330 }, { - "epoch": 0.08354947922979339, - "grad_norm": 1144.709716796875, - "learning_rate": 4.1360000000000004e-05, - "loss": 199.3864, + "epoch": 0.04177490839013078, + "grad_norm": 876.7532958984375, + "learning_rate": 2.0680000000000002e-05, + "loss": 136.8921, "step": 10340 }, { - "epoch": 0.08363028143407752, - "grad_norm": 1321.3115234375, - "learning_rate": 4.14e-05, - "loss": 212.5151, + "epoch": 0.04181530965549841, + "grad_norm": 2460.5791015625, + "learning_rate": 2.07e-05, + "loss": 152.2601, "step": 10350 }, { - "epoch": 0.08371108363836166, - "grad_norm": 862.5822143554688, - "learning_rate": 4.144e-05, - "loss": 198.9451, + "epoch": 0.04185571092086604, + "grad_norm": 1794.477783203125, + "learning_rate": 2.072e-05, + "loss": 201.8371, "step": 10360 }, { - "epoch": 0.08379188584264578, - "grad_norm": 10484.8984375, - "learning_rate": 4.148e-05, - "loss": 256.3769, + "epoch": 0.041896112186233674, + "grad_norm": 1977.7452392578125, + "learning_rate": 2.074e-05, + "loss": 247.7894, "step": 10370 }, { - "epoch": 0.08387268804692992, - "grad_norm": 1291.408203125, - "learning_rate": 4.152e-05, - "loss": 145.6406, + "epoch": 0.04193651345160131, + "grad_norm": 667.1467895507812, + "learning_rate": 2.076e-05, + "loss": 222.6323, "step": 10380 }, { - "epoch": 0.08395349025121406, - "grad_norm": 570.9535522460938, - "learning_rate": 4.156e-05, - "loss": 183.1359, + "epoch": 0.04197691471696893, + "grad_norm": 351.2049865722656, + "learning_rate": 2.078e-05, + "loss": 138.2745, "step": 10390 }, { - "epoch": 0.08403429245549819, - "grad_norm": 3295.680908203125, - "learning_rate": 4.16e-05, - "loss": 209.5042, + "epoch": 0.042017315982336566, + "grad_norm": 944.7986450195312, + "learning_rate": 2.08e-05, + "loss": 196.8081, "step": 10400 }, { - "epoch": 0.08411509465978231, - "grad_norm": 1278.0555419921875, - "learning_rate": 4.164e-05, - "loss": 201.0906, + "epoch": 0.0420577172477042, + "grad_norm": 1119.5870361328125, + "learning_rate": 2.082e-05, + "loss": 163.2965, "step": 10410 }, { - "epoch": 0.08419589686406645, - "grad_norm": 2004.2734375, - "learning_rate": 4.168e-05, - "loss": 201.3545, + "epoch": 0.04209811851307183, + "grad_norm": 684.2236938476562, + "learning_rate": 2.084e-05, + "loss": 154.9308, "step": 10420 }, { - "epoch": 0.08427669906835059, - "grad_norm": 861.1351318359375, - "learning_rate": 4.172e-05, - "loss": 112.2548, + "epoch": 0.04213851977843946, + "grad_norm": 1214.9677734375, + "learning_rate": 2.086e-05, + "loss": 181.0213, "step": 10430 }, { - "epoch": 0.08435750127263472, - "grad_norm": 1816.4276123046875, - "learning_rate": 4.176000000000001e-05, - "loss": 182.1057, + "epoch": 0.04217892104380709, + "grad_norm": 979.2085571289062, + "learning_rate": 2.0880000000000003e-05, + "loss": 214.0994, "step": 10440 }, { - "epoch": 0.08443830347691884, - "grad_norm": 1326.76708984375, - "learning_rate": 4.18e-05, - "loss": 167.3876, + "epoch": 0.042219322309174724, + "grad_norm": 1195.45947265625, + "learning_rate": 2.09e-05, + "loss": 184.8288, "step": 10450 }, { - "epoch": 0.08451910568120298, - "grad_norm": 1033.8326416015625, - "learning_rate": 4.184e-05, - "loss": 232.5481, + "epoch": 0.04225972357454236, + "grad_norm": 3266.998291015625, + "learning_rate": 2.092e-05, + "loss": 154.9474, "step": 10460 }, { - "epoch": 0.08459990788548712, - "grad_norm": 1077.4783935546875, - "learning_rate": 4.1880000000000006e-05, - "loss": 161.4913, + "epoch": 0.04230012483990998, + "grad_norm": 1256.9425048828125, + "learning_rate": 2.0940000000000003e-05, + "loss": 119.0928, "step": 10470 }, { - "epoch": 0.08468071008977125, - "grad_norm": 842.7870483398438, - "learning_rate": 4.1920000000000005e-05, - "loss": 146.6304, + "epoch": 0.042340526105277616, + "grad_norm": 816.3329467773438, + "learning_rate": 2.0960000000000003e-05, + "loss": 138.4739, "step": 10480 }, { - "epoch": 0.08476151229405539, - "grad_norm": 1683.2530517578125, - "learning_rate": 4.196e-05, - "loss": 162.2518, + "epoch": 0.04238092737064525, + "grad_norm": 1145.7486572265625, + "learning_rate": 2.098e-05, + "loss": 179.8457, "step": 10490 }, { - "epoch": 0.08484231449833951, - "grad_norm": 1105.76416015625, - "learning_rate": 4.2e-05, - "loss": 165.4935, + "epoch": 0.04242132863601288, + "grad_norm": 4142.47998046875, + "learning_rate": 2.1e-05, + "loss": 190.1194, "step": 10500 }, { - "epoch": 0.08492311670262365, - "grad_norm": 3281.477783203125, - "learning_rate": 4.2040000000000004e-05, - "loss": 276.0207, + "epoch": 0.04246172990138051, + "grad_norm": 748.7318115234375, + "learning_rate": 2.1020000000000002e-05, + "loss": 156.9302, "step": 10510 }, { - "epoch": 0.08500391890690778, - "grad_norm": 773.9490356445312, - "learning_rate": 4.2080000000000004e-05, - "loss": 151.6816, + "epoch": 0.04250213116674814, + "grad_norm": 1273.5777587890625, + "learning_rate": 2.1040000000000002e-05, + "loss": 175.272, "step": 10520 }, { - "epoch": 0.08508472111119192, - "grad_norm": 878.6908569335938, - "learning_rate": 4.212e-05, - "loss": 140.2555, + "epoch": 0.042542532432115775, + "grad_norm": 551.0802612304688, + "learning_rate": 2.106e-05, + "loss": 167.7439, "step": 10530 }, { - "epoch": 0.08516552331547604, - "grad_norm": 773.424072265625, - "learning_rate": 4.2159999999999996e-05, - "loss": 195.5604, + "epoch": 0.04258293369748341, + "grad_norm": 604.2559814453125, + "learning_rate": 2.1079999999999998e-05, + "loss": 153.7686, "step": 10540 }, { - "epoch": 0.08524632551976018, - "grad_norm": 1280.2425537109375, - "learning_rate": 4.22e-05, - "loss": 165.135, + "epoch": 0.042623334962851034, + "grad_norm": 460.89154052734375, + "learning_rate": 2.11e-05, + "loss": 117.5873, "step": 10550 }, { - "epoch": 0.08532712772404431, - "grad_norm": 1143.2020263671875, - "learning_rate": 4.224e-05, - "loss": 196.1139, + "epoch": 0.04266373622821867, + "grad_norm": 1551.710693359375, + "learning_rate": 2.112e-05, + "loss": 165.0876, "step": 10560 }, { - "epoch": 0.08540792992832845, - "grad_norm": 1141.0167236328125, - "learning_rate": 4.228e-05, - "loss": 207.5249, + "epoch": 0.0427041374935863, + "grad_norm": 934.1738891601562, + "learning_rate": 2.114e-05, + "loss": 143.8318, "step": 10570 }, { - "epoch": 0.08548873213261257, - "grad_norm": 2905.197509765625, - "learning_rate": 4.232e-05, - "loss": 169.4194, + "epoch": 0.04274453875895393, + "grad_norm": 1267.439453125, + "learning_rate": 2.116e-05, + "loss": 187.996, "step": 10580 }, { - "epoch": 0.08556953433689671, - "grad_norm": 943.3927001953125, - "learning_rate": 4.236e-05, - "loss": 151.4123, + "epoch": 0.04278494002432156, + "grad_norm": 1843.2279052734375, + "learning_rate": 2.118e-05, + "loss": 181.2761, "step": 10590 }, { - "epoch": 0.08565033654118084, - "grad_norm": 1627.9791259765625, - "learning_rate": 4.24e-05, - "loss": 194.5689, + "epoch": 0.04282534128968919, + "grad_norm": 2143.365966796875, + "learning_rate": 2.12e-05, + "loss": 158.841, "step": 10600 }, { - "epoch": 0.08573113874546498, - "grad_norm": 6744.78662109375, - "learning_rate": 4.244e-05, - "loss": 234.027, + "epoch": 0.042865742555056825, + "grad_norm": 925.6973266601562, + "learning_rate": 2.122e-05, + "loss": 210.331, "step": 10610 }, { - "epoch": 0.08581194094974912, - "grad_norm": 937.9603271484375, - "learning_rate": 4.248e-05, - "loss": 170.269, + "epoch": 0.04290614382042446, + "grad_norm": 2171.398681640625, + "learning_rate": 2.124e-05, + "loss": 225.7547, "step": 10620 }, { - "epoch": 0.08589274315403324, - "grad_norm": 1123.732177734375, - "learning_rate": 4.2520000000000006e-05, - "loss": 164.9477, + "epoch": 0.042946545085792084, + "grad_norm": 1160.7347412109375, + "learning_rate": 2.1260000000000003e-05, + "loss": 153.8757, "step": 10630 }, { - "epoch": 0.08597354535831737, - "grad_norm": 2390.64697265625, - "learning_rate": 4.256e-05, - "loss": 193.9472, + "epoch": 0.04298694635115972, + "grad_norm": 1135.9134521484375, + "learning_rate": 2.128e-05, + "loss": 188.1846, "step": 10640 }, { - "epoch": 0.08605434756260151, - "grad_norm": 992.861083984375, - "learning_rate": 4.26e-05, - "loss": 197.5291, + "epoch": 0.04302734761652735, + "grad_norm": 2029.5738525390625, + "learning_rate": 2.13e-05, + "loss": 201.2368, "step": 10650 }, { - "epoch": 0.08613514976688565, - "grad_norm": 931.4149780273438, - "learning_rate": 4.2640000000000005e-05, - "loss": 162.7885, + "epoch": 0.043067748881894984, + "grad_norm": 814.3689575195312, + "learning_rate": 2.1320000000000003e-05, + "loss": 206.1595, "step": 10660 }, { - "epoch": 0.08621595197116977, - "grad_norm": 4359.9873046875, - "learning_rate": 4.2680000000000005e-05, - "loss": 183.5593, + "epoch": 0.04310815014726261, + "grad_norm": 422.2183837890625, + "learning_rate": 2.1340000000000002e-05, + "loss": 157.6857, "step": 10670 }, { - "epoch": 0.0862967541754539, - "grad_norm": 969.6080932617188, - "learning_rate": 4.2720000000000004e-05, - "loss": 150.2163, + "epoch": 0.04314855141263024, + "grad_norm": 880.4464111328125, + "learning_rate": 2.1360000000000002e-05, + "loss": 163.0372, "step": 10680 }, { - "epoch": 0.08637755637973804, - "grad_norm": 969.9591674804688, - "learning_rate": 4.276e-05, - "loss": 140.7298, + "epoch": 0.043188952677997876, + "grad_norm": 484.959228515625, + "learning_rate": 2.138e-05, + "loss": 144.4623, "step": 10690 }, { - "epoch": 0.08645835858402218, - "grad_norm": 2042.2684326171875, - "learning_rate": 4.2800000000000004e-05, - "loss": 206.122, + "epoch": 0.04322935394336551, + "grad_norm": 1225.7841796875, + "learning_rate": 2.1400000000000002e-05, + "loss": 226.4971, "step": 10700 }, { - "epoch": 0.0865391607883063, - "grad_norm": 1093.513427734375, - "learning_rate": 4.284e-05, - "loss": 178.5101, + "epoch": 0.043269755208733135, + "grad_norm": 569.0413208007812, + "learning_rate": 2.142e-05, + "loss": 186.6787, "step": 10710 }, { - "epoch": 0.08661996299259044, - "grad_norm": 1408.14990234375, - "learning_rate": 4.288e-05, - "loss": 184.9918, + "epoch": 0.04331015647410077, + "grad_norm": 1226.3690185546875, + "learning_rate": 2.144e-05, + "loss": 219.7577, "step": 10720 }, { - "epoch": 0.08670076519687457, - "grad_norm": 2175.18115234375, - "learning_rate": 4.292e-05, - "loss": 168.8215, + "epoch": 0.0433505577394684, + "grad_norm": 1915.441650390625, + "learning_rate": 2.146e-05, + "loss": 194.0689, "step": 10730 }, { - "epoch": 0.08678156740115871, - "grad_norm": 1018.9749145507812, - "learning_rate": 4.296e-05, - "loss": 157.6885, + "epoch": 0.043390959004836034, + "grad_norm": 1193.3553466796875, + "learning_rate": 2.148e-05, + "loss": 149.6157, "step": 10740 }, { - "epoch": 0.08686236960544283, - "grad_norm": 833.9223022460938, - "learning_rate": 4.3e-05, - "loss": 190.7543, + "epoch": 0.04343136027020366, + "grad_norm": 562.625, + "learning_rate": 2.15e-05, + "loss": 204.4145, "step": 10750 }, { - "epoch": 0.08694317180972697, - "grad_norm": 1124.74072265625, - "learning_rate": 4.304e-05, - "loss": 162.6468, + "epoch": 0.04347176153557129, + "grad_norm": 12496.0029296875, + "learning_rate": 2.152e-05, + "loss": 248.7909, "step": 10760 }, { - "epoch": 0.0870239740140111, - "grad_norm": 1126.108642578125, - "learning_rate": 4.308e-05, - "loss": 156.0814, + "epoch": 0.043512162800938926, + "grad_norm": 1115.0479736328125, + "learning_rate": 2.154e-05, + "loss": 183.6795, "step": 10770 }, { - "epoch": 0.08710477621829524, - "grad_norm": 1275.92431640625, - "learning_rate": 4.312000000000001e-05, - "loss": 193.4797, + "epoch": 0.04355256406630656, + "grad_norm": 1180.6798095703125, + "learning_rate": 2.1560000000000004e-05, + "loss": 161.4541, "step": 10780 }, { - "epoch": 0.08718557842257937, - "grad_norm": 637.2274169921875, - "learning_rate": 4.316e-05, - "loss": 152.6488, + "epoch": 0.043592965331674186, + "grad_norm": 2096.56298828125, + "learning_rate": 2.158e-05, + "loss": 177.0552, "step": 10790 }, { - "epoch": 0.0872663806268635, - "grad_norm": 2631.250732421875, - "learning_rate": 4.32e-05, - "loss": 201.6297, + "epoch": 0.04363336659704182, + "grad_norm": 1031.798828125, + "learning_rate": 2.16e-05, + "loss": 160.7948, "step": 10800 }, { - "epoch": 0.08734718283114763, - "grad_norm": 876.3217163085938, - "learning_rate": 4.324e-05, - "loss": 169.1845, + "epoch": 0.04367376786240945, + "grad_norm": 1001.6660766601562, + "learning_rate": 2.162e-05, + "loss": 279.4042, "step": 10810 }, { - "epoch": 0.08742798503543177, - "grad_norm": 1013.3303833007812, - "learning_rate": 4.3280000000000006e-05, - "loss": 204.4532, + "epoch": 0.043714169127777085, + "grad_norm": 5103.36669921875, + "learning_rate": 2.1640000000000003e-05, + "loss": 198.244, "step": 10820 }, { - "epoch": 0.0875087872397159, - "grad_norm": 1415.256103515625, - "learning_rate": 4.332e-05, - "loss": 155.4371, + "epoch": 0.04375457039314471, + "grad_norm": 813.843017578125, + "learning_rate": 2.166e-05, + "loss": 204.9555, "step": 10830 }, { - "epoch": 0.08758958944400003, - "grad_norm": 1123.542724609375, - "learning_rate": 4.336e-05, - "loss": 153.6385, + "epoch": 0.043794971658512344, + "grad_norm": 1556.0152587890625, + "learning_rate": 2.168e-05, + "loss": 178.9818, "step": 10840 }, { - "epoch": 0.08767039164828416, - "grad_norm": 1267.4825439453125, - "learning_rate": 4.3400000000000005e-05, - "loss": 177.9626, + "epoch": 0.04383537292387998, + "grad_norm": 1136.54443359375, + "learning_rate": 2.1700000000000002e-05, + "loss": 214.708, "step": 10850 }, { - "epoch": 0.0877511938525683, - "grad_norm": 1323.0853271484375, - "learning_rate": 4.3440000000000004e-05, - "loss": 169.0611, + "epoch": 0.04387577418924761, + "grad_norm": 913.5957641601562, + "learning_rate": 2.1720000000000002e-05, + "loss": 162.8733, "step": 10860 }, { - "epoch": 0.08783199605685243, - "grad_norm": 1246.5340576171875, - "learning_rate": 4.3480000000000004e-05, - "loss": 143.5475, + "epoch": 0.043916175454615236, + "grad_norm": 1809.7088623046875, + "learning_rate": 2.1740000000000002e-05, + "loss": 202.5214, "step": 10870 }, { - "epoch": 0.08791279826113656, - "grad_norm": 992.7854614257812, - "learning_rate": 4.352e-05, - "loss": 168.819, + "epoch": 0.04395657671998287, + "grad_norm": 1365.0262451171875, + "learning_rate": 2.176e-05, + "loss": 197.6029, "step": 10880 }, { - "epoch": 0.0879936004654207, - "grad_norm": 3300.27783203125, - "learning_rate": 4.356e-05, - "loss": 186.9549, + "epoch": 0.0439969779853505, + "grad_norm": 1124.8267822265625, + "learning_rate": 2.178e-05, + "loss": 195.3639, "step": 10890 }, { - "epoch": 0.08807440266970483, - "grad_norm": 1342.2066650390625, - "learning_rate": 4.36e-05, - "loss": 197.1866, + "epoch": 0.044037379250718135, + "grad_norm": 933.0280151367188, + "learning_rate": 2.18e-05, + "loss": 166.1828, "step": 10900 }, { - "epoch": 0.08815520487398897, - "grad_norm": 1559.653564453125, - "learning_rate": 4.364e-05, - "loss": 136.7352, + "epoch": 0.04407778051608576, + "grad_norm": 893.4417724609375, + "learning_rate": 2.182e-05, + "loss": 125.791, "step": 10910 }, { - "epoch": 0.0882360070782731, - "grad_norm": 1199.302490234375, - "learning_rate": 4.368e-05, - "loss": 183.9886, + "epoch": 0.044118181781453394, + "grad_norm": 1642.8094482421875, + "learning_rate": 2.184e-05, + "loss": 144.994, "step": 10920 }, { - "epoch": 0.08831680928255722, - "grad_norm": 1037.2520751953125, - "learning_rate": 4.372e-05, - "loss": 157.0541, + "epoch": 0.04415858304682103, + "grad_norm": 1519.8751220703125, + "learning_rate": 2.186e-05, + "loss": 158.417, "step": 10930 }, { - "epoch": 0.08839761148684136, - "grad_norm": 1746.9495849609375, - "learning_rate": 4.376e-05, - "loss": 158.3566, + "epoch": 0.04419898431218866, + "grad_norm": 1085.8243408203125, + "learning_rate": 2.188e-05, + "loss": 189.8353, "step": 10940 }, { - "epoch": 0.0884784136911255, - "grad_norm": 861.6211547851562, - "learning_rate": 4.38e-05, - "loss": 202.3647, + "epoch": 0.04423938557755629, + "grad_norm": 907.6950073242188, + "learning_rate": 2.19e-05, + "loss": 131.8621, "step": 10950 }, { - "epoch": 0.08855921589540963, - "grad_norm": 1614.130615234375, - "learning_rate": 4.384e-05, - "loss": 162.0783, + "epoch": 0.04427978684292392, + "grad_norm": 988.0367431640625, + "learning_rate": 2.192e-05, + "loss": 110.0259, "step": 10960 }, { - "epoch": 0.08864001809969375, - "grad_norm": 892.6734008789062, - "learning_rate": 4.388000000000001e-05, - "loss": 189.6491, + "epoch": 0.04432018810829155, + "grad_norm": 358.17889404296875, + "learning_rate": 2.1940000000000003e-05, + "loss": 144.1581, "step": 10970 }, { - "epoch": 0.08872082030397789, - "grad_norm": 2857.29150390625, - "learning_rate": 4.392e-05, - "loss": 197.7264, + "epoch": 0.044360589373659186, + "grad_norm": 3126.972900390625, + "learning_rate": 2.196e-05, + "loss": 242.1895, "step": 10980 }, { - "epoch": 0.08880162250826203, - "grad_norm": 744.896240234375, - "learning_rate": 4.396e-05, - "loss": 186.8744, + "epoch": 0.04440099063902681, + "grad_norm": 660.0758056640625, + "learning_rate": 2.198e-05, + "loss": 307.6868, "step": 10990 }, { - "epoch": 0.08888242471254616, - "grad_norm": 745.9660034179688, - "learning_rate": 4.4000000000000006e-05, - "loss": 160.1633, + "epoch": 0.044441391904394445, + "grad_norm": 1388.6563720703125, + "learning_rate": 2.2000000000000003e-05, + "loss": 191.1774, "step": 11000 }, { - "epoch": 0.08896322691683028, - "grad_norm": 1471.337646484375, - "learning_rate": 4.4040000000000005e-05, - "loss": 192.2696, + "epoch": 0.04448179316976208, + "grad_norm": 0.0, + "learning_rate": 2.2020000000000003e-05, + "loss": 182.281, "step": 11010 }, { - "epoch": 0.08904402912111442, - "grad_norm": 1103.0841064453125, - "learning_rate": 4.4080000000000005e-05, - "loss": 176.2664, + "epoch": 0.04452219443512971, + "grad_norm": 1133.305419921875, + "learning_rate": 2.2040000000000002e-05, + "loss": 141.6396, "step": 11020 }, { - "epoch": 0.08912483132539856, - "grad_norm": 1671.141357421875, - "learning_rate": 4.412e-05, - "loss": 167.9981, + "epoch": 0.04456259570049734, + "grad_norm": 760.71728515625, + "learning_rate": 2.206e-05, + "loss": 184.8224, "step": 11030 }, { - "epoch": 0.08920563352968269, - "grad_norm": 3819.29931640625, - "learning_rate": 4.4160000000000004e-05, - "loss": 165.1254, + "epoch": 0.04460299696586497, + "grad_norm": 700.0611572265625, + "learning_rate": 2.2080000000000002e-05, + "loss": 174.1201, "step": 11040 }, { - "epoch": 0.08928643573396683, - "grad_norm": 1165.341796875, - "learning_rate": 4.4200000000000004e-05, - "loss": 208.1041, + "epoch": 0.0446433982312326, + "grad_norm": 464.8121643066406, + "learning_rate": 2.2100000000000002e-05, + "loss": 147.1738, "step": 11050 }, { - "epoch": 0.08936723793825095, - "grad_norm": 1322.56982421875, - "learning_rate": 4.424e-05, - "loss": 174.6135, + "epoch": 0.044683799496600236, + "grad_norm": 659.0347900390625, + "learning_rate": 2.212e-05, + "loss": 121.4146, "step": 11060 }, { - "epoch": 0.08944804014253509, - "grad_norm": 1034.9105224609375, - "learning_rate": 4.428e-05, - "loss": 171.773, + "epoch": 0.04472420076196786, + "grad_norm": 495.3475646972656, + "learning_rate": 2.214e-05, + "loss": 122.6828, "step": 11070 }, { - "epoch": 0.08952884234681922, - "grad_norm": 1145.392822265625, - "learning_rate": 4.432e-05, - "loss": 152.1384, + "epoch": 0.044764602027335496, + "grad_norm": 899.0453491210938, + "learning_rate": 2.216e-05, + "loss": 171.483, "step": 11080 }, { - "epoch": 0.08960964455110336, - "grad_norm": 820.4057006835938, - "learning_rate": 4.436e-05, - "loss": 147.7346, + "epoch": 0.04480500329270313, + "grad_norm": 1652.7977294921875, + "learning_rate": 2.218e-05, + "loss": 136.9625, "step": 11090 }, { - "epoch": 0.08969044675538748, - "grad_norm": 1907.7330322265625, - "learning_rate": 4.44e-05, - "loss": 231.0103, + "epoch": 0.04484540455807076, + "grad_norm": 1164.9923095703125, + "learning_rate": 2.22e-05, + "loss": 218.6428, "step": 11100 }, { - "epoch": 0.08977124895967162, - "grad_norm": 4071.659423828125, - "learning_rate": 4.444e-05, - "loss": 189.1313, + "epoch": 0.04488580582343839, + "grad_norm": 1860.8387451171875, + "learning_rate": 2.222e-05, + "loss": 128.8451, "step": 11110 }, { - "epoch": 0.08985205116395575, - "grad_norm": 960.520751953125, - "learning_rate": 4.448e-05, - "loss": 170.6324, + "epoch": 0.04492620708880602, + "grad_norm": 1793.767578125, + "learning_rate": 2.224e-05, + "loss": 127.518, "step": 11120 }, { - "epoch": 0.08993285336823989, - "grad_norm": 1146.0321044921875, - "learning_rate": 4.452e-05, - "loss": 223.4478, + "epoch": 0.044966608354173654, + "grad_norm": 910.3096923828125, + "learning_rate": 2.226e-05, + "loss": 102.4282, "step": 11130 }, { - "epoch": 0.09001365557252401, - "grad_norm": 537.45751953125, - "learning_rate": 4.456e-05, - "loss": 146.5862, + "epoch": 0.04500700961954129, + "grad_norm": 3313.176025390625, + "learning_rate": 2.228e-05, + "loss": 205.4728, "step": 11140 }, { - "epoch": 0.09009445777680815, - "grad_norm": 1589.918212890625, - "learning_rate": 4.46e-05, - "loss": 229.1164, + "epoch": 0.04504741088490891, + "grad_norm": 959.5852661132812, + "learning_rate": 2.23e-05, + "loss": 181.3004, "step": 11150 }, { - "epoch": 0.09017525998109228, - "grad_norm": 2013.3570556640625, - "learning_rate": 4.4640000000000006e-05, - "loss": 191.8223, + "epoch": 0.045087812150276546, + "grad_norm": 3117.2705078125, + "learning_rate": 2.2320000000000003e-05, + "loss": 159.9985, "step": 11160 }, { - "epoch": 0.09025606218537642, - "grad_norm": 1235.016357421875, - "learning_rate": 4.468e-05, - "loss": 166.435, + "epoch": 0.04512821341564418, + "grad_norm": 4501.052734375, + "learning_rate": 2.234e-05, + "loss": 150.3723, "step": 11170 }, { - "epoch": 0.09033686438966056, - "grad_norm": 1219.5076904296875, - "learning_rate": 4.472e-05, - "loss": 147.5948, + "epoch": 0.04516861468101181, + "grad_norm": 981.978515625, + "learning_rate": 2.236e-05, + "loss": 142.0348, "step": 11180 }, { - "epoch": 0.09041766659394468, - "grad_norm": 1120.6129150390625, - "learning_rate": 4.4760000000000005e-05, - "loss": 171.5583, + "epoch": 0.04520901594637944, + "grad_norm": 1109.785400390625, + "learning_rate": 2.2380000000000003e-05, + "loss": 163.3822, "step": 11190 }, { - "epoch": 0.09049846879822881, - "grad_norm": 986.9843139648438, - "learning_rate": 4.4800000000000005e-05, - "loss": 173.6144, + "epoch": 0.04524941721174707, + "grad_norm": 1274.3033447265625, + "learning_rate": 2.2400000000000002e-05, + "loss": 268.0728, "step": 11200 }, { - "epoch": 0.09057927100251295, - "grad_norm": 981.845703125, - "learning_rate": 4.4840000000000004e-05, - "loss": 156.4097, + "epoch": 0.045289818477114704, + "grad_norm": 750.956298828125, + "learning_rate": 2.2420000000000002e-05, + "loss": 164.3101, "step": 11210 }, { - "epoch": 0.09066007320679709, - "grad_norm": 2041.173583984375, - "learning_rate": 4.488e-05, - "loss": 150.8351, + "epoch": 0.04533021974248234, + "grad_norm": 908.2940063476562, + "learning_rate": 2.244e-05, + "loss": 205.5986, "step": 11220 }, { - "epoch": 0.09074087541108121, - "grad_norm": 2189.515380859375, - "learning_rate": 4.4920000000000004e-05, - "loss": 199.2858, + "epoch": 0.045370621007849964, + "grad_norm": 1060.370849609375, + "learning_rate": 2.2460000000000002e-05, + "loss": 152.6007, "step": 11230 }, { - "epoch": 0.09082167761536535, - "grad_norm": 810.195068359375, - "learning_rate": 4.496e-05, - "loss": 186.9851, + "epoch": 0.0454110222732176, + "grad_norm": 1556.2314453125, + "learning_rate": 2.248e-05, + "loss": 204.8753, "step": 11240 }, { - "epoch": 0.09090247981964948, - "grad_norm": 1534.787353515625, - "learning_rate": 4.5e-05, - "loss": 152.5892, + "epoch": 0.04545142353858523, + "grad_norm": 683.9859619140625, + "learning_rate": 2.25e-05, + "loss": 142.627, "step": 11250 }, { - "epoch": 0.09098328202393362, - "grad_norm": 1222.702392578125, - "learning_rate": 4.504e-05, - "loss": 147.9779, + "epoch": 0.04549182480395286, + "grad_norm": 1313.3026123046875, + "learning_rate": 2.252e-05, + "loss": 183.1745, "step": 11260 }, { - "epoch": 0.09106408422821774, - "grad_norm": 2411.489501953125, - "learning_rate": 4.508e-05, - "loss": 188.0063, + "epoch": 0.04553222606932049, + "grad_norm": 1381.1295166015625, + "learning_rate": 2.254e-05, + "loss": 175.9728, "step": 11270 }, { - "epoch": 0.09114488643250188, - "grad_norm": 1780.130615234375, - "learning_rate": 4.512e-05, - "loss": 157.6954, + "epoch": 0.04557262733468812, + "grad_norm": 509.716796875, + "learning_rate": 2.256e-05, + "loss": 210.1509, "step": 11280 }, { - "epoch": 0.09122568863678601, - "grad_norm": 1758.829833984375, - "learning_rate": 4.516e-05, - "loss": 210.6379, + "epoch": 0.045613028600055755, + "grad_norm": 2492.5234375, + "learning_rate": 2.258e-05, + "loss": 231.53, "step": 11290 }, { - "epoch": 0.09130649084107015, - "grad_norm": 1227.318603515625, - "learning_rate": 4.52e-05, - "loss": 157.6064, + "epoch": 0.04565342986542339, + "grad_norm": 1633.21630859375, + "learning_rate": 2.26e-05, + "loss": 152.2005, "step": 11300 }, { - "epoch": 0.09138729304535427, - "grad_norm": 1432.8858642578125, - "learning_rate": 4.524000000000001e-05, - "loss": 163.6855, + "epoch": 0.045693831130791014, + "grad_norm": 768.939208984375, + "learning_rate": 2.2620000000000004e-05, + "loss": 185.2856, "step": 11310 }, { - "epoch": 0.0914680952496384, - "grad_norm": 624.8172607421875, - "learning_rate": 4.528e-05, - "loss": 165.9595, + "epoch": 0.04573423239615865, + "grad_norm": 799.730712890625, + "learning_rate": 2.264e-05, + "loss": 194.0909, "step": 11320 }, { - "epoch": 0.09154889745392254, - "grad_norm": 1297.4705810546875, - "learning_rate": 4.532e-05, - "loss": 173.91, + "epoch": 0.04577463366152628, + "grad_norm": 3679.43310546875, + "learning_rate": 2.266e-05, + "loss": 354.1977, "step": 11330 }, { - "epoch": 0.09162969965820668, - "grad_norm": 624.9852905273438, - "learning_rate": 4.536e-05, - "loss": 188.6223, + "epoch": 0.04581503492689391, + "grad_norm": 1576.691650390625, + "learning_rate": 2.268e-05, + "loss": 181.5113, "step": 11340 }, { - "epoch": 0.09171050186249081, - "grad_norm": 1098.534423828125, - "learning_rate": 4.5400000000000006e-05, - "loss": 169.788, + "epoch": 0.04585543619226154, + "grad_norm": 1108.6512451171875, + "learning_rate": 2.2700000000000003e-05, + "loss": 184.2569, "step": 11350 }, { - "epoch": 0.09179130406677494, - "grad_norm": 797.4472045898438, - "learning_rate": 4.5440000000000005e-05, - "loss": 156.8224, + "epoch": 0.04589583745762917, + "grad_norm": 1905.447998046875, + "learning_rate": 2.2720000000000003e-05, + "loss": 217.2384, "step": 11360 }, { - "epoch": 0.09187210627105907, - "grad_norm": 1351.994873046875, - "learning_rate": 4.548e-05, - "loss": 179.5852, + "epoch": 0.045936238722996806, + "grad_norm": 884.3139038085938, + "learning_rate": 2.274e-05, + "loss": 141.6524, "step": 11370 }, { - "epoch": 0.09195290847534321, - "grad_norm": 886.7573852539062, - "learning_rate": 4.5520000000000005e-05, - "loss": 135.6091, + "epoch": 0.04597663998836444, + "grad_norm": 617.2454223632812, + "learning_rate": 2.2760000000000002e-05, + "loss": 148.7823, "step": 11380 }, { - "epoch": 0.09203371067962735, - "grad_norm": 1108.171630859375, - "learning_rate": 4.5560000000000004e-05, - "loss": 168.4767, + "epoch": 0.046017041253732065, + "grad_norm": 551.2994384765625, + "learning_rate": 2.2780000000000002e-05, + "loss": 133.9882, "step": 11390 }, { - "epoch": 0.09211451288391147, - "grad_norm": 832.1515502929688, - "learning_rate": 4.5600000000000004e-05, - "loss": 144.3086, + "epoch": 0.0460574425190997, + "grad_norm": 700.8192138671875, + "learning_rate": 2.2800000000000002e-05, + "loss": 154.8847, "step": 11400 }, { - "epoch": 0.0921953150881956, - "grad_norm": 816.866943359375, - "learning_rate": 4.564e-05, - "loss": 186.062, + "epoch": 0.04609784378446733, + "grad_norm": 1914.3916015625, + "learning_rate": 2.282e-05, + "loss": 145.8075, "step": 11410 }, { - "epoch": 0.09227611729247974, - "grad_norm": 1261.3763427734375, - "learning_rate": 4.568e-05, - "loss": 163.5355, + "epoch": 0.046138245049834964, + "grad_norm": 2445.005859375, + "learning_rate": 2.284e-05, + "loss": 199.9154, "step": 11420 }, { - "epoch": 0.09235691949676388, - "grad_norm": 1099.1051025390625, - "learning_rate": 4.572e-05, - "loss": 164.8973, + "epoch": 0.04617864631520259, + "grad_norm": 1541.644775390625, + "learning_rate": 2.286e-05, + "loss": 168.6186, "step": 11430 }, { - "epoch": 0.092437721701048, - "grad_norm": 827.2234497070312, - "learning_rate": 4.576e-05, - "loss": 136.455, + "epoch": 0.04621904758057022, + "grad_norm": 816.3759765625, + "learning_rate": 2.288e-05, + "loss": 181.8408, "step": 11440 }, { - "epoch": 0.09251852390533213, - "grad_norm": 952.1354370117188, - "learning_rate": 4.58e-05, - "loss": 178.8885, + "epoch": 0.046259448845937856, + "grad_norm": 1267.0755615234375, + "learning_rate": 2.29e-05, + "loss": 172.8114, "step": 11450 }, { - "epoch": 0.09259932610961627, - "grad_norm": 1530.2086181640625, - "learning_rate": 4.584e-05, - "loss": 197.7136, + "epoch": 0.04629985011130549, + "grad_norm": 855.4586791992188, + "learning_rate": 2.292e-05, + "loss": 197.7767, "step": 11460 }, { - "epoch": 0.0926801283139004, - "grad_norm": 1051.4169921875, - "learning_rate": 4.588e-05, - "loss": 176.7224, + "epoch": 0.046340251376673115, + "grad_norm": 1094.00634765625, + "learning_rate": 2.294e-05, + "loss": 171.698, "step": 11470 }, { - "epoch": 0.09276093051818454, - "grad_norm": 1054.561279296875, - "learning_rate": 4.592e-05, - "loss": 160.7726, + "epoch": 0.04638065264204075, + "grad_norm": 1205.0, + "learning_rate": 2.296e-05, + "loss": 215.9526, "step": 11480 }, { - "epoch": 0.09284173272246866, - "grad_norm": 1013.0098876953125, - "learning_rate": 4.596e-05, - "loss": 160.4522, + "epoch": 0.04642105390740838, + "grad_norm": 1176.0413818359375, + "learning_rate": 2.298e-05, + "loss": 163.7831, "step": 11490 }, { - "epoch": 0.0929225349267528, - "grad_norm": 2939.901611328125, - "learning_rate": 4.600000000000001e-05, - "loss": 153.9102, + "epoch": 0.046461455172776014, + "grad_norm": 786.1920776367188, + "learning_rate": 2.3000000000000003e-05, + "loss": 167.4065, "step": 11500 }, { - "epoch": 0.09300333713103694, - "grad_norm": 981.0615844726562, - "learning_rate": 4.604e-05, - "loss": 118.6744, + "epoch": 0.04650185643814364, + "grad_norm": 516.4776000976562, + "learning_rate": 2.302e-05, + "loss": 165.6811, "step": 11510 }, { - "epoch": 0.09308413933532107, - "grad_norm": 1067.1905517578125, - "learning_rate": 4.608e-05, - "loss": 181.1808, + "epoch": 0.046542257703511274, + "grad_norm": 396.16497802734375, + "learning_rate": 2.304e-05, + "loss": 168.2221, "step": 11520 }, { - "epoch": 0.0931649415396052, - "grad_norm": 819.0405883789062, - "learning_rate": 4.612e-05, - "loss": 305.4522, + "epoch": 0.04658265896887891, + "grad_norm": 882.178466796875, + "learning_rate": 2.306e-05, + "loss": 140.923, "step": 11530 }, { - "epoch": 0.09324574374388933, - "grad_norm": 1146.8128662109375, - "learning_rate": 4.6160000000000005e-05, - "loss": 175.5782, + "epoch": 0.04662306023424654, + "grad_norm": 934.631591796875, + "learning_rate": 2.3080000000000003e-05, + "loss": 186.3469, "step": 11540 }, { - "epoch": 0.09332654594817347, - "grad_norm": 710.7339477539062, - "learning_rate": 4.6200000000000005e-05, - "loss": 173.2434, + "epoch": 0.046663461499614166, + "grad_norm": 1052.7349853515625, + "learning_rate": 2.3100000000000002e-05, + "loss": 201.8761, "step": 11550 }, { - "epoch": 0.0934073481524576, - "grad_norm": 675.75537109375, - "learning_rate": 4.624e-05, - "loss": 154.4693, + "epoch": 0.0467038627649818, + "grad_norm": 1325.5751953125, + "learning_rate": 2.312e-05, + "loss": 255.0451, "step": 11560 }, { - "epoch": 0.09348815035674173, - "grad_norm": 662.1353149414062, - "learning_rate": 4.6280000000000004e-05, - "loss": 157.1887, + "epoch": 0.04674426403034943, + "grad_norm": 827.1654663085938, + "learning_rate": 2.3140000000000002e-05, + "loss": 162.8754, "step": 11570 }, { - "epoch": 0.09356895256102586, - "grad_norm": 1741.1334228515625, - "learning_rate": 4.6320000000000004e-05, - "loss": 204.9533, + "epoch": 0.046784665295717065, + "grad_norm": 506.81097412109375, + "learning_rate": 2.3160000000000002e-05, + "loss": 99.88, "step": 11580 }, { - "epoch": 0.09364975476531, - "grad_norm": 1301.878662109375, - "learning_rate": 4.636e-05, - "loss": 164.3773, + "epoch": 0.04682506656108469, + "grad_norm": 1403.5689697265625, + "learning_rate": 2.318e-05, + "loss": 157.3405, "step": 11590 }, { - "epoch": 0.09373055696959413, - "grad_norm": 2257.124267578125, - "learning_rate": 4.64e-05, - "loss": 196.3772, + "epoch": 0.046865467826452324, + "grad_norm": 1264.109375, + "learning_rate": 2.32e-05, + "loss": 135.8957, "step": 11600 }, { - "epoch": 0.09381135917387827, - "grad_norm": 867.2877807617188, - "learning_rate": 4.644e-05, - "loss": 184.0841, + "epoch": 0.04690586909181996, + "grad_norm": 851.1090698242188, + "learning_rate": 2.322e-05, + "loss": 98.8174, "step": 11610 }, { - "epoch": 0.09389216137816239, - "grad_norm": 656.1356811523438, - "learning_rate": 4.648e-05, - "loss": 198.8547, + "epoch": 0.04694627035718759, + "grad_norm": 1114.6529541015625, + "learning_rate": 2.324e-05, + "loss": 174.8662, "step": 11620 }, { - "epoch": 0.09397296358244653, - "grad_norm": 730.0324096679688, - "learning_rate": 4.652e-05, - "loss": 121.4998, + "epoch": 0.046986671622555216, + "grad_norm": 997.0178833007812, + "learning_rate": 2.326e-05, + "loss": 202.974, "step": 11630 }, { - "epoch": 0.09405376578673066, - "grad_norm": 957.2469482421875, - "learning_rate": 4.656e-05, - "loss": 145.3453, + "epoch": 0.04702707288792285, + "grad_norm": 2686.275146484375, + "learning_rate": 2.328e-05, + "loss": 180.5854, "step": 11640 }, { - "epoch": 0.0941345679910148, - "grad_norm": 859.6589965820312, - "learning_rate": 4.660000000000001e-05, - "loss": 128.9684, + "epoch": 0.04706747415329048, + "grad_norm": 1341.779541015625, + "learning_rate": 2.3300000000000004e-05, + "loss": 188.3465, "step": 11650 }, { - "epoch": 0.09421537019529892, - "grad_norm": 1499.4656982421875, - "learning_rate": 4.664e-05, - "loss": 148.427, + "epoch": 0.047107875418658116, + "grad_norm": 1174.360595703125, + "learning_rate": 2.332e-05, + "loss": 100.4389, "step": 11660 }, { - "epoch": 0.09429617239958306, - "grad_norm": 879.8517456054688, - "learning_rate": 4.668e-05, - "loss": 155.101, + "epoch": 0.04714827668402574, + "grad_norm": 485.88134765625, + "learning_rate": 2.334e-05, + "loss": 84.7096, "step": 11670 }, { - "epoch": 0.0943769746038672, - "grad_norm": 1098.4874267578125, - "learning_rate": 4.672e-05, - "loss": 132.9569, + "epoch": 0.047188677949393375, + "grad_norm": 700.1830444335938, + "learning_rate": 2.336e-05, + "loss": 252.0006, "step": 11680 }, { - "epoch": 0.09445777680815133, - "grad_norm": 2264.105712890625, - "learning_rate": 4.6760000000000006e-05, - "loss": 150.9695, + "epoch": 0.04722907921476101, + "grad_norm": 1874.8160400390625, + "learning_rate": 2.3380000000000003e-05, + "loss": 271.8778, "step": 11690 }, { - "epoch": 0.09453857901243545, - "grad_norm": 1145.921630859375, - "learning_rate": 4.6800000000000006e-05, - "loss": 191.5021, + "epoch": 0.04726948048012864, + "grad_norm": 780.0234985351562, + "learning_rate": 2.3400000000000003e-05, + "loss": 156.8313, "step": 11700 }, { - "epoch": 0.09461938121671959, - "grad_norm": 810.2247924804688, - "learning_rate": 4.684e-05, - "loss": 149.1247, + "epoch": 0.04730988174549627, + "grad_norm": 1217.7091064453125, + "learning_rate": 2.342e-05, + "loss": 139.3272, "step": 11710 }, { - "epoch": 0.09470018342100373, - "grad_norm": 1204.9842529296875, - "learning_rate": 4.688e-05, - "loss": 133.8571, + "epoch": 0.0473502830108639, + "grad_norm": 1002.0595092773438, + "learning_rate": 2.344e-05, + "loss": 155.965, "step": 11720 }, { - "epoch": 0.09478098562528786, - "grad_norm": 737.8526000976562, - "learning_rate": 4.6920000000000005e-05, - "loss": 139.7323, + "epoch": 0.04739068427623153, + "grad_norm": 597.17529296875, + "learning_rate": 2.3460000000000002e-05, + "loss": 172.1847, "step": 11730 }, { - "epoch": 0.094861787829572, - "grad_norm": 1930.502685546875, - "learning_rate": 4.6960000000000004e-05, - "loss": 124.6747, + "epoch": 0.047431085541599166, + "grad_norm": 910.2625732421875, + "learning_rate": 2.3480000000000002e-05, + "loss": 148.3666, "step": 11740 }, { - "epoch": 0.09494259003385612, - "grad_norm": 1270.105224609375, - "learning_rate": 4.7e-05, - "loss": 195.0569, + "epoch": 0.04747148680696679, + "grad_norm": 1215.2615966796875, + "learning_rate": 2.35e-05, + "loss": 194.8361, "step": 11750 }, { - "epoch": 0.09502339223814026, - "grad_norm": 1404.0224609375, - "learning_rate": 4.7040000000000004e-05, - "loss": 166.4848, + "epoch": 0.047511888072334425, + "grad_norm": 762.500244140625, + "learning_rate": 2.3520000000000002e-05, + "loss": 113.7633, "step": 11760 }, { - "epoch": 0.09510419444242439, - "grad_norm": 781.730712890625, - "learning_rate": 4.708e-05, - "loss": 141.6356, + "epoch": 0.04755228933770206, + "grad_norm": 3147.351318359375, + "learning_rate": 2.354e-05, + "loss": 194.7475, "step": 11770 }, { - "epoch": 0.09518499664670853, - "grad_norm": 1125.01904296875, - "learning_rate": 4.712e-05, - "loss": 174.6155, + "epoch": 0.04759269060306969, + "grad_norm": 1164.023681640625, + "learning_rate": 2.356e-05, + "loss": 108.1381, "step": 11780 }, { - "epoch": 0.09526579885099265, - "grad_norm": 765.6973876953125, - "learning_rate": 4.716e-05, - "loss": 158.3404, + "epoch": 0.04763309186843732, + "grad_norm": 2093.1826171875, + "learning_rate": 2.358e-05, + "loss": 165.2831, "step": 11790 }, { - "epoch": 0.09534660105527679, - "grad_norm": 1408.74560546875, - "learning_rate": 4.72e-05, - "loss": 140.0329, + "epoch": 0.04767349313380495, + "grad_norm": 628.909423828125, + "learning_rate": 2.36e-05, + "loss": 172.9029, "step": 11800 }, { - "epoch": 0.09542740325956092, - "grad_norm": 1072.8111572265625, - "learning_rate": 4.724e-05, - "loss": 194.64, + "epoch": 0.047713894399172584, + "grad_norm": 400.00811767578125, + "learning_rate": 2.362e-05, + "loss": 181.3741, "step": 11810 }, { - "epoch": 0.09550820546384506, - "grad_norm": 731.4005126953125, - "learning_rate": 4.728e-05, - "loss": 198.574, + "epoch": 0.04775429566454022, + "grad_norm": 1536.272216796875, + "learning_rate": 2.364e-05, + "loss": 154.7509, "step": 11820 }, { - "epoch": 0.09558900766812918, - "grad_norm": 790.837158203125, - "learning_rate": 4.732e-05, - "loss": 226.6698, + "epoch": 0.04779469692990784, + "grad_norm": 1407.3763427734375, + "learning_rate": 2.366e-05, + "loss": 174.5441, "step": 11830 }, { - "epoch": 0.09566980987241332, - "grad_norm": 1121.7171630859375, - "learning_rate": 4.736000000000001e-05, - "loss": 178.3607, + "epoch": 0.047835098195275476, + "grad_norm": 726.475830078125, + "learning_rate": 2.3680000000000004e-05, + "loss": 171.3695, "step": 11840 }, { - "epoch": 0.09575061207669745, - "grad_norm": 1308.0308837890625, - "learning_rate": 4.74e-05, - "loss": 187.4262, + "epoch": 0.04787549946064311, + "grad_norm": 1060.8369140625, + "learning_rate": 2.37e-05, + "loss": 150.6572, "step": 11850 }, { - "epoch": 0.09583141428098159, - "grad_norm": 1256.408447265625, - "learning_rate": 4.744e-05, - "loss": 254.674, + "epoch": 0.04791590072601074, + "grad_norm": 2245.559814453125, + "learning_rate": 2.372e-05, + "loss": 172.8109, "step": 11860 }, { - "epoch": 0.09591221648526572, - "grad_norm": 1465.269775390625, - "learning_rate": 4.748e-05, - "loss": 210.7811, + "epoch": 0.04795630199137837, + "grad_norm": 2129.251708984375, + "learning_rate": 2.374e-05, + "loss": 191.5867, "step": 11870 }, { - "epoch": 0.09599301868954985, - "grad_norm": 1035.5107421875, - "learning_rate": 4.7520000000000006e-05, - "loss": 155.1348, + "epoch": 0.047996703256746, + "grad_norm": 1103.0263671875, + "learning_rate": 2.3760000000000003e-05, + "loss": 213.8663, "step": 11880 }, { - "epoch": 0.09607382089383398, - "grad_norm": 847.0551147460938, - "learning_rate": 4.7560000000000005e-05, - "loss": 167.0793, + "epoch": 0.048037104522113634, + "grad_norm": 1063.9124755859375, + "learning_rate": 2.3780000000000003e-05, + "loss": 194.2018, "step": 11890 }, { - "epoch": 0.09615462309811812, - "grad_norm": 1275.8096923828125, - "learning_rate": 4.76e-05, - "loss": 233.6509, + "epoch": 0.04807750578748127, + "grad_norm": 7122.65185546875, + "learning_rate": 2.38e-05, + "loss": 229.5696, "step": 11900 }, { - "epoch": 0.09623542530240226, - "grad_norm": 1621.178466796875, - "learning_rate": 4.7640000000000005e-05, - "loss": 145.1244, + "epoch": 0.04811790705284889, + "grad_norm": 1017.9201049804688, + "learning_rate": 2.3820000000000002e-05, + "loss": 146.0323, "step": 11910 }, { - "epoch": 0.09631622750668638, - "grad_norm": 1200.5909423828125, - "learning_rate": 4.7680000000000004e-05, - "loss": 196.8802, + "epoch": 0.048158308318216526, + "grad_norm": 1932.71435546875, + "learning_rate": 2.3840000000000002e-05, + "loss": 143.6282, "step": 11920 }, { - "epoch": 0.09639702971097051, - "grad_norm": 3169.34130859375, - "learning_rate": 4.7720000000000004e-05, - "loss": 139.0356, + "epoch": 0.04819870958358416, + "grad_norm": 753.262939453125, + "learning_rate": 2.3860000000000002e-05, + "loss": 151.7239, "step": 11930 }, { - "epoch": 0.09647783191525465, - "grad_norm": 730.8500366210938, - "learning_rate": 4.7760000000000004e-05, - "loss": 136.013, + "epoch": 0.04823911084895179, + "grad_norm": 1837.5714111328125, + "learning_rate": 2.3880000000000002e-05, + "loss": 192.4802, "step": 11940 }, { - "epoch": 0.09655863411953879, - "grad_norm": 773.2274780273438, - "learning_rate": 4.78e-05, - "loss": 189.061, + "epoch": 0.04827951211431942, + "grad_norm": 1542.8756103515625, + "learning_rate": 2.39e-05, + "loss": 239.2184, "step": 11950 }, { - "epoch": 0.09663943632382291, - "grad_norm": 2446.462158203125, - "learning_rate": 4.784e-05, - "loss": 173.5474, + "epoch": 0.04831991337968705, + "grad_norm": 1126.86572265625, + "learning_rate": 2.392e-05, + "loss": 132.8466, "step": 11960 }, { - "epoch": 0.09672023852810704, - "grad_norm": 1230.4921875, - "learning_rate": 4.788e-05, - "loss": 185.5946, + "epoch": 0.048360314645054685, + "grad_norm": 650.4667358398438, + "learning_rate": 2.394e-05, + "loss": 121.4513, "step": 11970 }, { - "epoch": 0.09680104073239118, - "grad_norm": 836.4797973632812, - "learning_rate": 4.792e-05, - "loss": 170.7093, + "epoch": 0.04840071591042232, + "grad_norm": 708.6920776367188, + "learning_rate": 2.396e-05, + "loss": 172.2358, "step": 11980 }, { - "epoch": 0.09688184293667532, - "grad_norm": 717.6349487304688, - "learning_rate": 4.796e-05, - "loss": 150.7234, + "epoch": 0.048441117175789944, + "grad_norm": 1389.237548828125, + "learning_rate": 2.398e-05, + "loss": 220.8339, "step": 11990 }, { - "epoch": 0.09696264514095944, - "grad_norm": 1342.440673828125, - "learning_rate": 4.8e-05, - "loss": 186.2961, + "epoch": 0.04848151844115758, + "grad_norm": 580.6951293945312, + "learning_rate": 2.4e-05, + "loss": 178.8768, "step": 12000 - }, - { - "epoch": 0.09704344734524357, - "grad_norm": 1010.5694580078125, - "learning_rate": 4.804e-05, - "loss": 207.1202, - "step": 12010 - }, - { - "epoch": 0.09712424954952771, - "grad_norm": 1768.517333984375, - "learning_rate": 4.808e-05, - "loss": 201.3005, - "step": 12020 - }, - { - "epoch": 0.09720505175381185, - "grad_norm": 1026.8671875, - "learning_rate": 4.812000000000001e-05, - "loss": 149.5696, - "step": 12030 - }, - { - "epoch": 0.09728585395809598, - "grad_norm": 1275.5589599609375, - "learning_rate": 4.816e-05, - "loss": 133.8864, - "step": 12040 - }, - { - "epoch": 0.0973666561623801, - "grad_norm": 1346.738037109375, - "learning_rate": 4.82e-05, - "loss": 152.9663, - "step": 12050 - }, - { - "epoch": 0.09744745836666424, - "grad_norm": 923.6655883789062, - "learning_rate": 4.824e-05, - "loss": 150.9784, - "step": 12060 - }, - { - "epoch": 0.09752826057094838, - "grad_norm": 3050.222900390625, - "learning_rate": 4.8280000000000005e-05, - "loss": 153.0247, - "step": 12070 - }, - { - "epoch": 0.09760906277523251, - "grad_norm": 1102.73974609375, - "learning_rate": 4.8320000000000005e-05, - "loss": 150.1411, - "step": 12080 - }, - { - "epoch": 0.09768986497951664, - "grad_norm": 1364.6285400390625, - "learning_rate": 4.836e-05, - "loss": 174.2896, - "step": 12090 - }, - { - "epoch": 0.09777066718380077, - "grad_norm": 1641.3353271484375, - "learning_rate": 4.8400000000000004e-05, - "loss": 163.0087, - "step": 12100 - }, - { - "epoch": 0.09785146938808491, - "grad_norm": 838.1264038085938, - "learning_rate": 4.8440000000000004e-05, - "loss": 183.779, - "step": 12110 - }, - { - "epoch": 0.09793227159236904, - "grad_norm": 973.3532104492188, - "learning_rate": 4.8480000000000003e-05, - "loss": 189.3351, - "step": 12120 - }, - { - "epoch": 0.09801307379665317, - "grad_norm": 1237.4892578125, - "learning_rate": 4.852e-05, - "loss": 207.5712, - "step": 12130 - }, - { - "epoch": 0.0980938760009373, - "grad_norm": 2167.91845703125, - "learning_rate": 4.856e-05, - "loss": 135.0125, - "step": 12140 - }, - { - "epoch": 0.09817467820522144, - "grad_norm": 837.57373046875, - "learning_rate": 4.86e-05, - "loss": 136.3053, - "step": 12150 - }, - { - "epoch": 0.09825548040950557, - "grad_norm": 961.7384033203125, - "learning_rate": 4.864e-05, - "loss": 166.4451, - "step": 12160 - }, - { - "epoch": 0.09833628261378971, - "grad_norm": 674.4572143554688, - "learning_rate": 4.868e-05, - "loss": 129.0381, - "step": 12170 - }, - { - "epoch": 0.09841708481807383, - "grad_norm": 1955.5008544921875, - "learning_rate": 4.872000000000001e-05, - "loss": 167.4936, - "step": 12180 - }, - { - "epoch": 0.09849788702235797, - "grad_norm": 1208.7279052734375, - "learning_rate": 4.876e-05, - "loss": 172.8202, - "step": 12190 - }, - { - "epoch": 0.0985786892266421, - "grad_norm": 799.6799926757812, - "learning_rate": 4.88e-05, - "loss": 186.0433, - "step": 12200 - }, - { - "epoch": 0.09865949143092624, - "grad_norm": 985.7881469726562, - "learning_rate": 4.884e-05, - "loss": 222.6292, - "step": 12210 - }, - { - "epoch": 0.09874029363521036, - "grad_norm": 1215.9290771484375, - "learning_rate": 4.8880000000000006e-05, - "loss": 173.9973, - "step": 12220 - }, - { - "epoch": 0.0988210958394945, - "grad_norm": 1102.7919921875, - "learning_rate": 4.8920000000000006e-05, - "loss": 187.2067, - "step": 12230 - }, - { - "epoch": 0.09890189804377864, - "grad_norm": 1129.8570556640625, - "learning_rate": 4.896e-05, - "loss": 146.3462, - "step": 12240 - }, - { - "epoch": 0.09898270024806277, - "grad_norm": 1339.725341796875, - "learning_rate": 4.9e-05, - "loss": 228.0844, - "step": 12250 - }, - { - "epoch": 0.0990635024523469, - "grad_norm": 2263.031494140625, - "learning_rate": 4.9040000000000005e-05, - "loss": 163.0763, - "step": 12260 - }, - { - "epoch": 0.09914430465663103, - "grad_norm": 845.9645385742188, - "learning_rate": 4.9080000000000004e-05, - "loss": 167.7114, - "step": 12270 - }, - { - "epoch": 0.09922510686091517, - "grad_norm": 938.7130126953125, - "learning_rate": 4.9120000000000004e-05, - "loss": 151.6433, - "step": 12280 - }, - { - "epoch": 0.0993059090651993, - "grad_norm": 758.6414794921875, - "learning_rate": 4.9160000000000004e-05, - "loss": 182.8119, - "step": 12290 - }, - { - "epoch": 0.09938671126948344, - "grad_norm": 2226.031005859375, - "learning_rate": 4.92e-05, - "loss": 216.8831, - "step": 12300 - }, - { - "epoch": 0.09946751347376756, - "grad_norm": 1011.7924194335938, - "learning_rate": 4.924e-05, - "loss": 123.0676, - "step": 12310 - }, - { - "epoch": 0.0995483156780517, - "grad_norm": 1064.193115234375, - "learning_rate": 4.928e-05, - "loss": 138.8443, - "step": 12320 - }, - { - "epoch": 0.09962911788233583, - "grad_norm": 1430.9873046875, - "learning_rate": 4.932e-05, - "loss": 144.371, - "step": 12330 - }, - { - "epoch": 0.09970992008661997, - "grad_norm": 1438.7493896484375, - "learning_rate": 4.936e-05, - "loss": 165.0729, - "step": 12340 - }, - { - "epoch": 0.09979072229090409, - "grad_norm": 863.255126953125, - "learning_rate": 4.94e-05, - "loss": 160.2718, - "step": 12350 - }, - { - "epoch": 0.09987152449518823, - "grad_norm": 1316.8582763671875, - "learning_rate": 4.944e-05, - "loss": 181.4045, - "step": 12360 - }, - { - "epoch": 0.09995232669947236, - "grad_norm": 939.5771484375, - "learning_rate": 4.948000000000001e-05, - "loss": 160.5194, - "step": 12370 - }, - { - "epoch": 0.1000331289037565, - "grad_norm": 690.865478515625, - "learning_rate": 4.952e-05, - "loss": 186.1665, - "step": 12380 - }, - { - "epoch": 0.10011393110804062, - "grad_norm": 1229.1156005859375, - "learning_rate": 4.956e-05, - "loss": 122.7564, - "step": 12390 - }, - { - "epoch": 0.10019473331232476, - "grad_norm": 1259.888427734375, - "learning_rate": 4.96e-05, - "loss": 209.4278, - "step": 12400 - }, - { - "epoch": 0.1002755355166089, - "grad_norm": 1434.8515625, - "learning_rate": 4.9640000000000006e-05, - "loss": 185.9033, - "step": 12410 - }, - { - "epoch": 0.10035633772089303, - "grad_norm": 2604.31982421875, - "learning_rate": 4.9680000000000005e-05, - "loss": 237.6686, - "step": 12420 - }, - { - "epoch": 0.10043713992517717, - "grad_norm": 1235.104248046875, - "learning_rate": 4.972e-05, - "loss": 172.7765, - "step": 12430 - }, - { - "epoch": 0.10051794212946129, - "grad_norm": 1696.4671630859375, - "learning_rate": 4.976e-05, - "loss": 190.3481, - "step": 12440 - }, - { - "epoch": 0.10059874433374542, - "grad_norm": 794.130859375, - "learning_rate": 4.9800000000000004e-05, - "loss": 149.6749, - "step": 12450 - }, - { - "epoch": 0.10067954653802956, - "grad_norm": 815.9335327148438, - "learning_rate": 4.9840000000000004e-05, - "loss": 160.8904, - "step": 12460 - }, - { - "epoch": 0.1007603487423137, - "grad_norm": 1460.5875244140625, - "learning_rate": 4.9880000000000004e-05, - "loss": 148.0341, - "step": 12470 - }, - { - "epoch": 0.10084115094659782, - "grad_norm": 1123.116943359375, - "learning_rate": 4.992e-05, - "loss": 140.76, - "step": 12480 - }, - { - "epoch": 0.10092195315088195, - "grad_norm": 1400.1688232421875, - "learning_rate": 4.996e-05, - "loss": 173.8594, - "step": 12490 - }, - { - "epoch": 0.10100275535516609, - "grad_norm": 739.1624145507812, - "learning_rate": 5e-05, - "loss": 166.7017, - "step": 12500 - }, - { - "epoch": 0.10108355755945023, - "grad_norm": 2877.39208984375, - "learning_rate": 4.999999902522426e-05, - "loss": 266.3766, - "step": 12510 - }, - { - "epoch": 0.10116435976373435, - "grad_norm": 1436.8443603515625, - "learning_rate": 4.9999996100897126e-05, - "loss": 126.9785, - "step": 12520 - }, - { - "epoch": 0.10124516196801848, - "grad_norm": 1014.07763671875, - "learning_rate": 4.999999122701883e-05, - "loss": 173.9883, - "step": 12530 - }, - { - "epoch": 0.10132596417230262, - "grad_norm": 672.8335571289062, - "learning_rate": 4.999998440358973e-05, - "loss": 152.0292, - "step": 12540 - }, - { - "epoch": 0.10140676637658676, - "grad_norm": 1747.57568359375, - "learning_rate": 4.999997563061038e-05, - "loss": 170.5477, - "step": 12550 - }, - { - "epoch": 0.10148756858087088, - "grad_norm": 1307.4234619140625, - "learning_rate": 4.9999964908081455e-05, - "loss": 173.7452, - "step": 12560 - }, - { - "epoch": 0.10156837078515502, - "grad_norm": 1389.5120849609375, - "learning_rate": 4.999995223600379e-05, - "loss": 195.8977, - "step": 12570 - }, - { - "epoch": 0.10164917298943915, - "grad_norm": 1296.297607421875, - "learning_rate": 4.999993761437838e-05, - "loss": 199.3999, - "step": 12580 - }, - { - "epoch": 0.10172997519372329, - "grad_norm": 1167.2091064453125, - "learning_rate": 4.999992104320636e-05, - "loss": 177.1517, - "step": 12590 - }, - { - "epoch": 0.10181077739800742, - "grad_norm": 866.978759765625, - "learning_rate": 4.9999902522489015e-05, - "loss": 171.3453, - "step": 12600 - }, - { - "epoch": 0.10189157960229155, - "grad_norm": 664.741455078125, - "learning_rate": 4.999988205222781e-05, - "loss": 137.3964, - "step": 12610 - }, - { - "epoch": 0.10197238180657568, - "grad_norm": 4070.291748046875, - "learning_rate": 4.999985963242432e-05, - "loss": 207.3841, - "step": 12620 - }, - { - "epoch": 0.10205318401085982, - "grad_norm": 1080.383544921875, - "learning_rate": 4.99998352630803e-05, - "loss": 161.8314, - "step": 12630 - }, - { - "epoch": 0.10213398621514395, - "grad_norm": 859.8750610351562, - "learning_rate": 4.9999808944197666e-05, - "loss": 168.428, - "step": 12640 - }, - { - "epoch": 0.10221478841942808, - "grad_norm": 825.5784301757812, - "learning_rate": 4.999978067577844e-05, - "loss": 137.4553, - "step": 12650 - }, - { - "epoch": 0.10229559062371221, - "grad_norm": 1104.8963623046875, - "learning_rate": 4.999975045782486e-05, - "loss": 170.8137, - "step": 12660 - }, - { - "epoch": 0.10237639282799635, - "grad_norm": 1366.6217041015625, - "learning_rate": 4.9999718290339256e-05, - "loss": 196.1074, - "step": 12670 - }, - { - "epoch": 0.10245719503228048, - "grad_norm": 1153.875, - "learning_rate": 4.999968417332415e-05, - "loss": 157.5345, - "step": 12680 - }, - { - "epoch": 0.10253799723656461, - "grad_norm": 1161.9166259765625, - "learning_rate": 4.999964810678219e-05, - "loss": 208.5221, - "step": 12690 - }, - { - "epoch": 0.10261879944084874, - "grad_norm": 1163.0301513671875, - "learning_rate": 4.999961009071621e-05, - "loss": 182.4954, - "step": 12700 - }, - { - "epoch": 0.10269960164513288, - "grad_norm": 1738.5555419921875, - "learning_rate": 4.999957012512916e-05, - "loss": 159.2989, - "step": 12710 - }, - { - "epoch": 0.10278040384941702, - "grad_norm": 1262.410400390625, - "learning_rate": 4.999952821002415e-05, - "loss": 156.2391, - "step": 12720 - }, - { - "epoch": 0.10286120605370115, - "grad_norm": 1261.878662109375, - "learning_rate": 4.999948434540446e-05, - "loss": 220.3659, - "step": 12730 - }, - { - "epoch": 0.10294200825798527, - "grad_norm": 1671.6888427734375, - "learning_rate": 4.999943853127351e-05, - "loss": 168.3365, - "step": 12740 - }, - { - "epoch": 0.10302281046226941, - "grad_norm": 1700.01806640625, - "learning_rate": 4.999939076763487e-05, - "loss": 163.4017, - "step": 12750 - }, - { - "epoch": 0.10310361266655355, - "grad_norm": 939.8884887695312, - "learning_rate": 4.9999341054492265e-05, - "loss": 158.5266, - "step": 12760 - }, - { - "epoch": 0.10318441487083768, - "grad_norm": 1419.837890625, - "learning_rate": 4.999928939184958e-05, - "loss": 220.1496, - "step": 12770 - }, - { - "epoch": 0.1032652170751218, - "grad_norm": 1171.638427734375, - "learning_rate": 4.9999235779710826e-05, - "loss": 193.891, - "step": 12780 - }, - { - "epoch": 0.10334601927940594, - "grad_norm": 651.5762329101562, - "learning_rate": 4.999918021808019e-05, - "loss": 131.8267, - "step": 12790 - }, - { - "epoch": 0.10342682148369008, - "grad_norm": 692.036865234375, - "learning_rate": 4.999912270696202e-05, - "loss": 169.2473, - "step": 12800 - }, - { - "epoch": 0.10350762368797421, - "grad_norm": 1337.3287353515625, - "learning_rate": 4.9999063246360786e-05, - "loss": 141.8685, - "step": 12810 - }, - { - "epoch": 0.10358842589225833, - "grad_norm": 1478.6639404296875, - "learning_rate": 4.999900183628112e-05, - "loss": 196.864, - "step": 12820 - }, - { - "epoch": 0.10366922809654247, - "grad_norm": 1000.7589111328125, - "learning_rate": 4.9998938476727826e-05, - "loss": 188.9481, - "step": 12830 - }, - { - "epoch": 0.1037500303008266, - "grad_norm": 1054.9835205078125, - "learning_rate": 4.999887316770584e-05, - "loss": 130.5882, - "step": 12840 - }, - { - "epoch": 0.10383083250511074, - "grad_norm": 2553.80712890625, - "learning_rate": 4.999880590922025e-05, - "loss": 187.1475, - "step": 12850 - }, - { - "epoch": 0.10391163470939488, - "grad_norm": 937.0132446289062, - "learning_rate": 4.9998736701276295e-05, - "loss": 162.3992, - "step": 12860 - }, - { - "epoch": 0.103992436913679, - "grad_norm": 791.8550415039062, - "learning_rate": 4.999866554387939e-05, - "loss": 197.168, - "step": 12870 - }, - { - "epoch": 0.10407323911796314, - "grad_norm": 3654.48486328125, - "learning_rate": 4.9998592437035076e-05, - "loss": 164.5334, - "step": 12880 - }, - { - "epoch": 0.10415404132224727, - "grad_norm": 1143.3536376953125, - "learning_rate": 4.999851738074904e-05, - "loss": 146.9005, - "step": 12890 - }, - { - "epoch": 0.10423484352653141, - "grad_norm": 1063.0355224609375, - "learning_rate": 4.9998440375027166e-05, - "loss": 198.5039, - "step": 12900 - }, - { - "epoch": 0.10431564573081553, - "grad_norm": 874.8700561523438, - "learning_rate": 4.999836141987543e-05, - "loss": 172.7148, - "step": 12910 - }, - { - "epoch": 0.10439644793509967, - "grad_norm": 908.2416381835938, - "learning_rate": 4.99982805153e-05, - "loss": 180.7337, - "step": 12920 - }, - { - "epoch": 0.1044772501393838, - "grad_norm": 1179.3070068359375, - "learning_rate": 4.999819766130719e-05, - "loss": 156.6323, - "step": 12930 - }, - { - "epoch": 0.10455805234366794, - "grad_norm": 918.0863647460938, - "learning_rate": 4.9998112857903454e-05, - "loss": 165.3982, - "step": 12940 - }, - { - "epoch": 0.10463885454795206, - "grad_norm": 1157.9603271484375, - "learning_rate": 4.9998026105095405e-05, - "loss": 191.8449, - "step": 12950 - }, - { - "epoch": 0.1047196567522362, - "grad_norm": 1094.0836181640625, - "learning_rate": 4.999793740288982e-05, - "loss": 153.5846, - "step": 12960 - }, - { - "epoch": 0.10480045895652033, - "grad_norm": 902.975830078125, - "learning_rate": 4.999784675129359e-05, - "loss": 163.8275, - "step": 12970 - }, - { - "epoch": 0.10488126116080447, - "grad_norm": 1163.962158203125, - "learning_rate": 4.9997754150313815e-05, - "loss": 179.3945, - "step": 12980 - }, - { - "epoch": 0.1049620633650886, - "grad_norm": 1341.8209228515625, - "learning_rate": 4.999765959995769e-05, - "loss": 170.5088, - "step": 12990 - }, - { - "epoch": 0.10504286556937273, - "grad_norm": 1075.1884765625, - "learning_rate": 4.999756310023261e-05, - "loss": 156.7718, - "step": 13000 - }, - { - "epoch": 0.10512366777365686, - "grad_norm": 1152.5919189453125, - "learning_rate": 4.999746465114609e-05, - "loss": 144.3317, - "step": 13010 - }, - { - "epoch": 0.105204469977941, - "grad_norm": 1482.2880859375, - "learning_rate": 4.99973642527058e-05, - "loss": 176.2166, - "step": 13020 - }, - { - "epoch": 0.10528527218222514, - "grad_norm": 1299.338623046875, - "learning_rate": 4.999726190491958e-05, - "loss": 146.185, - "step": 13030 - }, - { - "epoch": 0.10536607438650926, - "grad_norm": 1425.8626708984375, - "learning_rate": 4.999715760779541e-05, - "loss": 174.5499, - "step": 13040 - }, - { - "epoch": 0.1054468765907934, - "grad_norm": 1019.5512084960938, - "learning_rate": 4.9997051361341425e-05, - "loss": 137.8167, - "step": 13050 - }, - { - "epoch": 0.10552767879507753, - "grad_norm": 1327.304931640625, - "learning_rate": 4.9996943165565905e-05, - "loss": 121.892, - "step": 13060 - }, - { - "epoch": 0.10560848099936167, - "grad_norm": 1519.26318359375, - "learning_rate": 4.9996833020477285e-05, - "loss": 140.6848, - "step": 13070 - }, - { - "epoch": 0.10568928320364579, - "grad_norm": 1422.664794921875, - "learning_rate": 4.9996720926084164e-05, - "loss": 212.7574, - "step": 13080 - }, - { - "epoch": 0.10577008540792993, - "grad_norm": 897.9098510742188, - "learning_rate": 4.999660688239527e-05, - "loss": 154.736, - "step": 13090 - }, - { - "epoch": 0.10585088761221406, - "grad_norm": 692.3068237304688, - "learning_rate": 4.9996490889419514e-05, - "loss": 149.2785, - "step": 13100 - }, - { - "epoch": 0.1059316898164982, - "grad_norm": 1280.52880859375, - "learning_rate": 4.999637294716593e-05, - "loss": 194.0431, - "step": 13110 - }, - { - "epoch": 0.10601249202078232, - "grad_norm": 1880.19970703125, - "learning_rate": 4.999625305564371e-05, - "loss": 145.3331, - "step": 13120 - }, - { - "epoch": 0.10609329422506646, - "grad_norm": 961.3082885742188, - "learning_rate": 4.999613121486222e-05, - "loss": 105.231, - "step": 13130 - }, - { - "epoch": 0.10617409642935059, - "grad_norm": 831.8555297851562, - "learning_rate": 4.999600742483094e-05, - "loss": 151.7289, - "step": 13140 - }, - { - "epoch": 0.10625489863363473, - "grad_norm": 1600.5860595703125, - "learning_rate": 4.999588168555954e-05, - "loss": 198.1135, - "step": 13150 - }, - { - "epoch": 0.10633570083791886, - "grad_norm": 1870.4561767578125, - "learning_rate": 4.999575399705783e-05, - "loss": 124.5649, - "step": 13160 - }, - { - "epoch": 0.10641650304220299, - "grad_norm": 1433.4654541015625, - "learning_rate": 4.999562435933575e-05, - "loss": 197.3641, - "step": 13170 - }, - { - "epoch": 0.10649730524648712, - "grad_norm": 1232.9798583984375, - "learning_rate": 4.999549277240342e-05, - "loss": 203.3432, - "step": 13180 - }, - { - "epoch": 0.10657810745077126, - "grad_norm": 1231.0390625, - "learning_rate": 4.999535923627109e-05, - "loss": 166.3136, - "step": 13190 - }, - { - "epoch": 0.1066589096550554, - "grad_norm": 969.1221313476562, - "learning_rate": 4.999522375094919e-05, - "loss": 179.7614, - "step": 13200 - }, - { - "epoch": 0.10673971185933952, - "grad_norm": 1390.123046875, - "learning_rate": 4.9995086316448284e-05, - "loss": 204.0981, - "step": 13210 - }, - { - "epoch": 0.10682051406362365, - "grad_norm": 1467.5294189453125, - "learning_rate": 4.999494693277907e-05, - "loss": 173.4162, - "step": 13220 - }, - { - "epoch": 0.10690131626790779, - "grad_norm": 1082.658935546875, - "learning_rate": 4.9994805599952445e-05, - "loss": 179.5972, - "step": 13230 - }, - { - "epoch": 0.10698211847219193, - "grad_norm": 1515.619140625, - "learning_rate": 4.999466231797941e-05, - "loss": 138.5394, - "step": 13240 - }, - { - "epoch": 0.10706292067647605, - "grad_norm": 1140.68408203125, - "learning_rate": 4.999451708687114e-05, - "loss": 176.4543, - "step": 13250 - }, - { - "epoch": 0.10714372288076018, - "grad_norm": 831.0529174804688, - "learning_rate": 4.999436990663897e-05, - "loss": 150.354, - "step": 13260 - }, - { - "epoch": 0.10722452508504432, - "grad_norm": 963.3916015625, - "learning_rate": 4.9994220777294364e-05, - "loss": 156.2194, - "step": 13270 - }, - { - "epoch": 0.10730532728932846, - "grad_norm": 912.9598388671875, - "learning_rate": 4.999406969884897e-05, - "loss": 162.1478, - "step": 13280 - }, - { - "epoch": 0.10738612949361259, - "grad_norm": 675.6322631835938, - "learning_rate": 4.999391667131455e-05, - "loss": 224.8732, - "step": 13290 - }, - { - "epoch": 0.10746693169789671, - "grad_norm": 1727.82421875, - "learning_rate": 4.999376169470306e-05, - "loss": 190.2591, - "step": 13300 - }, - { - "epoch": 0.10754773390218085, - "grad_norm": 524.6329345703125, - "learning_rate": 4.999360476902656e-05, - "loss": 152.8234, - "step": 13310 - }, - { - "epoch": 0.10762853610646499, - "grad_norm": 1688.1556396484375, - "learning_rate": 4.99934458942973e-05, - "loss": 174.9487, - "step": 13320 - }, - { - "epoch": 0.10770933831074912, - "grad_norm": 1667.50439453125, - "learning_rate": 4.999328507052768e-05, - "loss": 173.4268, - "step": 13330 - }, - { - "epoch": 0.10779014051503324, - "grad_norm": 1009.3993530273438, - "learning_rate": 4.999312229773022e-05, - "loss": 147.6254, - "step": 13340 - }, - { - "epoch": 0.10787094271931738, - "grad_norm": 996.6771850585938, - "learning_rate": 4.999295757591762e-05, - "loss": 153.3868, - "step": 13350 - }, - { - "epoch": 0.10795174492360152, - "grad_norm": 7017.70166015625, - "learning_rate": 4.9992790905102734e-05, - "loss": 145.673, - "step": 13360 - }, - { - "epoch": 0.10803254712788565, - "grad_norm": 1505.3873291015625, - "learning_rate": 4.999262228529855e-05, - "loss": 192.0382, - "step": 13370 - }, - { - "epoch": 0.10811334933216978, - "grad_norm": 2117.772216796875, - "learning_rate": 4.999245171651823e-05, - "loss": 149.8352, - "step": 13380 - }, - { - "epoch": 0.10819415153645391, - "grad_norm": 1212.994140625, - "learning_rate": 4.999227919877506e-05, - "loss": 196.7278, - "step": 13390 - }, - { - "epoch": 0.10827495374073805, - "grad_norm": 1208.19921875, - "learning_rate": 4.99921047320825e-05, - "loss": 167.442, - "step": 13400 - }, - { - "epoch": 0.10835575594502218, - "grad_norm": 1247.399169921875, - "learning_rate": 4.999192831645416e-05, - "loss": 156.3466, - "step": 13410 - }, - { - "epoch": 0.10843655814930632, - "grad_norm": 963.58203125, - "learning_rate": 4.999174995190379e-05, - "loss": 168.6638, - "step": 13420 - }, - { - "epoch": 0.10851736035359044, - "grad_norm": 1341.9423828125, - "learning_rate": 4.99915696384453e-05, - "loss": 156.1889, - "step": 13430 - }, - { - "epoch": 0.10859816255787458, - "grad_norm": 845.2861938476562, - "learning_rate": 4.999138737609276e-05, - "loss": 131.8809, - "step": 13440 - }, - { - "epoch": 0.10867896476215871, - "grad_norm": 1764.8289794921875, - "learning_rate": 4.9991203164860365e-05, - "loss": 182.9894, - "step": 13450 - }, - { - "epoch": 0.10875976696644285, - "grad_norm": 861.3524780273438, - "learning_rate": 4.9991017004762496e-05, - "loss": 204.3976, - "step": 13460 - }, - { - "epoch": 0.10884056917072697, - "grad_norm": 899.0159301757812, - "learning_rate": 4.999082889581367e-05, - "loss": 162.5302, - "step": 13470 - }, - { - "epoch": 0.10892137137501111, - "grad_norm": 1054.7913818359375, - "learning_rate": 4.9990638838028546e-05, - "loss": 146.1126, - "step": 13480 - }, - { - "epoch": 0.10900217357929524, - "grad_norm": 1118.8768310546875, - "learning_rate": 4.9990446831421955e-05, - "loss": 159.5271, - "step": 13490 - }, - { - "epoch": 0.10908297578357938, - "grad_norm": 905.4559936523438, - "learning_rate": 4.999025287600886e-05, - "loss": 148.4053, - "step": 13500 - }, - { - "epoch": 0.1091637779878635, - "grad_norm": 1903.7471923828125, - "learning_rate": 4.99900569718044e-05, - "loss": 171.2639, - "step": 13510 - }, - { - "epoch": 0.10924458019214764, - "grad_norm": 3995.648193359375, - "learning_rate": 4.998985911882384e-05, - "loss": 195.9141, - "step": 13520 - }, - { - "epoch": 0.10932538239643178, - "grad_norm": 765.4593505859375, - "learning_rate": 4.998965931708261e-05, - "loss": 194.9591, - "step": 13530 - }, - { - "epoch": 0.10940618460071591, - "grad_norm": 992.2507934570312, - "learning_rate": 4.99894575665963e-05, - "loss": 176.8459, - "step": 13540 - }, - { - "epoch": 0.10948698680500005, - "grad_norm": 1436.7867431640625, - "learning_rate": 4.998925386738063e-05, - "loss": 204.7029, - "step": 13550 - }, - { - "epoch": 0.10956778900928417, - "grad_norm": 1059.3260498046875, - "learning_rate": 4.9989048219451495e-05, - "loss": 157.6819, - "step": 13560 - }, - { - "epoch": 0.1096485912135683, - "grad_norm": 1169.8514404296875, - "learning_rate": 4.998884062282492e-05, - "loss": 141.3741, - "step": 13570 - }, - { - "epoch": 0.10972939341785244, - "grad_norm": 824.392333984375, - "learning_rate": 4.998863107751711e-05, - "loss": 159.9517, - "step": 13580 - }, - { - "epoch": 0.10981019562213658, - "grad_norm": 784.0963745117188, - "learning_rate": 4.99884195835444e-05, - "loss": 171.3307, - "step": 13590 - }, - { - "epoch": 0.1098909978264207, - "grad_norm": 1065.682373046875, - "learning_rate": 4.998820614092328e-05, - "loss": 133.3055, - "step": 13600 - }, - { - "epoch": 0.10997180003070484, - "grad_norm": 2594.65283203125, - "learning_rate": 4.9987990749670395e-05, - "loss": 169.1922, - "step": 13610 - }, - { - "epoch": 0.11005260223498897, - "grad_norm": 1230.3470458984375, - "learning_rate": 4.998777340980254e-05, - "loss": 134.5084, - "step": 13620 - }, - { - "epoch": 0.11013340443927311, - "grad_norm": 1700.8367919921875, - "learning_rate": 4.9987554121336666e-05, - "loss": 140.0847, - "step": 13630 - }, - { - "epoch": 0.11021420664355723, - "grad_norm": 2070.349609375, - "learning_rate": 4.998733288428987e-05, - "loss": 135.2259, - "step": 13640 - }, - { - "epoch": 0.11029500884784137, - "grad_norm": 906.6098022460938, - "learning_rate": 4.998710969867942e-05, - "loss": 159.0804, - "step": 13650 - }, - { - "epoch": 0.1103758110521255, - "grad_norm": 671.7243041992188, - "learning_rate": 4.9986884564522696e-05, - "loss": 158.5768, - "step": 13660 - }, - { - "epoch": 0.11045661325640964, - "grad_norm": 1695.5078125, - "learning_rate": 4.9986657481837277e-05, - "loss": 192.8576, - "step": 13670 - }, - { - "epoch": 0.11053741546069376, - "grad_norm": 846.3448486328125, - "learning_rate": 4.998642845064086e-05, - "loss": 207.5042, - "step": 13680 - }, - { - "epoch": 0.1106182176649779, - "grad_norm": 1619.7523193359375, - "learning_rate": 4.998619747095129e-05, - "loss": 178.7888, - "step": 13690 - }, - { - "epoch": 0.11069901986926203, - "grad_norm": 889.7913818359375, - "learning_rate": 4.9985964542786614e-05, - "loss": 174.9721, - "step": 13700 - }, - { - "epoch": 0.11077982207354617, - "grad_norm": 3909.932373046875, - "learning_rate": 4.998572966616496e-05, - "loss": 186.7709, - "step": 13710 - }, - { - "epoch": 0.1108606242778303, - "grad_norm": 1768.4962158203125, - "learning_rate": 4.998549284110468e-05, - "loss": 152.7362, - "step": 13720 - }, - { - "epoch": 0.11094142648211443, - "grad_norm": 897.6123046875, - "learning_rate": 4.9985254067624215e-05, - "loss": 173.6773, - "step": 13730 - }, - { - "epoch": 0.11102222868639856, - "grad_norm": 1334.3804931640625, - "learning_rate": 4.99850133457422e-05, - "loss": 160.6299, - "step": 13740 - }, - { - "epoch": 0.1111030308906827, - "grad_norm": 871.2564086914062, - "learning_rate": 4.99847706754774e-05, - "loss": 171.9371, - "step": 13750 - }, - { - "epoch": 0.11118383309496684, - "grad_norm": 854.1260986328125, - "learning_rate": 4.998452605684874e-05, - "loss": 121.2178, - "step": 13760 - }, - { - "epoch": 0.11126463529925096, - "grad_norm": 2619.515380859375, - "learning_rate": 4.998427948987528e-05, - "loss": 139.2697, - "step": 13770 - }, - { - "epoch": 0.1113454375035351, - "grad_norm": 969.038330078125, - "learning_rate": 4.9984030974576285e-05, - "loss": 163.5971, - "step": 13780 - }, - { - "epoch": 0.11142623970781923, - "grad_norm": 1423.4775390625, - "learning_rate": 4.998378051097111e-05, - "loss": 161.5164, - "step": 13790 - }, - { - "epoch": 0.11150704191210337, - "grad_norm": 1076.0850830078125, - "learning_rate": 4.998352809907928e-05, - "loss": 187.8438, - "step": 13800 - }, - { - "epoch": 0.11158784411638749, - "grad_norm": 1140.2772216796875, - "learning_rate": 4.99832737389205e-05, - "loss": 195.8672, - "step": 13810 - }, - { - "epoch": 0.11166864632067162, - "grad_norm": 568.3184814453125, - "learning_rate": 4.998301743051459e-05, - "loss": 118.64, - "step": 13820 - }, - { - "epoch": 0.11174944852495576, - "grad_norm": 1318.251708984375, - "learning_rate": 4.998275917388154e-05, - "loss": 166.9598, - "step": 13830 - }, - { - "epoch": 0.1118302507292399, - "grad_norm": 1014.582275390625, - "learning_rate": 4.998249896904149e-05, - "loss": 144.1429, - "step": 13840 - }, - { - "epoch": 0.11191105293352403, - "grad_norm": 1505.1619873046875, - "learning_rate": 4.998223681601473e-05, - "loss": 154.8697, - "step": 13850 - }, - { - "epoch": 0.11199185513780816, - "grad_norm": 932.104248046875, - "learning_rate": 4.998197271482171e-05, - "loss": 120.6193, - "step": 13860 - }, - { - "epoch": 0.11207265734209229, - "grad_norm": 792.6611328125, - "learning_rate": 4.998170666548302e-05, - "loss": 165.055, - "step": 13870 - }, - { - "epoch": 0.11215345954637643, - "grad_norm": 768.7438354492188, - "learning_rate": 4.998143866801942e-05, - "loss": 152.542, - "step": 13880 - }, - { - "epoch": 0.11223426175066056, - "grad_norm": 951.4129638671875, - "learning_rate": 4.9981168722451776e-05, - "loss": 127.052, - "step": 13890 - }, - { - "epoch": 0.11231506395494469, - "grad_norm": 1439.15625, - "learning_rate": 4.998089682880117e-05, - "loss": 158.0334, - "step": 13900 - }, - { - "epoch": 0.11239586615922882, - "grad_norm": 1083.6663818359375, - "learning_rate": 4.9980622987088795e-05, - "loss": 175.9181, - "step": 13910 - }, - { - "epoch": 0.11247666836351296, - "grad_norm": 1544.906005859375, - "learning_rate": 4.9980347197336005e-05, - "loss": 137.1661, - "step": 13920 - }, - { - "epoch": 0.1125574705677971, - "grad_norm": 830.376708984375, - "learning_rate": 4.998006945956431e-05, - "loss": 188.0634, - "step": 13930 - }, - { - "epoch": 0.11263827277208122, - "grad_norm": 1686.7161865234375, - "learning_rate": 4.997978977379536e-05, - "loss": 141.7165, - "step": 13940 - }, - { - "epoch": 0.11271907497636535, - "grad_norm": 886.5191650390625, - "learning_rate": 4.997950814005098e-05, - "loss": 145.7173, - "step": 13950 - }, - { - "epoch": 0.11279987718064949, - "grad_norm": 1361.8348388671875, - "learning_rate": 4.997922455835311e-05, - "loss": 136.5869, - "step": 13960 - }, - { - "epoch": 0.11288067938493362, - "grad_norm": 1612.2431640625, - "learning_rate": 4.9978939028723894e-05, - "loss": 136.1402, - "step": 13970 - }, - { - "epoch": 0.11296148158921776, - "grad_norm": 1038.875732421875, - "learning_rate": 4.997865155118557e-05, - "loss": 182.0497, - "step": 13980 - }, - { - "epoch": 0.11304228379350188, - "grad_norm": 1513.9254150390625, - "learning_rate": 4.997836212576057e-05, - "loss": 156.0715, - "step": 13990 - }, - { - "epoch": 0.11312308599778602, - "grad_norm": 1226.705078125, - "learning_rate": 4.997807075247146e-05, - "loss": 199.7709, - "step": 14000 - }, - { - "epoch": 0.11320388820207015, - "grad_norm": 1169.3992919921875, - "learning_rate": 4.997777743134097e-05, - "loss": 145.5653, - "step": 14010 - }, - { - "epoch": 0.11328469040635429, - "grad_norm": 882.8123168945312, - "learning_rate": 4.997748216239196e-05, - "loss": 183.3331, - "step": 14020 - }, - { - "epoch": 0.11336549261063841, - "grad_norm": 765.9967651367188, - "learning_rate": 4.9977184945647473e-05, - "loss": 156.8764, - "step": 14030 - }, - { - "epoch": 0.11344629481492255, - "grad_norm": 1152.5396728515625, - "learning_rate": 4.9976885781130665e-05, - "loss": 212.5292, - "step": 14040 - }, - { - "epoch": 0.11352709701920669, - "grad_norm": 1833.1990966796875, - "learning_rate": 4.997658466886489e-05, - "loss": 226.1368, - "step": 14050 - }, - { - "epoch": 0.11360789922349082, - "grad_norm": 1181.484619140625, - "learning_rate": 4.997628160887361e-05, - "loss": 141.6887, - "step": 14060 - }, - { - "epoch": 0.11368870142777494, - "grad_norm": 709.9965209960938, - "learning_rate": 4.997597660118046e-05, - "loss": 140.9013, - "step": 14070 - }, - { - "epoch": 0.11376950363205908, - "grad_norm": 3030.4013671875, - "learning_rate": 4.9975669645809244e-05, - "loss": 157.6175, - "step": 14080 - }, - { - "epoch": 0.11385030583634322, - "grad_norm": 1184.831787109375, - "learning_rate": 4.997536074278387e-05, - "loss": 166.5892, - "step": 14090 - }, - { - "epoch": 0.11393110804062735, - "grad_norm": 796.1829223632812, - "learning_rate": 4.9975049892128455e-05, - "loss": 160.6216, - "step": 14100 - }, - { - "epoch": 0.11401191024491149, - "grad_norm": 942.1198120117188, - "learning_rate": 4.997473709386722e-05, - "loss": 126.5784, - "step": 14110 - }, - { - "epoch": 0.11409271244919561, - "grad_norm": 1137.658447265625, - "learning_rate": 4.997442234802456e-05, - "loss": 154.7953, - "step": 14120 - }, - { - "epoch": 0.11417351465347975, - "grad_norm": 1263.6761474609375, - "learning_rate": 4.9974105654625036e-05, - "loss": 134.6791, - "step": 14130 - }, - { - "epoch": 0.11425431685776388, - "grad_norm": 1910.0404052734375, - "learning_rate": 4.997378701369332e-05, - "loss": 224.972, - "step": 14140 - }, - { - "epoch": 0.11433511906204802, - "grad_norm": 825.7420654296875, - "learning_rate": 4.9973466425254286e-05, - "loss": 118.567, - "step": 14150 - }, - { - "epoch": 0.11441592126633214, - "grad_norm": 1374.5643310546875, - "learning_rate": 4.997314388933291e-05, - "loss": 190.3887, - "step": 14160 - }, - { - "epoch": 0.11449672347061628, - "grad_norm": 865.79248046875, - "learning_rate": 4.9972819405954366e-05, - "loss": 141.0639, - "step": 14170 - }, - { - "epoch": 0.11457752567490041, - "grad_norm": 863.3421630859375, - "learning_rate": 4.997249297514394e-05, - "loss": 160.1106, - "step": 14180 - }, - { - "epoch": 0.11465832787918455, - "grad_norm": 1046.8785400390625, - "learning_rate": 4.997216459692709e-05, - "loss": 155.2761, - "step": 14190 - }, - { - "epoch": 0.11473913008346867, - "grad_norm": 718.43701171875, - "learning_rate": 4.997183427132943e-05, - "loss": 154.2209, - "step": 14200 - }, - { - "epoch": 0.11481993228775281, - "grad_norm": 1266.812255859375, - "learning_rate": 4.997150199837671e-05, - "loss": 174.2161, - "step": 14210 - }, - { - "epoch": 0.11490073449203694, - "grad_norm": 1139.7977294921875, - "learning_rate": 4.9971167778094863e-05, - "loss": 153.6624, - "step": 14220 - }, - { - "epoch": 0.11498153669632108, - "grad_norm": 1293.97705078125, - "learning_rate": 4.997083161050994e-05, - "loss": 131.6517, - "step": 14230 - }, - { - "epoch": 0.1150623389006052, - "grad_norm": 1099.0384521484375, - "learning_rate": 4.997049349564814e-05, - "loss": 129.0612, - "step": 14240 - }, - { - "epoch": 0.11514314110488934, - "grad_norm": 924.1503295898438, - "learning_rate": 4.997015343353585e-05, - "loss": 197.318, - "step": 14250 - }, - { - "epoch": 0.11522394330917347, - "grad_norm": 675.231201171875, - "learning_rate": 4.996981142419959e-05, - "loss": 137.751, - "step": 14260 - }, - { - "epoch": 0.11530474551345761, - "grad_norm": 2197.999755859375, - "learning_rate": 4.996946746766601e-05, - "loss": 136.4753, - "step": 14270 - }, - { - "epoch": 0.11538554771774175, - "grad_norm": 1485.24560546875, - "learning_rate": 4.9969121563961956e-05, - "loss": 152.8822, - "step": 14280 - }, - { - "epoch": 0.11546634992202587, - "grad_norm": 806.576416015625, - "learning_rate": 4.996877371311439e-05, - "loss": 145.9381, - "step": 14290 - }, - { - "epoch": 0.11554715212631, - "grad_norm": 4764.51416015625, - "learning_rate": 4.996842391515044e-05, - "loss": 199.131, - "step": 14300 - }, - { - "epoch": 0.11562795433059414, - "grad_norm": 898.8399047851562, - "learning_rate": 4.996807217009738e-05, - "loss": 165.7666, - "step": 14310 - }, - { - "epoch": 0.11570875653487828, - "grad_norm": 829.5301513671875, - "learning_rate": 4.996771847798265e-05, - "loss": 107.2207, - "step": 14320 - }, - { - "epoch": 0.1157895587391624, - "grad_norm": 2489.66064453125, - "learning_rate": 4.996736283883382e-05, - "loss": 220.5123, - "step": 14330 - }, - { - "epoch": 0.11587036094344653, - "grad_norm": 1868.64453125, - "learning_rate": 4.9967005252678634e-05, - "loss": 194.7474, - "step": 14340 - }, - { - "epoch": 0.11595116314773067, - "grad_norm": 1988.3172607421875, - "learning_rate": 4.996664571954497e-05, - "loss": 150.1648, - "step": 14350 - }, - { - "epoch": 0.11603196535201481, - "grad_norm": 1290.5560302734375, - "learning_rate": 4.996628423946087e-05, - "loss": 176.3776, - "step": 14360 - }, - { - "epoch": 0.11611276755629893, - "grad_norm": 1115.728515625, - "learning_rate": 4.996592081245451e-05, - "loss": 162.0988, - "step": 14370 - }, - { - "epoch": 0.11619356976058307, - "grad_norm": 1063.6759033203125, - "learning_rate": 4.9965555438554254e-05, - "loss": 105.5058, - "step": 14380 - }, - { - "epoch": 0.1162743719648672, - "grad_norm": 1047.7816162109375, - "learning_rate": 4.996518811778858e-05, - "loss": 163.7689, - "step": 14390 - }, - { - "epoch": 0.11635517416915134, - "grad_norm": 1390.6163330078125, - "learning_rate": 4.9964818850186135e-05, - "loss": 191.2053, - "step": 14400 - }, - { - "epoch": 0.11643597637343547, - "grad_norm": 1005.9442138671875, - "learning_rate": 4.9964447635775714e-05, - "loss": 164.1976, - "step": 14410 - }, - { - "epoch": 0.1165167785777196, - "grad_norm": 889.4789428710938, - "learning_rate": 4.996407447458626e-05, - "loss": 151.3009, - "step": 14420 - }, - { - "epoch": 0.11659758078200373, - "grad_norm": 956.8281860351562, - "learning_rate": 4.996369936664688e-05, - "loss": 141.5787, - "step": 14430 - }, - { - "epoch": 0.11667838298628787, - "grad_norm": 885.4521484375, - "learning_rate": 4.996332231198683e-05, - "loss": 150.2247, - "step": 14440 - }, - { - "epoch": 0.116759185190572, - "grad_norm": 1397.32275390625, - "learning_rate": 4.99629433106355e-05, - "loss": 238.4367, - "step": 14450 - }, - { - "epoch": 0.11683998739485613, - "grad_norm": 1110.79931640625, - "learning_rate": 4.996256236262245e-05, - "loss": 149.2973, - "step": 14460 - }, - { - "epoch": 0.11692078959914026, - "grad_norm": 1041.5343017578125, - "learning_rate": 4.99621794679774e-05, - "loss": 131.2134, - "step": 14470 - }, - { - "epoch": 0.1170015918034244, - "grad_norm": 1128.94091796875, - "learning_rate": 4.99617946267302e-05, - "loss": 204.2253, - "step": 14480 - }, - { - "epoch": 0.11708239400770853, - "grad_norm": 3384.326904296875, - "learning_rate": 4.996140783891085e-05, - "loss": 151.8341, - "step": 14490 - }, - { - "epoch": 0.11716319621199266, - "grad_norm": 660.67626953125, - "learning_rate": 4.996101910454953e-05, - "loss": 152.4279, - "step": 14500 - }, - { - "epoch": 0.11724399841627679, - "grad_norm": 2205.68994140625, - "learning_rate": 4.996062842367654e-05, - "loss": 233.8809, - "step": 14510 - }, - { - "epoch": 0.11732480062056093, - "grad_norm": 1460.4385986328125, - "learning_rate": 4.996023579632236e-05, - "loss": 140.3975, - "step": 14520 - }, - { - "epoch": 0.11740560282484507, - "grad_norm": 697.2841796875, - "learning_rate": 4.9959841222517596e-05, - "loss": 151.8424, - "step": 14530 - }, - { - "epoch": 0.1174864050291292, - "grad_norm": 2516.592529296875, - "learning_rate": 4.995944470229302e-05, - "loss": 156.5141, - "step": 14540 - }, - { - "epoch": 0.11756720723341332, - "grad_norm": 1740.1024169921875, - "learning_rate": 4.9959046235679565e-05, - "loss": 131.6542, - "step": 14550 - }, - { - "epoch": 0.11764800943769746, - "grad_norm": 952.4142456054688, - "learning_rate": 4.9958645822708285e-05, - "loss": 142.8636, - "step": 14560 - }, - { - "epoch": 0.1177288116419816, - "grad_norm": 1014.7145385742188, - "learning_rate": 4.9958243463410414e-05, - "loss": 127.906, - "step": 14570 - }, - { - "epoch": 0.11780961384626573, - "grad_norm": 960.5801391601562, - "learning_rate": 4.995783915781734e-05, - "loss": 146.5821, - "step": 14580 - }, - { - "epoch": 0.11789041605054985, - "grad_norm": 1083.92626953125, - "learning_rate": 4.995743290596057e-05, - "loss": 271.1002, - "step": 14590 - }, - { - "epoch": 0.11797121825483399, - "grad_norm": 926.5228271484375, - "learning_rate": 4.9957024707871806e-05, - "loss": 188.0781, - "step": 14600 - }, - { - "epoch": 0.11805202045911813, - "grad_norm": 1364.3609619140625, - "learning_rate": 4.995661456358286e-05, - "loss": 172.0326, - "step": 14610 - }, - { - "epoch": 0.11813282266340226, - "grad_norm": 2353.000732421875, - "learning_rate": 4.9956202473125736e-05, - "loss": 182.2644, - "step": 14620 - }, - { - "epoch": 0.11821362486768638, - "grad_norm": 971.534423828125, - "learning_rate": 4.9955788436532545e-05, - "loss": 150.654, - "step": 14630 - }, - { - "epoch": 0.11829442707197052, - "grad_norm": 982.6012573242188, - "learning_rate": 4.99553724538356e-05, - "loss": 179.7823, - "step": 14640 - }, - { - "epoch": 0.11837522927625466, - "grad_norm": 1529.090576171875, - "learning_rate": 4.9954954525067334e-05, - "loss": 212.5037, - "step": 14650 - }, - { - "epoch": 0.11845603148053879, - "grad_norm": 817.869384765625, - "learning_rate": 4.995453465026032e-05, - "loss": 180.9261, - "step": 14660 - }, - { - "epoch": 0.11853683368482293, - "grad_norm": 1415.3538818359375, - "learning_rate": 4.995411282944732e-05, - "loss": 188.3301, - "step": 14670 - }, - { - "epoch": 0.11861763588910705, - "grad_norm": 806.4898681640625, - "learning_rate": 4.9953689062661226e-05, - "loss": 176.7523, - "step": 14680 - }, - { - "epoch": 0.11869843809339119, - "grad_norm": 1228.0869140625, - "learning_rate": 4.9953263349935074e-05, - "loss": 147.81, - "step": 14690 - }, - { - "epoch": 0.11877924029767532, - "grad_norm": 773.8961791992188, - "learning_rate": 4.995283569130207e-05, - "loss": 162.8876, - "step": 14700 - }, - { - "epoch": 0.11886004250195946, - "grad_norm": 1415.76806640625, - "learning_rate": 4.9952406086795564e-05, - "loss": 160.2567, - "step": 14710 - }, - { - "epoch": 0.11894084470624358, - "grad_norm": 1326.4412841796875, - "learning_rate": 4.995197453644905e-05, - "loss": 170.8327, - "step": 14720 - }, - { - "epoch": 0.11902164691052772, - "grad_norm": 836.1159057617188, - "learning_rate": 4.995154104029619e-05, - "loss": 144.7179, - "step": 14730 - }, - { - "epoch": 0.11910244911481185, - "grad_norm": 1337.840576171875, - "learning_rate": 4.995110559837078e-05, - "loss": 156.0535, - "step": 14740 - }, - { - "epoch": 0.11918325131909599, - "grad_norm": 1090.3624267578125, - "learning_rate": 4.995066821070679e-05, - "loss": 184.9325, - "step": 14750 - }, - { - "epoch": 0.11926405352338011, - "grad_norm": 1149.684814453125, - "learning_rate": 4.995022887733832e-05, - "loss": 169.8896, - "step": 14760 - }, - { - "epoch": 0.11934485572766425, - "grad_norm": 2414.326416015625, - "learning_rate": 4.994978759829963e-05, - "loss": 192.3995, - "step": 14770 - }, - { - "epoch": 0.11942565793194838, - "grad_norm": 1045.4832763671875, - "learning_rate": 4.994934437362513e-05, - "loss": 137.3873, - "step": 14780 - }, - { - "epoch": 0.11950646013623252, - "grad_norm": 936.674072265625, - "learning_rate": 4.9948899203349384e-05, - "loss": 154.0399, - "step": 14790 - }, - { - "epoch": 0.11958726234051664, - "grad_norm": 1168.2762451171875, - "learning_rate": 4.9948452087507116e-05, - "loss": 117.3101, - "step": 14800 - }, - { - "epoch": 0.11966806454480078, - "grad_norm": 837.468505859375, - "learning_rate": 4.994800302613318e-05, - "loss": 161.9647, - "step": 14810 - }, - { - "epoch": 0.11974886674908491, - "grad_norm": 915.2671508789062, - "learning_rate": 4.9947552019262605e-05, - "loss": 137.4773, - "step": 14820 - }, - { - "epoch": 0.11982966895336905, - "grad_norm": 1203.070068359375, - "learning_rate": 4.994709906693056e-05, - "loss": 189.3817, - "step": 14830 - }, - { - "epoch": 0.11991047115765319, - "grad_norm": 5586.5947265625, - "learning_rate": 4.9946644169172355e-05, - "loss": 167.9691, - "step": 14840 - }, - { - "epoch": 0.11999127336193731, - "grad_norm": 1484.06396484375, - "learning_rate": 4.994618732602349e-05, - "loss": 191.2319, - "step": 14850 - }, - { - "epoch": 0.12007207556622145, - "grad_norm": 2234.567626953125, - "learning_rate": 4.9945728537519555e-05, - "loss": 219.025, - "step": 14860 - }, - { - "epoch": 0.12015287777050558, - "grad_norm": 1168.8365478515625, - "learning_rate": 4.9945267803696364e-05, - "loss": 159.255, - "step": 14870 - }, - { - "epoch": 0.12023367997478972, - "grad_norm": 952.6293334960938, - "learning_rate": 4.994480512458981e-05, - "loss": 164.3987, - "step": 14880 - }, - { - "epoch": 0.12031448217907384, - "grad_norm": 1229.493896484375, - "learning_rate": 4.994434050023601e-05, - "loss": 156.5038, - "step": 14890 - }, - { - "epoch": 0.12039528438335798, - "grad_norm": 1771.1019287109375, - "learning_rate": 4.994387393067117e-05, - "loss": 132.9723, - "step": 14900 - }, - { - "epoch": 0.12047608658764211, - "grad_norm": 1019.0162353515625, - "learning_rate": 4.9943405415931674e-05, - "loss": 140.0254, - "step": 14910 - }, - { - "epoch": 0.12055688879192625, - "grad_norm": 1247.2222900390625, - "learning_rate": 4.9942934956054076e-05, - "loss": 145.8201, - "step": 14920 - }, - { - "epoch": 0.12063769099621037, - "grad_norm": 706.9746704101562, - "learning_rate": 4.9942462551075056e-05, - "loss": 122.2676, - "step": 14930 - }, - { - "epoch": 0.1207184932004945, - "grad_norm": 1271.373291015625, - "learning_rate": 4.994198820103145e-05, - "loss": 129.9075, - "step": 14940 - }, - { - "epoch": 0.12079929540477864, - "grad_norm": 915.8984985351562, - "learning_rate": 4.994151190596025e-05, - "loss": 169.9812, - "step": 14950 - }, - { - "epoch": 0.12088009760906278, - "grad_norm": 714.3693237304688, - "learning_rate": 4.994103366589859e-05, - "loss": 173.155, - "step": 14960 - }, - { - "epoch": 0.12096089981334691, - "grad_norm": 1518.5596923828125, - "learning_rate": 4.994055348088378e-05, - "loss": 225.9214, - "step": 14970 - }, - { - "epoch": 0.12104170201763104, - "grad_norm": 3558.85205078125, - "learning_rate": 4.9940071350953255e-05, - "loss": 206.1069, - "step": 14980 - }, - { - "epoch": 0.12112250422191517, - "grad_norm": 1201.43701171875, - "learning_rate": 4.9939587276144616e-05, - "loss": 163.2415, - "step": 14990 - }, - { - "epoch": 0.12120330642619931, - "grad_norm": 2851.87109375, - "learning_rate": 4.993910125649561e-05, - "loss": 172.4465, - "step": 15000 - }, - { - "epoch": 0.12128410863048344, - "grad_norm": 1293.496826171875, - "learning_rate": 4.993861329204414e-05, - "loss": 164.2404, - "step": 15010 - }, - { - "epoch": 0.12136491083476757, - "grad_norm": 587.5494384765625, - "learning_rate": 4.993812338282826e-05, - "loss": 184.3685, - "step": 15020 - }, - { - "epoch": 0.1214457130390517, - "grad_norm": 2932.148193359375, - "learning_rate": 4.993763152888617e-05, - "loss": 194.6474, - "step": 15030 - }, - { - "epoch": 0.12152651524333584, - "grad_norm": 3050.3369140625, - "learning_rate": 4.993713773025623e-05, - "loss": 177.5111, - "step": 15040 - }, - { - "epoch": 0.12160731744761998, - "grad_norm": 1415.346435546875, - "learning_rate": 4.993664198697694e-05, - "loss": 143.9001, - "step": 15050 - }, - { - "epoch": 0.1216881196519041, - "grad_norm": 978.0360717773438, - "learning_rate": 4.993614429908697e-05, - "loss": 147.1109, - "step": 15060 - }, - { - "epoch": 0.12176892185618823, - "grad_norm": 1312.6900634765625, - "learning_rate": 4.9935644666625125e-05, - "loss": 134.3053, - "step": 15070 - }, - { - "epoch": 0.12184972406047237, - "grad_norm": 1400.928955078125, - "learning_rate": 4.993514308963036e-05, - "loss": 170.6984, - "step": 15080 - }, - { - "epoch": 0.1219305262647565, - "grad_norm": 673.9713745117188, - "learning_rate": 4.993463956814181e-05, - "loss": 162.189, - "step": 15090 - }, - { - "epoch": 0.12201132846904064, - "grad_norm": 806.5405883789062, - "learning_rate": 4.993413410219871e-05, - "loss": 136.5441, - "step": 15100 - }, - { - "epoch": 0.12209213067332476, - "grad_norm": 927.8473510742188, - "learning_rate": 4.993362669184051e-05, - "loss": 125.2105, - "step": 15110 - }, - { - "epoch": 0.1221729328776089, - "grad_norm": 1110.3685302734375, - "learning_rate": 4.993311733710676e-05, - "loss": 149.1199, - "step": 15120 - }, - { - "epoch": 0.12225373508189304, - "grad_norm": 1533.1611328125, - "learning_rate": 4.993260603803718e-05, - "loss": 137.3492, - "step": 15130 - }, - { - "epoch": 0.12233453728617717, - "grad_norm": 959.220703125, - "learning_rate": 4.993209279467164e-05, - "loss": 95.8579, - "step": 15140 - }, - { - "epoch": 0.1224153394904613, - "grad_norm": 924.2977905273438, - "learning_rate": 4.9931577607050175e-05, - "loss": 138.668, - "step": 15150 - }, - { - "epoch": 0.12249614169474543, - "grad_norm": 1749.72216796875, - "learning_rate": 4.993106047521296e-05, - "loss": 194.7024, - "step": 15160 - }, - { - "epoch": 0.12257694389902957, - "grad_norm": 2221.097900390625, - "learning_rate": 4.993054139920032e-05, - "loss": 177.3626, - "step": 15170 - }, - { - "epoch": 0.1226577461033137, - "grad_norm": 1769.4202880859375, - "learning_rate": 4.993002037905272e-05, - "loss": 163.4251, - "step": 15180 - }, - { - "epoch": 0.12273854830759783, - "grad_norm": 816.726318359375, - "learning_rate": 4.99294974148108e-05, - "loss": 166.8434, - "step": 15190 - }, - { - "epoch": 0.12281935051188196, - "grad_norm": 1143.0623779296875, - "learning_rate": 4.992897250651535e-05, - "loss": 154.8357, - "step": 15200 - }, - { - "epoch": 0.1229001527161661, - "grad_norm": 691.0133666992188, - "learning_rate": 4.99284456542073e-05, - "loss": 109.4805, - "step": 15210 - }, - { - "epoch": 0.12298095492045023, - "grad_norm": 971.121337890625, - "learning_rate": 4.992791685792772e-05, - "loss": 147.3611, - "step": 15220 - }, - { - "epoch": 0.12306175712473437, - "grad_norm": 1528.03271484375, - "learning_rate": 4.992738611771787e-05, - "loss": 135.7302, - "step": 15230 - }, - { - "epoch": 0.12314255932901849, - "grad_norm": 1477.2353515625, - "learning_rate": 4.992685343361911e-05, - "loss": 195.6973, - "step": 15240 - }, - { - "epoch": 0.12322336153330263, - "grad_norm": 799.9385375976562, - "learning_rate": 4.992631880567301e-05, - "loss": 195.4634, - "step": 15250 - }, - { - "epoch": 0.12330416373758676, - "grad_norm": 1074.04296875, - "learning_rate": 4.992578223392124e-05, - "loss": 157.2892, - "step": 15260 - }, - { - "epoch": 0.1233849659418709, - "grad_norm": 1499.16455078125, - "learning_rate": 4.992524371840566e-05, - "loss": 151.4733, - "step": 15270 - }, - { - "epoch": 0.12346576814615502, - "grad_norm": 1253.4857177734375, - "learning_rate": 4.9924703259168244e-05, - "loss": 144.6192, - "step": 15280 - }, - { - "epoch": 0.12354657035043916, - "grad_norm": 1631.5015869140625, - "learning_rate": 4.992416085625115e-05, - "loss": 132.9751, - "step": 15290 - }, - { - "epoch": 0.1236273725547233, - "grad_norm": 941.0845947265625, - "learning_rate": 4.9923616509696683e-05, - "loss": 146.1303, - "step": 15300 - }, - { - "epoch": 0.12370817475900743, - "grad_norm": 1486.3992919921875, - "learning_rate": 4.9923070219547275e-05, - "loss": 213.4393, - "step": 15310 - }, - { - "epoch": 0.12378897696329155, - "grad_norm": 1923.984130859375, - "learning_rate": 4.992252198584554e-05, - "loss": 201.0075, - "step": 15320 - }, - { - "epoch": 0.12386977916757569, - "grad_norm": 1468.6561279296875, - "learning_rate": 4.992197180863422e-05, - "loss": 130.5384, - "step": 15330 - }, - { - "epoch": 0.12395058137185982, - "grad_norm": 3888.402099609375, - "learning_rate": 4.992141968795623e-05, - "loss": 163.4949, - "step": 15340 - }, - { - "epoch": 0.12403138357614396, - "grad_norm": 1559.868408203125, - "learning_rate": 4.9920865623854615e-05, - "loss": 167.3677, - "step": 15350 - }, - { - "epoch": 0.1241121857804281, - "grad_norm": 3625.310791015625, - "learning_rate": 4.9920309616372596e-05, - "loss": 139.593, - "step": 15360 - }, - { - "epoch": 0.12419298798471222, - "grad_norm": 2025.14453125, - "learning_rate": 4.9919751665553525e-05, - "loss": 132.3654, - "step": 15370 - }, - { - "epoch": 0.12427379018899636, - "grad_norm": 687.716552734375, - "learning_rate": 4.9919191771440905e-05, - "loss": 113.4958, - "step": 15380 - }, - { - "epoch": 0.12435459239328049, - "grad_norm": 1035.1671142578125, - "learning_rate": 4.99186299340784e-05, - "loss": 130.6346, - "step": 15390 - }, - { - "epoch": 0.12443539459756463, - "grad_norm": 1409.4853515625, - "learning_rate": 4.9918066153509834e-05, - "loss": 198.3188, - "step": 15400 - }, - { - "epoch": 0.12451619680184875, - "grad_norm": 737.7066650390625, - "learning_rate": 4.991750042977916e-05, - "loss": 120.4647, - "step": 15410 - }, - { - "epoch": 0.12459699900613289, - "grad_norm": 879.3247680664062, - "learning_rate": 4.99169327629305e-05, - "loss": 162.4415, - "step": 15420 - }, - { - "epoch": 0.12467780121041702, - "grad_norm": 1339.2764892578125, - "learning_rate": 4.9916363153008114e-05, - "loss": 157.4068, - "step": 15430 - }, - { - "epoch": 0.12475860341470116, - "grad_norm": 901.47802734375, - "learning_rate": 4.991579160005644e-05, - "loss": 141.9083, - "step": 15440 - }, - { - "epoch": 0.12483940561898528, - "grad_norm": 1027.944091796875, - "learning_rate": 4.991521810412002e-05, - "loss": 177.5538, - "step": 15450 - }, - { - "epoch": 0.12492020782326942, - "grad_norm": 874.61376953125, - "learning_rate": 4.99146426652436e-05, - "loss": 127.5313, - "step": 15460 - }, - { - "epoch": 0.12500101002755354, - "grad_norm": 645.454345703125, - "learning_rate": 4.991406528347206e-05, - "loss": 165.3097, - "step": 15470 - }, - { - "epoch": 0.12508181223183767, - "grad_norm": 1772.091796875, - "learning_rate": 4.991348595885039e-05, - "loss": 192.8554, - "step": 15480 - }, - { - "epoch": 0.1251626144361218, - "grad_norm": 1488.9267578125, - "learning_rate": 4.99129046914238e-05, - "loss": 139.4656, - "step": 15490 - }, - { - "epoch": 0.12524341664040595, - "grad_norm": 1205.8192138671875, - "learning_rate": 4.991232148123761e-05, - "loss": 165.5289, - "step": 15500 - }, - { - "epoch": 0.12532421884469008, - "grad_norm": 1326.690185546875, - "learning_rate": 4.9911736328337296e-05, - "loss": 158.799, - "step": 15510 - }, - { - "epoch": 0.12540502104897422, - "grad_norm": 1478.056640625, - "learning_rate": 4.991114923276849e-05, - "loss": 189.4746, - "step": 15520 - }, - { - "epoch": 0.12548582325325836, - "grad_norm": 3051.164306640625, - "learning_rate": 4.991056019457697e-05, - "loss": 142.0503, - "step": 15530 - }, - { - "epoch": 0.1255666254575425, - "grad_norm": 1539.4234619140625, - "learning_rate": 4.9909969213808683e-05, - "loss": 147.4014, - "step": 15540 - }, - { - "epoch": 0.12564742766182663, - "grad_norm": 760.8482055664062, - "learning_rate": 4.990937629050971e-05, - "loss": 176.7314, - "step": 15550 - }, - { - "epoch": 0.12572822986611074, - "grad_norm": 792.4769897460938, - "learning_rate": 4.990878142472628e-05, - "loss": 128.0528, - "step": 15560 - }, - { - "epoch": 0.12580903207039487, - "grad_norm": 1313.121826171875, - "learning_rate": 4.990818461650479e-05, - "loss": 152.6274, - "step": 15570 - }, - { - "epoch": 0.125889834274679, - "grad_norm": 1833.918212890625, - "learning_rate": 4.990758586589178e-05, - "loss": 142.1924, - "step": 15580 - }, - { - "epoch": 0.12597063647896314, - "grad_norm": 455.7380065917969, - "learning_rate": 4.990698517293395e-05, - "loss": 122.1153, - "step": 15590 - }, - { - "epoch": 0.12605143868324728, - "grad_norm": 1216.321044921875, - "learning_rate": 4.990638253767812e-05, - "loss": 134.6947, - "step": 15600 - }, - { - "epoch": 0.12613224088753142, - "grad_norm": 1764.4124755859375, - "learning_rate": 4.9905777960171304e-05, - "loss": 174.6715, - "step": 15610 - }, - { - "epoch": 0.12621304309181555, - "grad_norm": 837.7501220703125, - "learning_rate": 4.990517144046064e-05, - "loss": 143.5834, - "step": 15620 - }, - { - "epoch": 0.1262938452960997, - "grad_norm": 2157.06884765625, - "learning_rate": 4.9904562978593426e-05, - "loss": 187.1105, - "step": 15630 - }, - { - "epoch": 0.12637464750038382, - "grad_norm": 1229.7435302734375, - "learning_rate": 4.990395257461712e-05, - "loss": 104.4801, - "step": 15640 - }, - { - "epoch": 0.12645544970466793, - "grad_norm": 885.621826171875, - "learning_rate": 4.990334022857932e-05, - "loss": 163.4258, - "step": 15650 - }, - { - "epoch": 0.12653625190895207, - "grad_norm": 849.3128051757812, - "learning_rate": 4.990272594052776e-05, - "loss": 152.9101, - "step": 15660 - }, - { - "epoch": 0.1266170541132362, - "grad_norm": 1435.2025146484375, - "learning_rate": 4.990210971051037e-05, - "loss": 102.1439, - "step": 15670 - }, - { - "epoch": 0.12669785631752034, - "grad_norm": 1617.59619140625, - "learning_rate": 4.9901491538575185e-05, - "loss": 119.0009, - "step": 15680 - }, - { - "epoch": 0.12677865852180448, - "grad_norm": 1613.3414306640625, - "learning_rate": 4.9900871424770424e-05, - "loss": 170.1293, - "step": 15690 - }, - { - "epoch": 0.1268594607260886, - "grad_norm": 886.3299560546875, - "learning_rate": 4.9900249369144434e-05, - "loss": 158.7941, - "step": 15700 - }, - { - "epoch": 0.12694026293037275, - "grad_norm": 717.4042358398438, - "learning_rate": 4.9899625371745726e-05, - "loss": 190.0221, - "step": 15710 - }, - { - "epoch": 0.12702106513465689, - "grad_norm": 1099.2711181640625, - "learning_rate": 4.9898999432622974e-05, - "loss": 140.3318, - "step": 15720 - }, - { - "epoch": 0.127101867338941, - "grad_norm": 1195.45166015625, - "learning_rate": 4.9898371551824974e-05, - "loss": 177.7176, - "step": 15730 - }, - { - "epoch": 0.12718266954322513, - "grad_norm": 1423.959228515625, - "learning_rate": 4.9897741729400705e-05, - "loss": 166.1359, - "step": 15740 - }, - { - "epoch": 0.12726347174750927, - "grad_norm": 1396.514404296875, - "learning_rate": 4.989710996539926e-05, - "loss": 169.5669, - "step": 15750 - }, - { - "epoch": 0.1273442739517934, - "grad_norm": 907.1665649414062, - "learning_rate": 4.989647625986993e-05, - "loss": 164.1884, - "step": 15760 - }, - { - "epoch": 0.12742507615607754, - "grad_norm": 634.7987670898438, - "learning_rate": 4.989584061286211e-05, - "loss": 172.1849, - "step": 15770 - }, - { - "epoch": 0.12750587836036167, - "grad_norm": 659.5897216796875, - "learning_rate": 4.9895203024425385e-05, - "loss": 159.9483, - "step": 15780 - }, - { - "epoch": 0.1275866805646458, - "grad_norm": 1982.08447265625, - "learning_rate": 4.989456349460947e-05, - "loss": 189.0069, - "step": 15790 - }, - { - "epoch": 0.12766748276892995, - "grad_norm": 1139.15478515625, - "learning_rate": 4.9893922023464236e-05, - "loss": 131.2969, - "step": 15800 - }, - { - "epoch": 0.12774828497321408, - "grad_norm": 1150.562744140625, - "learning_rate": 4.98932786110397e-05, - "loss": 143.9886, - "step": 15810 - }, - { - "epoch": 0.1278290871774982, - "grad_norm": 1281.29833984375, - "learning_rate": 4.989263325738605e-05, - "loss": 173.2364, - "step": 15820 - }, - { - "epoch": 0.12790988938178233, - "grad_norm": 7039.4423828125, - "learning_rate": 4.9891985962553606e-05, - "loss": 218.6095, - "step": 15830 - }, - { - "epoch": 0.12799069158606646, - "grad_norm": 1216.67578125, - "learning_rate": 4.9891336726592844e-05, - "loss": 144.8143, - "step": 15840 - }, - { - "epoch": 0.1280714937903506, - "grad_norm": 933.6668701171875, - "learning_rate": 4.989068554955439e-05, - "loss": 185.2274, - "step": 15850 - }, - { - "epoch": 0.12815229599463474, - "grad_norm": 905.1338500976562, - "learning_rate": 4.989003243148904e-05, - "loss": 150.5198, - "step": 15860 - }, - { - "epoch": 0.12823309819891887, - "grad_norm": 3021.301025390625, - "learning_rate": 4.9889377372447706e-05, - "loss": 171.7874, - "step": 15870 - }, - { - "epoch": 0.128313900403203, - "grad_norm": 1128.1397705078125, - "learning_rate": 4.988872037248148e-05, - "loss": 168.7623, - "step": 15880 - }, - { - "epoch": 0.12839470260748714, - "grad_norm": 1293.630126953125, - "learning_rate": 4.988806143164159e-05, - "loss": 172.3305, - "step": 15890 - }, - { - "epoch": 0.12847550481177125, - "grad_norm": 815.5888061523438, - "learning_rate": 4.988740054997943e-05, - "loss": 154.5057, - "step": 15900 - }, - { - "epoch": 0.1285563070160554, - "grad_norm": 2671.7880859375, - "learning_rate": 4.988673772754653e-05, - "loss": 160.6701, - "step": 15910 - }, - { - "epoch": 0.12863710922033952, - "grad_norm": 867.8973999023438, - "learning_rate": 4.988607296439458e-05, - "loss": 152.7452, - "step": 15920 - }, - { - "epoch": 0.12871791142462366, - "grad_norm": 833.93115234375, - "learning_rate": 4.988540626057543e-05, - "loss": 153.8134, - "step": 15930 - }, - { - "epoch": 0.1287987136289078, - "grad_norm": 832.8592529296875, - "learning_rate": 4.988473761614105e-05, - "loss": 121.0948, - "step": 15940 - }, - { - "epoch": 0.12887951583319193, - "grad_norm": 5799.212890625, - "learning_rate": 4.98840670311436e-05, - "loss": 151.9168, - "step": 15950 - }, - { - "epoch": 0.12896031803747607, - "grad_norm": 1821.7926025390625, - "learning_rate": 4.9883394505635364e-05, - "loss": 135.1265, - "step": 15960 - }, - { - "epoch": 0.1290411202417602, - "grad_norm": 592.3580932617188, - "learning_rate": 4.988272003966879e-05, - "loss": 161.6558, - "step": 15970 - }, - { - "epoch": 0.12912192244604434, - "grad_norm": 921.9555053710938, - "learning_rate": 4.988204363329648e-05, - "loss": 127.3051, - "step": 15980 - }, - { - "epoch": 0.12920272465032845, - "grad_norm": 577.0763549804688, - "learning_rate": 4.988136528657118e-05, - "loss": 105.9127, - "step": 15990 - }, - { - "epoch": 0.12928352685461258, - "grad_norm": 1444.57177734375, - "learning_rate": 4.988068499954578e-05, - "loss": 166.2644, - "step": 16000 - }, - { - "epoch": 0.12936432905889672, - "grad_norm": 2282.3720703125, - "learning_rate": 4.988000277227334e-05, - "loss": 204.9646, - "step": 16010 - }, - { - "epoch": 0.12944513126318086, - "grad_norm": 883.1238403320312, - "learning_rate": 4.987931860480705e-05, - "loss": 147.516, - "step": 16020 - }, - { - "epoch": 0.129525933467465, - "grad_norm": 1411.989501953125, - "learning_rate": 4.987863249720027e-05, - "loss": 126.5586, - "step": 16030 - }, - { - "epoch": 0.12960673567174913, - "grad_norm": 690.98291015625, - "learning_rate": 4.987794444950651e-05, - "loss": 186.7527, - "step": 16040 - }, - { - "epoch": 0.12968753787603327, - "grad_norm": 1121.184326171875, - "learning_rate": 4.987725446177941e-05, - "loss": 146.0869, - "step": 16050 - }, - { - "epoch": 0.1297683400803174, - "grad_norm": 1537.690673828125, - "learning_rate": 4.98765625340728e-05, - "loss": 162.6881, - "step": 16060 - }, - { - "epoch": 0.12984914228460154, - "grad_norm": 690.1722412109375, - "learning_rate": 4.9875868666440604e-05, - "loss": 120.9229, - "step": 16070 - }, - { - "epoch": 0.12992994448888565, - "grad_norm": 1415.0672607421875, - "learning_rate": 4.987517285893697e-05, - "loss": 163.8807, - "step": 16080 - }, - { - "epoch": 0.13001074669316978, - "grad_norm": 1055.4945068359375, - "learning_rate": 4.987447511161612e-05, - "loss": 167.0536, - "step": 16090 - }, - { - "epoch": 0.13009154889745392, - "grad_norm": 958.2368774414062, - "learning_rate": 4.987377542453251e-05, - "loss": 171.2699, - "step": 16100 - }, - { - "epoch": 0.13017235110173805, - "grad_norm": 1073.2728271484375, - "learning_rate": 4.987307379774066e-05, - "loss": 145.0437, - "step": 16110 - }, - { - "epoch": 0.1302531533060222, - "grad_norm": 2019.6201171875, - "learning_rate": 4.987237023129531e-05, - "loss": 148.3471, - "step": 16120 - }, - { - "epoch": 0.13033395551030633, - "grad_norm": 1011.5209350585938, - "learning_rate": 4.9871664725251314e-05, - "loss": 126.0875, - "step": 16130 - }, - { - "epoch": 0.13041475771459046, - "grad_norm": 1349.309814453125, - "learning_rate": 4.98709572796637e-05, - "loss": 135.4849, - "step": 16140 - }, - { - "epoch": 0.1304955599188746, - "grad_norm": 552.923095703125, - "learning_rate": 4.987024789458762e-05, - "loss": 145.0853, - "step": 16150 - }, - { - "epoch": 0.1305763621231587, - "grad_norm": 3444.074462890625, - "learning_rate": 4.986953657007841e-05, - "loss": 170.7159, - "step": 16160 - }, - { - "epoch": 0.13065716432744284, - "grad_norm": 578.19140625, - "learning_rate": 4.986882330619152e-05, - "loss": 139.0433, - "step": 16170 - }, - { - "epoch": 0.13073796653172698, - "grad_norm": 1995.472412109375, - "learning_rate": 4.9868108102982604e-05, - "loss": 194.3335, - "step": 16180 - }, - { - "epoch": 0.13081876873601112, - "grad_norm": 753.7646484375, - "learning_rate": 4.98673909605074e-05, - "loss": 129.9241, - "step": 16190 - }, - { - "epoch": 0.13089957094029525, - "grad_norm": 895.2208862304688, - "learning_rate": 4.986667187882186e-05, - "loss": 151.2042, - "step": 16200 - }, - { - "epoch": 0.1309803731445794, - "grad_norm": 938.2938842773438, - "learning_rate": 4.986595085798204e-05, - "loss": 153.1665, - "step": 16210 - }, - { - "epoch": 0.13106117534886352, - "grad_norm": 1215.581787109375, - "learning_rate": 4.986522789804417e-05, - "loss": 132.5531, - "step": 16220 - }, - { - "epoch": 0.13114197755314766, - "grad_norm": 1030.686279296875, - "learning_rate": 4.986450299906464e-05, - "loss": 187.8237, - "step": 16230 - }, - { - "epoch": 0.1312227797574318, - "grad_norm": 1050.9188232421875, - "learning_rate": 4.9863776161099964e-05, - "loss": 135.7022, - "step": 16240 - }, - { - "epoch": 0.1313035819617159, - "grad_norm": 1255.7137451171875, - "learning_rate": 4.9863047384206835e-05, - "loss": 169.7048, - "step": 16250 - }, - { - "epoch": 0.13138438416600004, - "grad_norm": 1686.0699462890625, - "learning_rate": 4.986231666844208e-05, - "loss": 155.087, - "step": 16260 - }, - { - "epoch": 0.13146518637028418, - "grad_norm": 3442.361328125, - "learning_rate": 4.986158401386268e-05, - "loss": 201.1184, - "step": 16270 - }, - { - "epoch": 0.1315459885745683, - "grad_norm": 1908.744384765625, - "learning_rate": 4.9860849420525766e-05, - "loss": 180.5946, - "step": 16280 - }, - { - "epoch": 0.13162679077885245, - "grad_norm": 1508.2030029296875, - "learning_rate": 4.986011288848863e-05, - "loss": 136.8041, - "step": 16290 - }, - { - "epoch": 0.13170759298313658, - "grad_norm": 1722.505615234375, - "learning_rate": 4.98593744178087e-05, - "loss": 147.4105, - "step": 16300 - }, - { - "epoch": 0.13178839518742072, - "grad_norm": 2715.455810546875, - "learning_rate": 4.985863400854358e-05, - "loss": 169.9449, - "step": 16310 - }, - { - "epoch": 0.13186919739170486, - "grad_norm": 961.5862426757812, - "learning_rate": 4.9857891660750986e-05, - "loss": 154.6072, - "step": 16320 - }, - { - "epoch": 0.13194999959598896, - "grad_norm": 2247.080810546875, - "learning_rate": 4.985714737448882e-05, - "loss": 166.1652, - "step": 16330 - }, - { - "epoch": 0.1320308018002731, - "grad_norm": 743.886962890625, - "learning_rate": 4.9856401149815126e-05, - "loss": 113.0431, - "step": 16340 - }, - { - "epoch": 0.13211160400455724, - "grad_norm": 971.4644775390625, - "learning_rate": 4.985565298678809e-05, - "loss": 137.2266, - "step": 16350 - }, - { - "epoch": 0.13219240620884137, - "grad_norm": 852.8344116210938, - "learning_rate": 4.985490288546606e-05, - "loss": 155.6294, - "step": 16360 - }, - { - "epoch": 0.1322732084131255, - "grad_norm": 1399.8111572265625, - "learning_rate": 4.985415084590752e-05, - "loss": 135.8477, - "step": 16370 - }, - { - "epoch": 0.13235401061740965, - "grad_norm": 773.2186889648438, - "learning_rate": 4.985339686817113e-05, - "loss": 192.4227, - "step": 16380 - }, - { - "epoch": 0.13243481282169378, - "grad_norm": 2009.193115234375, - "learning_rate": 4.9852640952315674e-05, - "loss": 136.643, - "step": 16390 - }, - { - "epoch": 0.13251561502597792, - "grad_norm": 1868.528564453125, - "learning_rate": 4.985188309840012e-05, - "loss": 158.3561, - "step": 16400 - }, - { - "epoch": 0.13259641723026205, - "grad_norm": 817.8035888671875, - "learning_rate": 4.985112330648354e-05, - "loss": 185.092, - "step": 16410 - }, - { - "epoch": 0.13267721943454616, - "grad_norm": 1471.85205078125, - "learning_rate": 4.985036157662521e-05, - "loss": 177.9861, - "step": 16420 - }, - { - "epoch": 0.1327580216388303, - "grad_norm": 3544.751953125, - "learning_rate": 4.98495979088845e-05, - "loss": 203.457, - "step": 16430 - }, - { - "epoch": 0.13283882384311443, - "grad_norm": 1203.506591796875, - "learning_rate": 4.984883230332099e-05, - "loss": 132.8798, - "step": 16440 - }, - { - "epoch": 0.13291962604739857, - "grad_norm": 631.9423217773438, - "learning_rate": 4.984806475999437e-05, - "loss": 154.4456, - "step": 16450 - }, - { - "epoch": 0.1330004282516827, - "grad_norm": 1113.6688232421875, - "learning_rate": 4.9847295278964514e-05, - "loss": 106.8571, - "step": 16460 - }, - { - "epoch": 0.13308123045596684, - "grad_norm": 939.092529296875, - "learning_rate": 4.984652386029139e-05, - "loss": 178.3816, - "step": 16470 - }, - { - "epoch": 0.13316203266025098, - "grad_norm": 795.2464599609375, - "learning_rate": 4.9845750504035195e-05, - "loss": 156.9716, - "step": 16480 - }, - { - "epoch": 0.13324283486453511, - "grad_norm": 1656.269287109375, - "learning_rate": 4.9844975210256217e-05, - "loss": 149.8815, - "step": 16490 - }, - { - "epoch": 0.13332363706881925, - "grad_norm": 655.8922729492188, - "learning_rate": 4.984419797901491e-05, - "loss": 150.7373, - "step": 16500 - }, - { - "epoch": 0.13340443927310336, - "grad_norm": 741.923828125, - "learning_rate": 4.98434188103719e-05, - "loss": 127.3173, - "step": 16510 - }, - { - "epoch": 0.1334852414773875, - "grad_norm": 1200.45751953125, - "learning_rate": 4.984263770438793e-05, - "loss": 161.5114, - "step": 16520 - }, - { - "epoch": 0.13356604368167163, - "grad_norm": 1891.3720703125, - "learning_rate": 4.9841854661123936e-05, - "loss": 145.1845, - "step": 16530 - }, - { - "epoch": 0.13364684588595577, - "grad_norm": 939.89892578125, - "learning_rate": 4.984106968064095e-05, - "loss": 157.5545, - "step": 16540 - }, - { - "epoch": 0.1337276480902399, - "grad_norm": 854.355224609375, - "learning_rate": 4.984028276300021e-05, - "loss": 178.013, - "step": 16550 - }, - { - "epoch": 0.13380845029452404, - "grad_norm": 987.6085205078125, - "learning_rate": 4.983949390826308e-05, - "loss": 146.7997, - "step": 16560 - }, - { - "epoch": 0.13388925249880818, - "grad_norm": 1321.1260986328125, - "learning_rate": 4.983870311649107e-05, - "loss": 151.0627, - "step": 16570 - }, - { - "epoch": 0.1339700547030923, - "grad_norm": 1190.7801513671875, - "learning_rate": 4.9837910387745845e-05, - "loss": 140.5531, - "step": 16580 - }, - { - "epoch": 0.13405085690737642, - "grad_norm": 727.1590576171875, - "learning_rate": 4.983711572208924e-05, - "loss": 126.6944, - "step": 16590 - }, - { - "epoch": 0.13413165911166056, - "grad_norm": 1485.8936767578125, - "learning_rate": 4.983631911958319e-05, - "loss": 138.0572, - "step": 16600 - }, - { - "epoch": 0.1342124613159447, - "grad_norm": 811.7257080078125, - "learning_rate": 4.9835520580289854e-05, - "loss": 100.7419, - "step": 16610 - }, - { - "epoch": 0.13429326352022883, - "grad_norm": 1383.6395263671875, - "learning_rate": 4.9834720104271484e-05, - "loss": 132.5897, - "step": 16620 - }, - { - "epoch": 0.13437406572451296, - "grad_norm": 2057.96533203125, - "learning_rate": 4.9833917691590506e-05, - "loss": 136.6982, - "step": 16630 - }, - { - "epoch": 0.1344548679287971, - "grad_norm": 1012.1217651367188, - "learning_rate": 4.98331133423095e-05, - "loss": 173.4038, - "step": 16640 - }, - { - "epoch": 0.13453567013308124, - "grad_norm": 1609.89306640625, - "learning_rate": 4.983230705649118e-05, - "loss": 144.2255, - "step": 16650 - }, - { - "epoch": 0.13461647233736537, - "grad_norm": 773.6680297851562, - "learning_rate": 4.983149883419842e-05, - "loss": 160.3266, - "step": 16660 - }, - { - "epoch": 0.1346972745416495, - "grad_norm": 3496.8505859375, - "learning_rate": 4.9830688675494265e-05, - "loss": 177.2727, - "step": 16670 - }, - { - "epoch": 0.13477807674593362, - "grad_norm": 900.9786987304688, - "learning_rate": 4.982987658044188e-05, - "loss": 144.9409, - "step": 16680 - }, - { - "epoch": 0.13485887895021775, - "grad_norm": 769.6142578125, - "learning_rate": 4.982906254910459e-05, - "loss": 126.4068, - "step": 16690 - }, - { - "epoch": 0.1349396811545019, - "grad_norm": 720.4602661132812, - "learning_rate": 4.982824658154589e-05, - "loss": 142.1272, - "step": 16700 - }, - { - "epoch": 0.13502048335878603, - "grad_norm": 1880.3961181640625, - "learning_rate": 4.982742867782939e-05, - "loss": 165.7867, - "step": 16710 - }, - { - "epoch": 0.13510128556307016, - "grad_norm": 972.5383911132812, - "learning_rate": 4.982660883801889e-05, - "loss": 194.2058, - "step": 16720 - }, - { - "epoch": 0.1351820877673543, - "grad_norm": 1582.2630615234375, - "learning_rate": 4.9825787062178315e-05, - "loss": 165.0465, - "step": 16730 - }, - { - "epoch": 0.13526288997163843, - "grad_norm": 971.8427734375, - "learning_rate": 4.982496335037175e-05, - "loss": 116.1355, - "step": 16740 - }, - { - "epoch": 0.13534369217592257, - "grad_norm": 847.18701171875, - "learning_rate": 4.982413770266342e-05, - "loss": 106.9377, - "step": 16750 - }, - { - "epoch": 0.1354244943802067, - "grad_norm": 1521.8050537109375, - "learning_rate": 4.982331011911774e-05, - "loss": 204.7232, - "step": 16760 - }, - { - "epoch": 0.13550529658449081, - "grad_norm": 841.8185424804688, - "learning_rate": 4.982248059979921e-05, - "loss": 147.1471, - "step": 16770 - }, - { - "epoch": 0.13558609878877495, - "grad_norm": 2477.669189453125, - "learning_rate": 4.9821649144772545e-05, - "loss": 126.5678, - "step": 16780 - }, - { - "epoch": 0.1356669009930591, - "grad_norm": 1012.5833129882812, - "learning_rate": 4.982081575410256e-05, - "loss": 143.7285, - "step": 16790 - }, - { - "epoch": 0.13574770319734322, - "grad_norm": 1106.0914306640625, - "learning_rate": 4.981998042785427e-05, - "loss": 155.6399, - "step": 16800 - }, - { - "epoch": 0.13582850540162736, - "grad_norm": 547.897216796875, - "learning_rate": 4.9819143166092796e-05, - "loss": 172.4704, - "step": 16810 - }, - { - "epoch": 0.1359093076059115, - "grad_norm": 2956.504638671875, - "learning_rate": 4.981830396888344e-05, - "loss": 145.8728, - "step": 16820 - }, - { - "epoch": 0.13599010981019563, - "grad_norm": 832.076416015625, - "learning_rate": 4.981746283629164e-05, - "loss": 202.1632, - "step": 16830 - }, - { - "epoch": 0.13607091201447977, - "grad_norm": 1670.494873046875, - "learning_rate": 4.981661976838299e-05, - "loss": 162.7365, - "step": 16840 - }, - { - "epoch": 0.13615171421876388, - "grad_norm": 793.0515747070312, - "learning_rate": 4.9815774765223226e-05, - "loss": 145.8182, - "step": 16850 - }, - { - "epoch": 0.136232516423048, - "grad_norm": 889.9873046875, - "learning_rate": 4.9814927826878256e-05, - "loss": 154.0136, - "step": 16860 - }, - { - "epoch": 0.13631331862733215, - "grad_norm": 1307.1876220703125, - "learning_rate": 4.981407895341412e-05, - "loss": 192.9161, - "step": 16870 - }, - { - "epoch": 0.13639412083161628, - "grad_norm": 1417.89501953125, - "learning_rate": 4.981322814489703e-05, - "loss": 133.9221, - "step": 16880 - }, - { - "epoch": 0.13647492303590042, - "grad_norm": 2171.959228515625, - "learning_rate": 4.981237540139331e-05, - "loss": 169.6973, - "step": 16890 - }, - { - "epoch": 0.13655572524018456, - "grad_norm": 851.3436889648438, - "learning_rate": 4.9811520722969465e-05, - "loss": 141.0785, - "step": 16900 - }, - { - "epoch": 0.1366365274444687, - "grad_norm": 1375.1319580078125, - "learning_rate": 4.981066410969215e-05, - "loss": 172.2803, - "step": 16910 - }, - { - "epoch": 0.13671732964875283, - "grad_norm": 955.9098510742188, - "learning_rate": 4.980980556162816e-05, - "loss": 184.0196, - "step": 16920 - }, - { - "epoch": 0.13679813185303696, - "grad_norm": 1010.2421264648438, - "learning_rate": 4.9808945078844456e-05, - "loss": 133.9699, - "step": 16930 - }, - { - "epoch": 0.13687893405732107, - "grad_norm": 1332.0765380859375, - "learning_rate": 4.980808266140813e-05, - "loss": 160.7025, - "step": 16940 - }, - { - "epoch": 0.1369597362616052, - "grad_norm": 1153.353271484375, - "learning_rate": 4.9807218309386444e-05, - "loss": 145.1448, - "step": 16950 - }, - { - "epoch": 0.13704053846588934, - "grad_norm": 1398.808837890625, - "learning_rate": 4.980635202284679e-05, - "loss": 184.6072, - "step": 16960 - }, - { - "epoch": 0.13712134067017348, - "grad_norm": 3841.353515625, - "learning_rate": 4.980548380185674e-05, - "loss": 154.3389, - "step": 16970 - }, - { - "epoch": 0.13720214287445762, - "grad_norm": 1237.549560546875, - "learning_rate": 4.980461364648398e-05, - "loss": 161.6204, - "step": 16980 - }, - { - "epoch": 0.13728294507874175, - "grad_norm": 1093.3084716796875, - "learning_rate": 4.980374155679639e-05, - "loss": 172.3415, - "step": 16990 - }, - { - "epoch": 0.1373637472830259, - "grad_norm": 1487.4310302734375, - "learning_rate": 4.980286753286195e-05, - "loss": 183.5333, - "step": 17000 - }, - { - "epoch": 0.13744454948731002, - "grad_norm": 1426.180908203125, - "learning_rate": 4.980199157474884e-05, - "loss": 153.6975, - "step": 17010 - }, - { - "epoch": 0.13752535169159413, - "grad_norm": 954.7348022460938, - "learning_rate": 4.980111368252535e-05, - "loss": 116.307, - "step": 17020 - }, - { - "epoch": 0.13760615389587827, - "grad_norm": 1467.380615234375, - "learning_rate": 4.980023385625996e-05, - "loss": 168.5433, - "step": 17030 - }, - { - "epoch": 0.1376869561001624, - "grad_norm": 842.9469604492188, - "learning_rate": 4.9799352096021266e-05, - "loss": 126.3834, - "step": 17040 - }, - { - "epoch": 0.13776775830444654, - "grad_norm": 1359.48779296875, - "learning_rate": 4.979846840187804e-05, - "loss": 138.4327, - "step": 17050 - }, - { - "epoch": 0.13784856050873068, - "grad_norm": 1179.5399169921875, - "learning_rate": 4.979758277389919e-05, - "loss": 148.6625, - "step": 17060 - }, - { - "epoch": 0.1379293627130148, - "grad_norm": 1959.16455078125, - "learning_rate": 4.9796695212153764e-05, - "loss": 179.9702, - "step": 17070 - }, - { - "epoch": 0.13801016491729895, - "grad_norm": 1162.3765869140625, - "learning_rate": 4.9795805716711e-05, - "loss": 149.501, - "step": 17080 - }, - { - "epoch": 0.13809096712158309, - "grad_norm": 1537.8314208984375, - "learning_rate": 4.979491428764026e-05, - "loss": 147.3485, - "step": 17090 - }, - { - "epoch": 0.13817176932586722, - "grad_norm": 1022.4971923828125, - "learning_rate": 4.9794020925011044e-05, - "loss": 166.4088, - "step": 17100 - }, - { - "epoch": 0.13825257153015133, - "grad_norm": 993.126953125, - "learning_rate": 4.979312562889302e-05, - "loss": 148.4066, - "step": 17110 - }, - { - "epoch": 0.13833337373443547, - "grad_norm": 575.3409423828125, - "learning_rate": 4.979222839935602e-05, - "loss": 131.3587, - "step": 17120 - }, - { - "epoch": 0.1384141759387196, - "grad_norm": 1890.7564697265625, - "learning_rate": 4.979132923647001e-05, - "loss": 197.1948, - "step": 17130 - }, - { - "epoch": 0.13849497814300374, - "grad_norm": 1188.45751953125, - "learning_rate": 4.979042814030509e-05, - "loss": 143.5426, - "step": 17140 - }, - { - "epoch": 0.13857578034728787, - "grad_norm": 1359.6156005859375, - "learning_rate": 4.9789525110931545e-05, - "loss": 158.367, - "step": 17150 - }, - { - "epoch": 0.138656582551572, - "grad_norm": 1215.458251953125, - "learning_rate": 4.978862014841979e-05, - "loss": 175.4612, - "step": 17160 - }, - { - "epoch": 0.13873738475585615, - "grad_norm": 762.6837768554688, - "learning_rate": 4.97877132528404e-05, - "loss": 165.1317, - "step": 17170 - }, - { - "epoch": 0.13881818696014028, - "grad_norm": 848.4654541015625, - "learning_rate": 4.9786804424264085e-05, - "loss": 138.1874, - "step": 17180 - }, - { - "epoch": 0.13889898916442442, - "grad_norm": 884.6715698242188, - "learning_rate": 4.978589366276174e-05, - "loss": 149.637, - "step": 17190 - }, - { - "epoch": 0.13897979136870853, - "grad_norm": 1178.405517578125, - "learning_rate": 4.978498096840436e-05, - "loss": 140.1801, - "step": 17200 - }, - { - "epoch": 0.13906059357299266, - "grad_norm": 1031.2545166015625, - "learning_rate": 4.978406634126315e-05, - "loss": 134.6002, - "step": 17210 - }, - { - "epoch": 0.1391413957772768, - "grad_norm": 1731.0985107421875, - "learning_rate": 4.9783149781409404e-05, - "loss": 183.743, - "step": 17220 - }, - { - "epoch": 0.13922219798156094, - "grad_norm": 1650.5875244140625, - "learning_rate": 4.9782231288914614e-05, - "loss": 120.0381, - "step": 17230 - }, - { - "epoch": 0.13930300018584507, - "grad_norm": 1495.403076171875, - "learning_rate": 4.9781310863850405e-05, - "loss": 175.3697, - "step": 17240 - }, - { - "epoch": 0.1393838023901292, - "grad_norm": 501.58941650390625, - "learning_rate": 4.978038850628854e-05, - "loss": 133.6067, - "step": 17250 - }, - { - "epoch": 0.13946460459441334, - "grad_norm": 1925.8092041015625, - "learning_rate": 4.977946421630098e-05, - "loss": 148.8892, - "step": 17260 - }, - { - "epoch": 0.13954540679869748, - "grad_norm": 1026.5667724609375, - "learning_rate": 4.977853799395976e-05, - "loss": 140.0857, - "step": 17270 - }, - { - "epoch": 0.1396262090029816, - "grad_norm": 1062.37158203125, - "learning_rate": 4.977760983933714e-05, - "loss": 181.9256, - "step": 17280 - }, - { - "epoch": 0.13970701120726572, - "grad_norm": 553.3063354492188, - "learning_rate": 4.9776679752505476e-05, - "loss": 111.2899, - "step": 17290 - }, - { - "epoch": 0.13978781341154986, - "grad_norm": 931.499267578125, - "learning_rate": 4.977574773353732e-05, - "loss": 156.3531, - "step": 17300 - }, - { - "epoch": 0.139868615615834, - "grad_norm": 2204.541259765625, - "learning_rate": 4.9774813782505346e-05, - "loss": 162.9769, - "step": 17310 - }, - { - "epoch": 0.13994941782011813, - "grad_norm": 2574.611328125, - "learning_rate": 4.977387789948238e-05, - "loss": 130.0136, - "step": 17320 - }, - { - "epoch": 0.14003022002440227, - "grad_norm": 1076.545166015625, - "learning_rate": 4.9772940084541405e-05, - "loss": 139.3396, - "step": 17330 - }, - { - "epoch": 0.1401110222286864, - "grad_norm": 4904.904296875, - "learning_rate": 4.977200033775555e-05, - "loss": 161.8568, - "step": 17340 - }, - { - "epoch": 0.14019182443297054, - "grad_norm": 1411.177734375, - "learning_rate": 4.977105865919812e-05, - "loss": 193.977, - "step": 17350 - }, - { - "epoch": 0.14027262663725468, - "grad_norm": 761.4797973632812, - "learning_rate": 4.977011504894252e-05, - "loss": 164.1023, - "step": 17360 - }, - { - "epoch": 0.14035342884153879, - "grad_norm": 825.9505004882812, - "learning_rate": 4.9769169507062355e-05, - "loss": 167.3119, - "step": 17370 - }, - { - "epoch": 0.14043423104582292, - "grad_norm": 1150.2398681640625, - "learning_rate": 4.976822203363135e-05, - "loss": 198.5971, - "step": 17380 - }, - { - "epoch": 0.14051503325010706, - "grad_norm": 876.4376831054688, - "learning_rate": 4.9767272628723396e-05, - "loss": 133.4655, - "step": 17390 - }, - { - "epoch": 0.1405958354543912, - "grad_norm": 767.9694213867188, - "learning_rate": 4.976632129241252e-05, - "loss": 144.9063, - "step": 17400 - }, - { - "epoch": 0.14067663765867533, - "grad_norm": 1063.9190673828125, - "learning_rate": 4.976536802477293e-05, - "loss": 148.8876, - "step": 17410 - }, - { - "epoch": 0.14075743986295947, - "grad_norm": 1127.183837890625, - "learning_rate": 4.9764412825878943e-05, - "loss": 164.7263, - "step": 17420 - }, - { - "epoch": 0.1408382420672436, - "grad_norm": 1377.240478515625, - "learning_rate": 4.9763455695805056e-05, - "loss": 146.4068, - "step": 17430 - }, - { - "epoch": 0.14091904427152774, - "grad_norm": 1672.3421630859375, - "learning_rate": 4.97624966346259e-05, - "loss": 124.8671, - "step": 17440 - }, - { - "epoch": 0.14099984647581185, - "grad_norm": 771.4324951171875, - "learning_rate": 4.976153564241628e-05, - "loss": 156.0245, - "step": 17450 - }, - { - "epoch": 0.14108064868009598, - "grad_norm": 1298.169921875, - "learning_rate": 4.976057271925113e-05, - "loss": 147.4714, - "step": 17460 - }, - { - "epoch": 0.14116145088438012, - "grad_norm": 983.837646484375, - "learning_rate": 4.9759607865205534e-05, - "loss": 157.2918, - "step": 17470 - }, - { - "epoch": 0.14124225308866425, - "grad_norm": 1022.8980102539062, - "learning_rate": 4.975864108035474e-05, - "loss": 155.9393, - "step": 17480 - }, - { - "epoch": 0.1413230552929484, - "grad_norm": 946.48388671875, - "learning_rate": 4.975767236477413e-05, - "loss": 160.4426, - "step": 17490 - }, - { - "epoch": 0.14140385749723253, - "grad_norm": 1108.152099609375, - "learning_rate": 4.975670171853926e-05, - "loss": 247.9724, - "step": 17500 - }, - { - "epoch": 0.14148465970151666, - "grad_norm": 1151.1837158203125, - "learning_rate": 4.975572914172582e-05, - "loss": 148.1506, - "step": 17510 - }, - { - "epoch": 0.1415654619058008, - "grad_norm": 1201.533203125, - "learning_rate": 4.975475463440964e-05, - "loss": 158.5323, - "step": 17520 - }, - { - "epoch": 0.14164626411008494, - "grad_norm": 1211.3099365234375, - "learning_rate": 4.9753778196666737e-05, - "loss": 149.4649, - "step": 17530 - }, - { - "epoch": 0.14172706631436904, - "grad_norm": 1683.60546875, - "learning_rate": 4.975279982857324e-05, - "loss": 160.6301, - "step": 17540 - }, - { - "epoch": 0.14180786851865318, - "grad_norm": 2908.14111328125, - "learning_rate": 4.975181953020544e-05, - "loss": 141.8548, - "step": 17550 - }, - { - "epoch": 0.14188867072293732, - "grad_norm": 2263.890380859375, - "learning_rate": 4.9750837301639796e-05, - "loss": 182.0562, - "step": 17560 - }, - { - "epoch": 0.14196947292722145, - "grad_norm": 528.302978515625, - "learning_rate": 4.97498531429529e-05, - "loss": 164.7776, - "step": 17570 - }, - { - "epoch": 0.1420502751315056, - "grad_norm": 1195.1876220703125, - "learning_rate": 4.974886705422149e-05, - "loss": 143.7635, - "step": 17580 - }, - { - "epoch": 0.14213107733578972, - "grad_norm": 1085.2696533203125, - "learning_rate": 4.974787903552247e-05, - "loss": 148.5739, - "step": 17590 - }, - { - "epoch": 0.14221187954007386, - "grad_norm": 1278.501953125, - "learning_rate": 4.9746889086932895e-05, - "loss": 214.0191, - "step": 17600 - }, - { - "epoch": 0.142292681744358, - "grad_norm": 2206.2509765625, - "learning_rate": 4.9745897208529956e-05, - "loss": 132.2688, - "step": 17610 - }, - { - "epoch": 0.14237348394864213, - "grad_norm": 1119.11669921875, - "learning_rate": 4.9744903400391e-05, - "loss": 174.7807, - "step": 17620 - }, - { - "epoch": 0.14245428615292624, - "grad_norm": 1402.7005615234375, - "learning_rate": 4.9743907662593524e-05, - "loss": 147.3089, - "step": 17630 - }, - { - "epoch": 0.14253508835721038, - "grad_norm": 1532.6014404296875, - "learning_rate": 4.974290999521519e-05, - "loss": 173.4095, - "step": 17640 - }, - { - "epoch": 0.1426158905614945, - "grad_norm": 996.5668334960938, - "learning_rate": 4.974191039833378e-05, - "loss": 136.0841, - "step": 17650 - }, - { - "epoch": 0.14269669276577865, - "grad_norm": 1744.9200439453125, - "learning_rate": 4.974090887202726e-05, - "loss": 164.2604, - "step": 17660 - }, - { - "epoch": 0.14277749497006278, - "grad_norm": 999.5769653320312, - "learning_rate": 4.973990541637373e-05, - "loss": 167.7739, - "step": 17670 - }, - { - "epoch": 0.14285829717434692, - "grad_norm": 1184.4854736328125, - "learning_rate": 4.973890003145143e-05, - "loss": 162.1145, - "step": 17680 - }, - { - "epoch": 0.14293909937863106, - "grad_norm": 520.3111572265625, - "learning_rate": 4.9737892717338774e-05, - "loss": 134.3008, - "step": 17690 - }, - { - "epoch": 0.1430199015829152, - "grad_norm": 1235.701904296875, - "learning_rate": 4.973688347411431e-05, - "loss": 175.0318, - "step": 17700 - }, - { - "epoch": 0.1431007037871993, - "grad_norm": 1255.1722412109375, - "learning_rate": 4.9735872301856734e-05, - "loss": 153.4417, - "step": 17710 - }, - { - "epoch": 0.14318150599148344, - "grad_norm": 1761.381103515625, - "learning_rate": 4.9734859200644905e-05, - "loss": 169.9493, - "step": 17720 - }, - { - "epoch": 0.14326230819576757, - "grad_norm": 4513.69189453125, - "learning_rate": 4.973384417055784e-05, - "loss": 160.0505, - "step": 17730 - }, - { - "epoch": 0.1433431104000517, - "grad_norm": 906.417724609375, - "learning_rate": 4.973282721167467e-05, - "loss": 142.5354, - "step": 17740 - }, - { - "epoch": 0.14342391260433585, - "grad_norm": 1306.0880126953125, - "learning_rate": 4.9731808324074717e-05, - "loss": 146.9365, - "step": 17750 - }, - { - "epoch": 0.14350471480861998, - "grad_norm": 1127.4208984375, - "learning_rate": 4.973078750783742e-05, - "loss": 109.1662, - "step": 17760 - }, - { - "epoch": 0.14358551701290412, - "grad_norm": 2638.851318359375, - "learning_rate": 4.9729764763042394e-05, - "loss": 138.079, - "step": 17770 - }, - { - "epoch": 0.14366631921718825, - "grad_norm": 755.9400024414062, - "learning_rate": 4.97287400897694e-05, - "loss": 216.0159, - "step": 17780 - }, - { - "epoch": 0.1437471214214724, - "grad_norm": 829.8555297851562, - "learning_rate": 4.9727713488098335e-05, - "loss": 123.3962, - "step": 17790 - }, - { - "epoch": 0.1438279236257565, - "grad_norm": 2474.89453125, - "learning_rate": 4.9726684958109266e-05, - "loss": 158.4403, - "step": 17800 - }, - { - "epoch": 0.14390872583004063, - "grad_norm": 1540.85888671875, - "learning_rate": 4.972565449988239e-05, - "loss": 131.0721, - "step": 17810 - }, - { - "epoch": 0.14398952803432477, - "grad_norm": 1134.7333984375, - "learning_rate": 4.972462211349806e-05, - "loss": 134.9225, - "step": 17820 - }, - { - "epoch": 0.1440703302386089, - "grad_norm": 1411.2059326171875, - "learning_rate": 4.97235877990368e-05, - "loss": 131.4243, - "step": 17830 - }, - { - "epoch": 0.14415113244289304, - "grad_norm": 926.7616577148438, - "learning_rate": 4.972255155657925e-05, - "loss": 162.4818, - "step": 17840 - }, - { - "epoch": 0.14423193464717718, - "grad_norm": 1091.10009765625, - "learning_rate": 4.972151338620623e-05, - "loss": 119.6657, - "step": 17850 - }, - { - "epoch": 0.14431273685146132, - "grad_norm": 675.8668212890625, - "learning_rate": 4.9720473287998695e-05, - "loss": 190.9068, - "step": 17860 - }, - { - "epoch": 0.14439353905574545, - "grad_norm": 1118.227783203125, - "learning_rate": 4.9719431262037755e-05, - "loss": 126.8068, - "step": 17870 - }, - { - "epoch": 0.1444743412600296, - "grad_norm": 2237.990478515625, - "learning_rate": 4.9718387308404675e-05, - "loss": 190.5311, - "step": 17880 - }, - { - "epoch": 0.1445551434643137, - "grad_norm": 1136.5, - "learning_rate": 4.971734142718085e-05, - "loss": 171.6272, - "step": 17890 - }, - { - "epoch": 0.14463594566859783, - "grad_norm": 1384.3721923828125, - "learning_rate": 4.971629361844785e-05, - "loss": 187.1821, - "step": 17900 - }, - { - "epoch": 0.14471674787288197, - "grad_norm": 670.4478149414062, - "learning_rate": 4.9715243882287386e-05, - "loss": 164.4186, - "step": 17910 - }, - { - "epoch": 0.1447975500771661, - "grad_norm": 780.3966674804688, - "learning_rate": 4.9714192218781316e-05, - "loss": 128.9477, - "step": 17920 - }, - { - "epoch": 0.14487835228145024, - "grad_norm": 1382.050048828125, - "learning_rate": 4.9713138628011654e-05, - "loss": 149.3526, - "step": 17930 - }, - { - "epoch": 0.14495915448573438, - "grad_norm": 832.4025268554688, - "learning_rate": 4.9712083110060556e-05, - "loss": 175.8968, - "step": 17940 - }, - { - "epoch": 0.1450399566900185, - "grad_norm": 1626.2508544921875, - "learning_rate": 4.971102566501034e-05, - "loss": 143.8687, - "step": 17950 - }, - { - "epoch": 0.14512075889430265, - "grad_norm": 854.646484375, - "learning_rate": 4.9709966292943455e-05, - "loss": 127.2132, - "step": 17960 - }, - { - "epoch": 0.14520156109858676, - "grad_norm": 1088.522705078125, - "learning_rate": 4.970890499394253e-05, - "loss": 188.5076, - "step": 17970 - }, - { - "epoch": 0.1452823633028709, - "grad_norm": 1159.2142333984375, - "learning_rate": 4.9707841768090314e-05, - "loss": 104.6871, - "step": 17980 - }, - { - "epoch": 0.14536316550715503, - "grad_norm": 1308.4671630859375, - "learning_rate": 4.9706776615469716e-05, - "loss": 126.851, - "step": 17990 - }, - { - "epoch": 0.14544396771143916, - "grad_norm": 1473.3458251953125, - "learning_rate": 4.9705709536163824e-05, - "loss": 190.8592, - "step": 18000 - }, - { - "epoch": 0.1455247699157233, - "grad_norm": 1616.076904296875, - "learning_rate": 4.9704640530255826e-05, - "loss": 113.4265, - "step": 18010 - }, - { - "epoch": 0.14560557212000744, - "grad_norm": 922.9618530273438, - "learning_rate": 4.970356959782909e-05, - "loss": 166.5429, - "step": 18020 - }, - { - "epoch": 0.14568637432429157, - "grad_norm": 1396.592041015625, - "learning_rate": 4.970249673896714e-05, - "loss": 167.4586, - "step": 18030 - }, - { - "epoch": 0.1457671765285757, - "grad_norm": 896.9103393554688, - "learning_rate": 4.970142195375363e-05, - "loss": 137.7651, - "step": 18040 - }, - { - "epoch": 0.14584797873285985, - "grad_norm": 759.9065551757812, - "learning_rate": 4.970034524227238e-05, - "loss": 176.354, - "step": 18050 - }, - { - "epoch": 0.14592878093714395, - "grad_norm": 1305.7763671875, - "learning_rate": 4.9699266604607355e-05, - "loss": 181.9855, - "step": 18060 - }, - { - "epoch": 0.1460095831414281, - "grad_norm": 763.9457397460938, - "learning_rate": 4.9698186040842654e-05, - "loss": 166.7526, - "step": 18070 - }, - { - "epoch": 0.14609038534571223, - "grad_norm": 876.3093872070312, - "learning_rate": 4.9697103551062556e-05, - "loss": 112.0097, - "step": 18080 - }, - { - "epoch": 0.14617118754999636, - "grad_norm": 2863.716064453125, - "learning_rate": 4.969601913535148e-05, - "loss": 158.0161, - "step": 18090 - }, - { - "epoch": 0.1462519897542805, - "grad_norm": 890.2374267578125, - "learning_rate": 4.969493279379398e-05, - "loss": 195.66, - "step": 18100 - }, - { - "epoch": 0.14633279195856463, - "grad_norm": 2909.5830078125, - "learning_rate": 4.969384452647477e-05, - "loss": 151.6327, - "step": 18110 - }, - { - "epoch": 0.14641359416284877, - "grad_norm": 2923.943359375, - "learning_rate": 4.969275433347872e-05, - "loss": 144.4132, - "step": 18120 - }, - { - "epoch": 0.1464943963671329, - "grad_norm": 1014.2239990234375, - "learning_rate": 4.9691662214890856e-05, - "loss": 133.4733, - "step": 18130 - }, - { - "epoch": 0.14657519857141701, - "grad_norm": 1697.953857421875, - "learning_rate": 4.969056817079633e-05, - "loss": 218.5211, - "step": 18140 - }, - { - "epoch": 0.14665600077570115, - "grad_norm": 1076.7310791015625, - "learning_rate": 4.968947220128045e-05, - "loss": 136.3602, - "step": 18150 - }, - { - "epoch": 0.1467368029799853, - "grad_norm": 1023.2250366210938, - "learning_rate": 4.9688374306428696e-05, - "loss": 167.6447, - "step": 18160 - }, - { - "epoch": 0.14681760518426942, - "grad_norm": 1222.014892578125, - "learning_rate": 4.968727448632669e-05, - "loss": 150.6737, - "step": 18170 - }, - { - "epoch": 0.14689840738855356, - "grad_norm": 1008.44921875, - "learning_rate": 4.968617274106019e-05, - "loss": 147.6667, - "step": 18180 - }, - { - "epoch": 0.1469792095928377, - "grad_norm": 663.5960693359375, - "learning_rate": 4.9685069070715106e-05, - "loss": 144.4962, - "step": 18190 - }, - { - "epoch": 0.14706001179712183, - "grad_norm": 562.7923583984375, - "learning_rate": 4.968396347537751e-05, - "loss": 133.3471, - "step": 18200 - }, - { - "epoch": 0.14714081400140597, - "grad_norm": 1160.2293701171875, - "learning_rate": 4.9682855955133625e-05, - "loss": 138.87, - "step": 18210 - }, - { - "epoch": 0.1472216162056901, - "grad_norm": 1147.7430419921875, - "learning_rate": 4.9681746510069805e-05, - "loss": 165.9871, - "step": 18220 - }, - { - "epoch": 0.1473024184099742, - "grad_norm": 1126.2359619140625, - "learning_rate": 4.9680635140272575e-05, - "loss": 145.725, - "step": 18230 - }, - { - "epoch": 0.14738322061425835, - "grad_norm": 629.0250854492188, - "learning_rate": 4.9679521845828604e-05, - "loss": 123.7119, - "step": 18240 - }, - { - "epoch": 0.14746402281854248, - "grad_norm": 769.4390869140625, - "learning_rate": 4.96784066268247e-05, - "loss": 110.2391, - "step": 18250 - }, - { - "epoch": 0.14754482502282662, - "grad_norm": 1543.56494140625, - "learning_rate": 4.967728948334784e-05, - "loss": 153.0158, - "step": 18260 - }, - { - "epoch": 0.14762562722711076, - "grad_norm": 729.558349609375, - "learning_rate": 4.967617041548513e-05, - "loss": 143.14, - "step": 18270 - }, - { - "epoch": 0.1477064294313949, - "grad_norm": 919.5125122070312, - "learning_rate": 4.967504942332385e-05, - "loss": 143.3715, - "step": 18280 - }, - { - "epoch": 0.14778723163567903, - "grad_norm": 743.6918334960938, - "learning_rate": 4.9673926506951404e-05, - "loss": 195.5614, - "step": 18290 - }, - { - "epoch": 0.14786803383996316, - "grad_norm": 1108.04931640625, - "learning_rate": 4.967280166645538e-05, - "loss": 136.592, - "step": 18300 - }, - { - "epoch": 0.1479488360442473, - "grad_norm": 736.45654296875, - "learning_rate": 4.967167490192347e-05, - "loss": 178.5675, - "step": 18310 - }, - { - "epoch": 0.1480296382485314, - "grad_norm": 1075.8597412109375, - "learning_rate": 4.967054621344356e-05, - "loss": 144.5254, - "step": 18320 - }, - { - "epoch": 0.14811044045281554, - "grad_norm": 2959.510498046875, - "learning_rate": 4.966941560110366e-05, - "loss": 184.1778, - "step": 18330 - }, - { - "epoch": 0.14819124265709968, - "grad_norm": 739.1339721679688, - "learning_rate": 4.966828306499193e-05, - "loss": 138.4536, - "step": 18340 - }, - { - "epoch": 0.14827204486138382, - "grad_norm": 832.9607543945312, - "learning_rate": 4.96671486051967e-05, - "loss": 128.8362, - "step": 18350 - }, - { - "epoch": 0.14835284706566795, - "grad_norm": 452.4673767089844, - "learning_rate": 4.9666012221806434e-05, - "loss": 192.8594, - "step": 18360 - }, - { - "epoch": 0.1484336492699521, - "grad_norm": 603.176513671875, - "learning_rate": 4.9664873914909755e-05, - "loss": 136.2663, - "step": 18370 - }, - { - "epoch": 0.14851445147423623, - "grad_norm": 1999.5748291015625, - "learning_rate": 4.966373368459541e-05, - "loss": 171.2875, - "step": 18380 - }, - { - "epoch": 0.14859525367852036, - "grad_norm": 1634.054443359375, - "learning_rate": 4.966259153095235e-05, - "loss": 135.1655, - "step": 18390 - }, - { - "epoch": 0.14867605588280447, - "grad_norm": 1002.5790405273438, - "learning_rate": 4.966144745406961e-05, - "loss": 137.1475, - "step": 18400 - }, - { - "epoch": 0.1487568580870886, - "grad_norm": 1454.2562255859375, - "learning_rate": 4.966030145403642e-05, - "loss": 143.3825, - "step": 18410 - }, - { - "epoch": 0.14883766029137274, - "grad_norm": 1260.619140625, - "learning_rate": 4.965915353094215e-05, - "loss": 170.3403, - "step": 18420 - }, - { - "epoch": 0.14891846249565688, - "grad_norm": 656.329345703125, - "learning_rate": 4.965800368487632e-05, - "loss": 129.0547, - "step": 18430 - }, - { - "epoch": 0.14899926469994101, - "grad_norm": 895.2418823242188, - "learning_rate": 4.965685191592859e-05, - "loss": 153.7566, - "step": 18440 - }, - { - "epoch": 0.14908006690422515, - "grad_norm": 1156.1583251953125, - "learning_rate": 4.965569822418877e-05, - "loss": 136.3192, - "step": 18450 - }, - { - "epoch": 0.1491608691085093, - "grad_norm": 822.8226928710938, - "learning_rate": 4.965454260974685e-05, - "loss": 134.8324, - "step": 18460 - }, - { - "epoch": 0.14924167131279342, - "grad_norm": 1393.2203369140625, - "learning_rate": 4.965338507269294e-05, - "loss": 147.69, - "step": 18470 - }, - { - "epoch": 0.14932247351707756, - "grad_norm": 1577.3673095703125, - "learning_rate": 4.9652225613117284e-05, - "loss": 195.3279, - "step": 18480 - }, - { - "epoch": 0.14940327572136167, - "grad_norm": 1152.5206298828125, - "learning_rate": 4.965106423111033e-05, - "loss": 176.9082, - "step": 18490 - }, - { - "epoch": 0.1494840779256458, - "grad_norm": 1685.4100341796875, - "learning_rate": 4.964990092676263e-05, - "loss": 178.6365, - "step": 18500 - }, - { - "epoch": 0.14956488012992994, - "grad_norm": 1178.341796875, - "learning_rate": 4.9648735700164895e-05, - "loss": 141.5229, - "step": 18510 - }, - { - "epoch": 0.14964568233421408, - "grad_norm": 1358.003662109375, - "learning_rate": 4.964756855140801e-05, - "loss": 135.0068, - "step": 18520 - }, - { - "epoch": 0.1497264845384982, - "grad_norm": 1388.4532470703125, - "learning_rate": 4.964639948058297e-05, - "loss": 157.6969, - "step": 18530 - }, - { - "epoch": 0.14980728674278235, - "grad_norm": 539.1663818359375, - "learning_rate": 4.964522848778096e-05, - "loss": 137.0673, - "step": 18540 - }, - { - "epoch": 0.14988808894706648, - "grad_norm": 1940.96533203125, - "learning_rate": 4.964405557309328e-05, - "loss": 134.2935, - "step": 18550 - }, - { - "epoch": 0.14996889115135062, - "grad_norm": 1246.306396484375, - "learning_rate": 4.964288073661142e-05, - "loss": 151.5028, - "step": 18560 - }, - { - "epoch": 0.15004969335563473, - "grad_norm": 2533.47607421875, - "learning_rate": 4.964170397842697e-05, - "loss": 207.9403, - "step": 18570 - }, - { - "epoch": 0.15013049555991886, - "grad_norm": 1079.6358642578125, - "learning_rate": 4.964052529863171e-05, - "loss": 150.4151, - "step": 18580 - }, - { - "epoch": 0.150211297764203, - "grad_norm": 1041.2977294921875, - "learning_rate": 4.963934469731756e-05, - "loss": 151.485, - "step": 18590 - }, - { - "epoch": 0.15029209996848714, - "grad_norm": 1581.884033203125, - "learning_rate": 4.963816217457657e-05, - "loss": 169.047, - "step": 18600 - }, - { - "epoch": 0.15037290217277127, - "grad_norm": 1402.7991943359375, - "learning_rate": 4.963697773050097e-05, - "loss": 176.3163, - "step": 18610 - }, - { - "epoch": 0.1504537043770554, - "grad_norm": 683.5438842773438, - "learning_rate": 4.963579136518312e-05, - "loss": 95.8959, - "step": 18620 - }, - { - "epoch": 0.15053450658133954, - "grad_norm": 1199.534423828125, - "learning_rate": 4.963460307871553e-05, - "loss": 170.5686, - "step": 18630 - }, - { - "epoch": 0.15061530878562368, - "grad_norm": 1392.210205078125, - "learning_rate": 4.9633412871190873e-05, - "loss": 153.4607, - "step": 18640 - }, - { - "epoch": 0.15069611098990782, - "grad_norm": 995.7383422851562, - "learning_rate": 4.9632220742701965e-05, - "loss": 154.5092, - "step": 18650 - }, - { - "epoch": 0.15077691319419192, - "grad_norm": 961.5668334960938, - "learning_rate": 4.9631026693341764e-05, - "loss": 103.7251, - "step": 18660 - }, - { - "epoch": 0.15085771539847606, - "grad_norm": 671.3668823242188, - "learning_rate": 4.9629830723203384e-05, - "loss": 146.025, - "step": 18670 - }, - { - "epoch": 0.1509385176027602, - "grad_norm": 1794.40283203125, - "learning_rate": 4.96286328323801e-05, - "loss": 164.6229, - "step": 18680 - }, - { - "epoch": 0.15101931980704433, - "grad_norm": 1768.9986572265625, - "learning_rate": 4.9627433020965314e-05, - "loss": 161.9986, - "step": 18690 - }, - { - "epoch": 0.15110012201132847, - "grad_norm": 754.0249633789062, - "learning_rate": 4.9626231289052596e-05, - "loss": 126.9793, - "step": 18700 - }, - { - "epoch": 0.1511809242156126, - "grad_norm": 1145.47265625, - "learning_rate": 4.962502763673565e-05, - "loss": 173.8803, - "step": 18710 - }, - { - "epoch": 0.15126172641989674, - "grad_norm": 793.9984130859375, - "learning_rate": 4.9623822064108364e-05, - "loss": 126.0002, - "step": 18720 - }, - { - "epoch": 0.15134252862418088, - "grad_norm": 864.0772705078125, - "learning_rate": 4.9622614571264715e-05, - "loss": 143.4308, - "step": 18730 - }, - { - "epoch": 0.151423330828465, - "grad_norm": 1112.0252685546875, - "learning_rate": 4.96214051582989e-05, - "loss": 109.9276, - "step": 18740 - }, - { - "epoch": 0.15150413303274912, - "grad_norm": 922.2892456054688, - "learning_rate": 4.962019382530521e-05, - "loss": 155.0567, - "step": 18750 - }, - { - "epoch": 0.15158493523703326, - "grad_norm": 814.68798828125, - "learning_rate": 4.96189805723781e-05, - "loss": 125.7654, - "step": 18760 - }, - { - "epoch": 0.1516657374413174, - "grad_norm": 974.1572875976562, - "learning_rate": 4.961776539961222e-05, - "loss": 129.9775, - "step": 18770 - }, - { - "epoch": 0.15174653964560153, - "grad_norm": 1688.48974609375, - "learning_rate": 4.961654830710229e-05, - "loss": 129.0737, - "step": 18780 - }, - { - "epoch": 0.15182734184988567, - "grad_norm": 837.8389892578125, - "learning_rate": 4.961532929494325e-05, - "loss": 139.8685, - "step": 18790 - }, - { - "epoch": 0.1519081440541698, - "grad_norm": 1166.718994140625, - "learning_rate": 4.9614108363230135e-05, - "loss": 160.1121, - "step": 18800 - }, - { - "epoch": 0.15198894625845394, - "grad_norm": 578.4459228515625, - "learning_rate": 4.961288551205818e-05, - "loss": 126.9589, - "step": 18810 - }, - { - "epoch": 0.15206974846273807, - "grad_norm": 1691.2015380859375, - "learning_rate": 4.961166074152274e-05, - "loss": 185.6235, - "step": 18820 - }, - { - "epoch": 0.15215055066702218, - "grad_norm": 1280.07470703125, - "learning_rate": 4.961043405171931e-05, - "loss": 130.3408, - "step": 18830 - }, - { - "epoch": 0.15223135287130632, - "grad_norm": 1204.08056640625, - "learning_rate": 4.9609205442743566e-05, - "loss": 142.9045, - "step": 18840 - }, - { - "epoch": 0.15231215507559046, - "grad_norm": 1418.345947265625, - "learning_rate": 4.9607974914691316e-05, - "loss": 139.4245, - "step": 18850 - }, - { - "epoch": 0.1523929572798746, - "grad_norm": 1319.41748046875, - "learning_rate": 4.960674246765851e-05, - "loss": 127.2336, - "step": 18860 - }, - { - "epoch": 0.15247375948415873, - "grad_norm": 1763.4229736328125, - "learning_rate": 4.960550810174126e-05, - "loss": 103.9958, - "step": 18870 - }, - { - "epoch": 0.15255456168844286, - "grad_norm": 1491.818115234375, - "learning_rate": 4.9604271817035834e-05, - "loss": 139.6183, - "step": 18880 - }, - { - "epoch": 0.152635363892727, - "grad_norm": 1419.6324462890625, - "learning_rate": 4.9603033613638626e-05, - "loss": 191.6112, - "step": 18890 - }, - { - "epoch": 0.15271616609701114, - "grad_norm": 807.424560546875, - "learning_rate": 4.960179349164621e-05, - "loss": 118.863, - "step": 18900 - }, - { - "epoch": 0.15279696830129527, - "grad_norm": 1491.949951171875, - "learning_rate": 4.9600551451155274e-05, - "loss": 168.4407, - "step": 18910 - }, - { - "epoch": 0.15287777050557938, - "grad_norm": 1058.746337890625, - "learning_rate": 4.959930749226269e-05, - "loss": 196.2374, - "step": 18920 - }, - { - "epoch": 0.15295857270986352, - "grad_norm": 976.9373168945312, - "learning_rate": 4.959806161506545e-05, - "loss": 144.424, - "step": 18930 - }, - { - "epoch": 0.15303937491414765, - "grad_norm": 1131.5087890625, - "learning_rate": 4.959681381966073e-05, - "loss": 157.0084, - "step": 18940 - }, - { - "epoch": 0.1531201771184318, - "grad_norm": 482.86053466796875, - "learning_rate": 4.959556410614582e-05, - "loss": 97.7862, - "step": 18950 - }, - { - "epoch": 0.15320097932271592, - "grad_norm": 1314.1749267578125, - "learning_rate": 4.9594312474618175e-05, - "loss": 117.1681, - "step": 18960 - }, - { - "epoch": 0.15328178152700006, - "grad_norm": 1176.4951171875, - "learning_rate": 4.9593058925175406e-05, - "loss": 149.3221, - "step": 18970 - }, - { - "epoch": 0.1533625837312842, - "grad_norm": 693.3486328125, - "learning_rate": 4.959180345791528e-05, - "loss": 124.6155, - "step": 18980 - }, - { - "epoch": 0.15344338593556833, - "grad_norm": 912.6985473632812, - "learning_rate": 4.959054607293567e-05, - "loss": 166.8624, - "step": 18990 - }, - { - "epoch": 0.15352418813985247, - "grad_norm": 1163.0361328125, - "learning_rate": 4.9589286770334654e-05, - "loss": 126.0562, - "step": 19000 - }, - { - "epoch": 0.15360499034413658, - "grad_norm": 704.7905883789062, - "learning_rate": 4.958802555021042e-05, - "loss": 134.0688, - "step": 19010 - }, - { - "epoch": 0.1536857925484207, - "grad_norm": 1288.105712890625, - "learning_rate": 4.9586762412661333e-05, - "loss": 139.363, - "step": 19020 - }, - { - "epoch": 0.15376659475270485, - "grad_norm": 1211.3101806640625, - "learning_rate": 4.958549735778589e-05, - "loss": 139.7495, - "step": 19030 - }, - { - "epoch": 0.15384739695698899, - "grad_norm": 720.6503295898438, - "learning_rate": 4.958423038568274e-05, - "loss": 127.6737, - "step": 19040 - }, - { - "epoch": 0.15392819916127312, - "grad_norm": 1007.2885131835938, - "learning_rate": 4.958296149645069e-05, - "loss": 158.6584, - "step": 19050 - }, - { - "epoch": 0.15400900136555726, - "grad_norm": 586.2289428710938, - "learning_rate": 4.958169069018869e-05, - "loss": 150.9298, - "step": 19060 - }, - { - "epoch": 0.1540898035698414, - "grad_norm": 773.6448364257812, - "learning_rate": 4.958041796699583e-05, - "loss": 126.1137, - "step": 19070 - }, - { - "epoch": 0.15417060577412553, - "grad_norm": 912.1433715820312, - "learning_rate": 4.957914332697137e-05, - "loss": 130.7233, - "step": 19080 - }, - { - "epoch": 0.15425140797840964, - "grad_norm": 783.219482421875, - "learning_rate": 4.957786677021471e-05, - "loss": 135.7538, - "step": 19090 - }, - { - "epoch": 0.15433221018269377, - "grad_norm": 1783.0704345703125, - "learning_rate": 4.9576588296825386e-05, - "loss": 105.7392, - "step": 19100 - }, - { - "epoch": 0.1544130123869779, - "grad_norm": 660.9375, - "learning_rate": 4.957530790690311e-05, - "loss": 137.4091, - "step": 19110 - }, - { - "epoch": 0.15449381459126205, - "grad_norm": 1616.544189453125, - "learning_rate": 4.957402560054773e-05, - "loss": 190.3333, - "step": 19120 - }, - { - "epoch": 0.15457461679554618, - "grad_norm": 751.7031860351562, - "learning_rate": 4.957274137785922e-05, - "loss": 179.2467, - "step": 19130 - }, - { - "epoch": 0.15465541899983032, - "grad_norm": 1965.0093994140625, - "learning_rate": 4.957145523893776e-05, - "loss": 162.9892, - "step": 19140 - }, - { - "epoch": 0.15473622120411445, - "grad_norm": 1119.140869140625, - "learning_rate": 4.957016718388362e-05, - "loss": 154.8354, - "step": 19150 - }, - { - "epoch": 0.1548170234083986, - "grad_norm": 1030.7235107421875, - "learning_rate": 4.956887721279726e-05, - "loss": 170.8057, - "step": 19160 - }, - { - "epoch": 0.15489782561268273, - "grad_norm": 701.3992919921875, - "learning_rate": 4.956758532577926e-05, - "loss": 119.7391, - "step": 19170 - }, - { - "epoch": 0.15497862781696684, - "grad_norm": 1213.8740234375, - "learning_rate": 4.9566291522930375e-05, - "loss": 114.7812, - "step": 19180 - }, - { - "epoch": 0.15505943002125097, - "grad_norm": 1067.879150390625, - "learning_rate": 4.95649958043515e-05, - "loss": 127.6939, - "step": 19190 - }, - { - "epoch": 0.1551402322255351, - "grad_norm": 1341.472412109375, - "learning_rate": 4.9563698170143666e-05, - "loss": 131.0653, - "step": 19200 - }, - { - "epoch": 0.15522103442981924, - "grad_norm": 878.7048950195312, - "learning_rate": 4.956239862040808e-05, - "loss": 193.8155, - "step": 19210 - }, - { - "epoch": 0.15530183663410338, - "grad_norm": 1413.94091796875, - "learning_rate": 4.956109715524608e-05, - "loss": 167.4405, - "step": 19220 - }, - { - "epoch": 0.15538263883838752, - "grad_norm": 1081.027587890625, - "learning_rate": 4.955979377475915e-05, - "loss": 146.3277, - "step": 19230 - }, - { - "epoch": 0.15546344104267165, - "grad_norm": 1057.214111328125, - "learning_rate": 4.955848847904894e-05, - "loss": 160.2817, - "step": 19240 - }, - { - "epoch": 0.1555442432469558, - "grad_norm": 1322.79150390625, - "learning_rate": 4.9557181268217227e-05, - "loss": 171.6942, - "step": 19250 - }, - { - "epoch": 0.1556250454512399, - "grad_norm": 666.101806640625, - "learning_rate": 4.9555872142365945e-05, - "loss": 165.4335, - "step": 19260 - }, - { - "epoch": 0.15570584765552403, - "grad_norm": 630.2947387695312, - "learning_rate": 4.9554561101597206e-05, - "loss": 139.2407, - "step": 19270 - }, - { - "epoch": 0.15578664985980817, - "grad_norm": 1148.80859375, - "learning_rate": 4.955324814601324e-05, - "loss": 121.6671, - "step": 19280 - }, - { - "epoch": 0.1558674520640923, - "grad_norm": 852.5416870117188, - "learning_rate": 4.955193327571642e-05, - "loss": 143.8562, - "step": 19290 - }, - { - "epoch": 0.15594825426837644, - "grad_norm": 685.1456298828125, - "learning_rate": 4.95506164908093e-05, - "loss": 164.1034, - "step": 19300 - }, - { - "epoch": 0.15602905647266058, - "grad_norm": 998.1287231445312, - "learning_rate": 4.954929779139455e-05, - "loss": 138.9662, - "step": 19310 - }, - { - "epoch": 0.1561098586769447, - "grad_norm": 814.4309692382812, - "learning_rate": 4.9547977177575014e-05, - "loss": 160.1065, - "step": 19320 - }, - { - "epoch": 0.15619066088122885, - "grad_norm": 425.35919189453125, - "learning_rate": 4.9546654649453675e-05, - "loss": 123.6092, - "step": 19330 - }, - { - "epoch": 0.15627146308551298, - "grad_norm": 507.13641357421875, - "learning_rate": 4.9545330207133664e-05, - "loss": 177.7387, - "step": 19340 - }, - { - "epoch": 0.1563522652897971, - "grad_norm": 924.7611694335938, - "learning_rate": 4.9544003850718266e-05, - "loss": 166.7888, - "step": 19350 - }, - { - "epoch": 0.15643306749408123, - "grad_norm": 1208.8778076171875, - "learning_rate": 4.954267558031092e-05, - "loss": 98.5059, - "step": 19360 - }, - { - "epoch": 0.15651386969836537, - "grad_norm": 1196.059326171875, - "learning_rate": 4.9541345396015193e-05, - "loss": 152.097, - "step": 19370 - }, - { - "epoch": 0.1565946719026495, - "grad_norm": 1203.1712646484375, - "learning_rate": 4.9540013297934826e-05, - "loss": 138.9323, - "step": 19380 - }, - { - "epoch": 0.15667547410693364, - "grad_norm": 2048.553466796875, - "learning_rate": 4.9538679286173696e-05, - "loss": 137.2229, - "step": 19390 - }, - { - "epoch": 0.15675627631121777, - "grad_norm": 1005.4833374023438, - "learning_rate": 4.953734336083583e-05, - "loss": 127.1456, - "step": 19400 - }, - { - "epoch": 0.1568370785155019, - "grad_norm": 1367.4559326171875, - "learning_rate": 4.95360055220254e-05, - "loss": 156.0823, - "step": 19410 - }, - { - "epoch": 0.15691788071978605, - "grad_norm": 990.8677978515625, - "learning_rate": 4.953466576984675e-05, - "loss": 142.3487, - "step": 19420 - }, - { - "epoch": 0.15699868292407018, - "grad_norm": 1195.5384521484375, - "learning_rate": 4.953332410440435e-05, - "loss": 135.7667, - "step": 19430 - }, - { - "epoch": 0.1570794851283543, - "grad_norm": 323.1562805175781, - "learning_rate": 4.953198052580281e-05, - "loss": 146.5829, - "step": 19440 - }, - { - "epoch": 0.15716028733263843, - "grad_norm": 623.1360473632812, - "learning_rate": 4.953063503414692e-05, - "loss": 119.0924, - "step": 19450 - }, - { - "epoch": 0.15724108953692256, - "grad_norm": 970.6582641601562, - "learning_rate": 4.952928762954161e-05, - "loss": 138.0683, - "step": 19460 - }, - { - "epoch": 0.1573218917412067, - "grad_norm": 1314.8089599609375, - "learning_rate": 4.952793831209195e-05, - "loss": 153.499, - "step": 19470 - }, - { - "epoch": 0.15740269394549083, - "grad_norm": 1902.4688720703125, - "learning_rate": 4.9526587081903145e-05, - "loss": 148.2058, - "step": 19480 - }, - { - "epoch": 0.15748349614977497, - "grad_norm": 853.5953979492188, - "learning_rate": 4.952523393908059e-05, - "loss": 157.7026, - "step": 19490 - }, - { - "epoch": 0.1575642983540591, - "grad_norm": 918.8594970703125, - "learning_rate": 4.952387888372979e-05, - "loss": 166.0788, - "step": 19500 - }, - { - "epoch": 0.15764510055834324, - "grad_norm": 929.1913452148438, - "learning_rate": 4.952252191595643e-05, - "loss": 120.1585, - "step": 19510 - }, - { - "epoch": 0.15772590276262735, - "grad_norm": 921.7418823242188, - "learning_rate": 4.952116303586631e-05, - "loss": 146.7802, - "step": 19520 - }, - { - "epoch": 0.1578067049669115, - "grad_norm": 967.5203247070312, - "learning_rate": 4.9519802243565414e-05, - "loss": 149.0987, - "step": 19530 - }, - { - "epoch": 0.15788750717119562, - "grad_norm": 716.4798583984375, - "learning_rate": 4.951843953915985e-05, - "loss": 153.834, - "step": 19540 - }, - { - "epoch": 0.15796830937547976, - "grad_norm": 1175.443603515625, - "learning_rate": 4.951707492275589e-05, - "loss": 146.1824, - "step": 19550 - }, - { - "epoch": 0.1580491115797639, - "grad_norm": 1372.8192138671875, - "learning_rate": 4.951570839445995e-05, - "loss": 145.7422, - "step": 19560 - }, - { - "epoch": 0.15812991378404803, - "grad_norm": 727.4046630859375, - "learning_rate": 4.951433995437859e-05, - "loss": 130.863, - "step": 19570 - }, - { - "epoch": 0.15821071598833217, - "grad_norm": 1707.0059814453125, - "learning_rate": 4.951296960261853e-05, - "loss": 135.7865, - "step": 19580 - }, - { - "epoch": 0.1582915181926163, - "grad_norm": 1369.9830322265625, - "learning_rate": 4.951159733928663e-05, - "loss": 131.4685, - "step": 19590 - }, - { - "epoch": 0.15837232039690044, - "grad_norm": 1806.6419677734375, - "learning_rate": 4.95102231644899e-05, - "loss": 110.966, - "step": 19600 - }, - { - "epoch": 0.15845312260118455, - "grad_norm": 1035.0638427734375, - "learning_rate": 4.9508847078335495e-05, - "loss": 138.5729, - "step": 19610 - }, - { - "epoch": 0.15853392480546868, - "grad_norm": 1077.9068603515625, - "learning_rate": 4.9507469080930734e-05, - "loss": 146.8132, - "step": 19620 - }, - { - "epoch": 0.15861472700975282, - "grad_norm": 778.3943481445312, - "learning_rate": 4.950608917238308e-05, - "loss": 107.0532, - "step": 19630 - }, - { - "epoch": 0.15869552921403696, - "grad_norm": 554.7048950195312, - "learning_rate": 4.9504707352800125e-05, - "loss": 135.2163, - "step": 19640 - }, - { - "epoch": 0.1587763314183211, - "grad_norm": 1492.74365234375, - "learning_rate": 4.9503323622289655e-05, - "loss": 119.3357, - "step": 19650 - }, - { - "epoch": 0.15885713362260523, - "grad_norm": 726.6300048828125, - "learning_rate": 4.9501937980959545e-05, - "loss": 113.2614, - "step": 19660 - }, - { - "epoch": 0.15893793582688936, - "grad_norm": 1585.4371337890625, - "learning_rate": 4.950055042891786e-05, - "loss": 137.945, - "step": 19670 - }, - { - "epoch": 0.1590187380311735, - "grad_norm": 1734.0233154296875, - "learning_rate": 4.949916096627282e-05, - "loss": 191.3335, - "step": 19680 - }, - { - "epoch": 0.15909954023545764, - "grad_norm": 1037.6551513671875, - "learning_rate": 4.949776959313275e-05, - "loss": 148.4212, - "step": 19690 - }, - { - "epoch": 0.15918034243974175, - "grad_norm": 960.6198120117188, - "learning_rate": 4.949637630960617e-05, - "loss": 145.6399, - "step": 19700 - }, - { - "epoch": 0.15926114464402588, - "grad_norm": 1109.8087158203125, - "learning_rate": 4.949498111580174e-05, - "loss": 143.8584, - "step": 19710 - }, - { - "epoch": 0.15934194684831002, - "grad_norm": 996.52294921875, - "learning_rate": 4.949358401182824e-05, - "loss": 151.3598, - "step": 19720 - }, - { - "epoch": 0.15942274905259415, - "grad_norm": 910.0574340820312, - "learning_rate": 4.9492184997794624e-05, - "loss": 188.6052, - "step": 19730 - }, - { - "epoch": 0.1595035512568783, - "grad_norm": 1397.9278564453125, - "learning_rate": 4.949078407381e-05, - "loss": 142.9454, - "step": 19740 - }, - { - "epoch": 0.15958435346116243, - "grad_norm": 966.9539184570312, - "learning_rate": 4.94893812399836e-05, - "loss": 116.4149, - "step": 19750 - }, - { - "epoch": 0.15966515566544656, - "grad_norm": 678.293701171875, - "learning_rate": 4.948797649642484e-05, - "loss": 128.2683, - "step": 19760 - }, - { - "epoch": 0.1597459578697307, - "grad_norm": 685.981201171875, - "learning_rate": 4.9486569843243244e-05, - "loss": 137.4343, - "step": 19770 - }, - { - "epoch": 0.1598267600740148, - "grad_norm": 1091.409423828125, - "learning_rate": 4.948516128054852e-05, - "loss": 166.9283, - "step": 19780 - }, - { - "epoch": 0.15990756227829894, - "grad_norm": 1352.7330322265625, - "learning_rate": 4.94837508084505e-05, - "loss": 166.8614, - "step": 19790 - }, - { - "epoch": 0.15998836448258308, - "grad_norm": 805.1770629882812, - "learning_rate": 4.948233842705919e-05, - "loss": 154.0036, - "step": 19800 - }, - { - "epoch": 0.16006916668686721, - "grad_norm": 665.4652709960938, - "learning_rate": 4.948092413648471e-05, - "loss": 171.7432, - "step": 19810 - }, - { - "epoch": 0.16014996889115135, - "grad_norm": 1816.2601318359375, - "learning_rate": 4.9479507936837364e-05, - "loss": 188.3652, - "step": 19820 - }, - { - "epoch": 0.1602307710954355, - "grad_norm": 1458.8013916015625, - "learning_rate": 4.947808982822759e-05, - "loss": 156.6303, - "step": 19830 - }, - { - "epoch": 0.16031157329971962, - "grad_norm": 778.2112426757812, - "learning_rate": 4.947666981076597e-05, - "loss": 144.638, - "step": 19840 - }, - { - "epoch": 0.16039237550400376, - "grad_norm": 925.9711303710938, - "learning_rate": 4.947524788456325e-05, - "loss": 97.0869, - "step": 19850 - }, - { - "epoch": 0.1604731777082879, - "grad_norm": 1024.874755859375, - "learning_rate": 4.94738240497303e-05, - "loss": 154.9577, - "step": 19860 - }, - { - "epoch": 0.160553979912572, - "grad_norm": 1501.3096923828125, - "learning_rate": 4.947239830637815e-05, - "loss": 134.6487, - "step": 19870 - }, - { - "epoch": 0.16063478211685614, - "grad_norm": 690.8103637695312, - "learning_rate": 4.947097065461801e-05, - "loss": 150.3672, - "step": 19880 - }, - { - "epoch": 0.16071558432114028, - "grad_norm": 1367.2078857421875, - "learning_rate": 4.946954109456118e-05, - "loss": 185.5702, - "step": 19890 - }, - { - "epoch": 0.1607963865254244, - "grad_norm": 1530.769287109375, - "learning_rate": 4.946810962631916e-05, - "loss": 107.1337, - "step": 19900 - }, - { - "epoch": 0.16087718872970855, - "grad_norm": 974.5053100585938, - "learning_rate": 4.9466676250003576e-05, - "loss": 145.1648, - "step": 19910 - }, - { - "epoch": 0.16095799093399268, - "grad_norm": 1430.806640625, - "learning_rate": 4.9465240965726195e-05, - "loss": 164.3391, - "step": 19920 - }, - { - "epoch": 0.16103879313827682, - "grad_norm": 559.2431030273438, - "learning_rate": 4.946380377359895e-05, - "loss": 101.8703, - "step": 19930 - }, - { - "epoch": 0.16111959534256096, - "grad_norm": 981.9447631835938, - "learning_rate": 4.946236467373392e-05, - "loss": 137.5427, - "step": 19940 - }, - { - "epoch": 0.16120039754684506, - "grad_norm": 773.423583984375, - "learning_rate": 4.946092366624333e-05, - "loss": 150.1945, - "step": 19950 - }, - { - "epoch": 0.1612811997511292, - "grad_norm": 508.3699645996094, - "learning_rate": 4.945948075123954e-05, - "loss": 149.6164, - "step": 19960 - }, - { - "epoch": 0.16136200195541334, - "grad_norm": 742.3327026367188, - "learning_rate": 4.945803592883509e-05, - "loss": 171.7382, - "step": 19970 - }, - { - "epoch": 0.16144280415969747, - "grad_norm": 1495.3865966796875, - "learning_rate": 4.9456589199142637e-05, - "loss": 167.0686, - "step": 19980 - }, - { - "epoch": 0.1615236063639816, - "grad_norm": 1066.609130859375, - "learning_rate": 4.9455140562274995e-05, - "loss": 179.7604, - "step": 19990 - }, - { - "epoch": 0.16160440856826574, - "grad_norm": 1205.5853271484375, - "learning_rate": 4.9453690018345144e-05, - "loss": 153.549, - "step": 20000 - }, - { - "epoch": 0.16168521077254988, - "grad_norm": 1061.2216796875, - "learning_rate": 4.9452237567466194e-05, - "loss": 127.6706, - "step": 20010 - }, - { - "epoch": 0.16176601297683402, - "grad_norm": 733.6787109375, - "learning_rate": 4.945078320975142e-05, - "loss": 127.7049, - "step": 20020 - }, - { - "epoch": 0.16184681518111815, - "grad_norm": 2328.76171875, - "learning_rate": 4.944932694531422e-05, - "loss": 149.9511, - "step": 20030 - }, - { - "epoch": 0.16192761738540226, - "grad_norm": 1744.631591796875, - "learning_rate": 4.9447868774268166e-05, - "loss": 127.2936, - "step": 20040 - }, - { - "epoch": 0.1620084195896864, - "grad_norm": 1439.965576171875, - "learning_rate": 4.9446408696726974e-05, - "loss": 121.25, - "step": 20050 - }, - { - "epoch": 0.16208922179397053, - "grad_norm": 794.9253540039062, - "learning_rate": 4.9444946712804494e-05, - "loss": 145.7818, - "step": 20060 - }, - { - "epoch": 0.16217002399825467, - "grad_norm": 712.621337890625, - "learning_rate": 4.944348282261474e-05, - "loss": 201.6032, - "step": 20070 - }, - { - "epoch": 0.1622508262025388, - "grad_norm": 1319.3455810546875, - "learning_rate": 4.9442017026271864e-05, - "loss": 165.4126, - "step": 20080 - }, - { - "epoch": 0.16233162840682294, - "grad_norm": 745.81396484375, - "learning_rate": 4.9440549323890176e-05, - "loss": 156.9648, - "step": 20090 - }, - { - "epoch": 0.16241243061110708, - "grad_norm": 699.705810546875, - "learning_rate": 4.9439079715584135e-05, - "loss": 178.4161, - "step": 20100 - }, - { - "epoch": 0.16249323281539121, - "grad_norm": 645.7938842773438, - "learning_rate": 4.9437608201468336e-05, - "loss": 142.2565, - "step": 20110 - }, - { - "epoch": 0.16257403501967535, - "grad_norm": 900.8884887695312, - "learning_rate": 4.943613478165753e-05, - "loss": 116.8283, - "step": 20120 - }, - { - "epoch": 0.16265483722395946, - "grad_norm": 489.8723449707031, - "learning_rate": 4.943465945626662e-05, - "loss": 112.9659, - "step": 20130 - }, - { - "epoch": 0.1627356394282436, - "grad_norm": 646.6622924804688, - "learning_rate": 4.943318222541066e-05, - "loss": 212.4018, - "step": 20140 - }, - { - "epoch": 0.16281644163252773, - "grad_norm": 891.543212890625, - "learning_rate": 4.943170308920484e-05, - "loss": 112.9879, - "step": 20150 - }, - { - "epoch": 0.16289724383681187, - "grad_norm": 1459.892822265625, - "learning_rate": 4.9430222047764506e-05, - "loss": 155.3651, - "step": 20160 - }, - { - "epoch": 0.162978046041096, - "grad_norm": 1222.5064697265625, - "learning_rate": 4.942873910120516e-05, - "loss": 208.5409, - "step": 20170 - }, - { - "epoch": 0.16305884824538014, - "grad_norm": 1320.199462890625, - "learning_rate": 4.9427254249642444e-05, - "loss": 125.2266, - "step": 20180 - }, - { - "epoch": 0.16313965044966428, - "grad_norm": 1213.1181640625, - "learning_rate": 4.9425767493192144e-05, - "loss": 144.2527, - "step": 20190 - }, - { - "epoch": 0.1632204526539484, - "grad_norm": 930.0297241210938, - "learning_rate": 4.942427883197021e-05, - "loss": 151.6325, - "step": 20200 - }, - { - "epoch": 0.16330125485823252, - "grad_norm": 1261.0780029296875, - "learning_rate": 4.9422788266092715e-05, - "loss": 133.9808, - "step": 20210 - }, - { - "epoch": 0.16338205706251666, - "grad_norm": 1093.81640625, - "learning_rate": 4.94212957956759e-05, - "loss": 159.8245, - "step": 20220 - }, - { - "epoch": 0.1634628592668008, - "grad_norm": 726.3095092773438, - "learning_rate": 4.941980142083617e-05, - "loss": 117.4203, - "step": 20230 - }, - { - "epoch": 0.16354366147108493, - "grad_norm": 1333.00146484375, - "learning_rate": 4.941830514169004e-05, - "loss": 127.2281, - "step": 20240 - }, - { - "epoch": 0.16362446367536906, - "grad_norm": 2235.844970703125, - "learning_rate": 4.94168069583542e-05, - "loss": 183.8472, - "step": 20250 - }, - { - "epoch": 0.1637052658796532, - "grad_norm": 614.4901733398438, - "learning_rate": 4.941530687094548e-05, - "loss": 106.1366, - "step": 20260 - }, - { - "epoch": 0.16378606808393734, - "grad_norm": 1060.7833251953125, - "learning_rate": 4.941380487958086e-05, - "loss": 144.1953, - "step": 20270 - }, - { - "epoch": 0.16386687028822147, - "grad_norm": 1574.5162353515625, - "learning_rate": 4.941230098437747e-05, - "loss": 212.1144, - "step": 20280 - }, - { - "epoch": 0.1639476724925056, - "grad_norm": 917.2064208984375, - "learning_rate": 4.941079518545258e-05, - "loss": 120.3781, - "step": 20290 - }, - { - "epoch": 0.16402847469678972, - "grad_norm": 1259.378173828125, - "learning_rate": 4.940928748292363e-05, - "loss": 158.3393, - "step": 20300 - }, - { - "epoch": 0.16410927690107385, - "grad_norm": 1609.08251953125, - "learning_rate": 4.9407777876908174e-05, - "loss": 151.2765, - "step": 20310 - }, - { - "epoch": 0.164190079105358, - "grad_norm": 1164.8643798828125, - "learning_rate": 4.9406266367523945e-05, - "loss": 169.377, - "step": 20320 - }, - { - "epoch": 0.16427088130964212, - "grad_norm": 827.8549194335938, - "learning_rate": 4.9404752954888824e-05, - "loss": 177.074, - "step": 20330 - }, - { - "epoch": 0.16435168351392626, - "grad_norm": 1134.0233154296875, - "learning_rate": 4.9403237639120805e-05, - "loss": 107.5158, - "step": 20340 - }, - { - "epoch": 0.1644324857182104, - "grad_norm": 1077.4449462890625, - "learning_rate": 4.940172042033808e-05, - "loss": 147.0112, - "step": 20350 - }, - { - "epoch": 0.16451328792249453, - "grad_norm": 878.2044677734375, - "learning_rate": 4.940020129865895e-05, - "loss": 138.4124, - "step": 20360 - }, - { - "epoch": 0.16459409012677867, - "grad_norm": 1378.62548828125, - "learning_rate": 4.939868027420189e-05, - "loss": 165.4997, - "step": 20370 - }, - { - "epoch": 0.16467489233106278, - "grad_norm": 952.5693969726562, - "learning_rate": 4.93971573470855e-05, - "loss": 147.6113, - "step": 20380 - }, - { - "epoch": 0.1647556945353469, - "grad_norm": 802.552734375, - "learning_rate": 4.939563251742855e-05, - "loss": 126.6972, - "step": 20390 - }, - { - "epoch": 0.16483649673963105, - "grad_norm": 1076.546875, - "learning_rate": 4.9394105785349944e-05, - "loss": 115.4133, - "step": 20400 - }, - { - "epoch": 0.16491729894391519, - "grad_norm": 1236.44140625, - "learning_rate": 4.9392577150968745e-05, - "loss": 178.8129, - "step": 20410 - }, - { - "epoch": 0.16499810114819932, - "grad_norm": 740.436767578125, - "learning_rate": 4.939104661440415e-05, - "loss": 124.116, - "step": 20420 - }, - { - "epoch": 0.16507890335248346, - "grad_norm": 819.5441284179688, - "learning_rate": 4.938951417577552e-05, - "loss": 122.5825, - "step": 20430 - }, - { - "epoch": 0.1651597055567676, - "grad_norm": 1098.3006591796875, - "learning_rate": 4.938797983520237e-05, - "loss": 125.6823, - "step": 20440 - }, - { - "epoch": 0.16524050776105173, - "grad_norm": 1068.104248046875, - "learning_rate": 4.938644359280433e-05, - "loss": 148.1049, - "step": 20450 - }, - { - "epoch": 0.16532130996533587, - "grad_norm": 1058.569091796875, - "learning_rate": 4.938490544870121e-05, - "loss": 171.6145, - "step": 20460 - }, - { - "epoch": 0.16540211216961997, - "grad_norm": 1309.0264892578125, - "learning_rate": 4.938336540301295e-05, - "loss": 198.3, - "step": 20470 - }, - { - "epoch": 0.1654829143739041, - "grad_norm": 1446.93310546875, - "learning_rate": 4.938182345585966e-05, - "loss": 143.8909, - "step": 20480 - }, - { - "epoch": 0.16556371657818825, - "grad_norm": 982.760986328125, - "learning_rate": 4.9380279607361575e-05, - "loss": 121.5969, - "step": 20490 - }, - { - "epoch": 0.16564451878247238, - "grad_norm": 917.6383056640625, - "learning_rate": 4.937873385763908e-05, - "loss": 109.6795, - "step": 20500 - }, - { - "epoch": 0.16572532098675652, - "grad_norm": 900.640380859375, - "learning_rate": 4.937718620681273e-05, - "loss": 123.6096, - "step": 20510 - }, - { - "epoch": 0.16580612319104066, - "grad_norm": 1174.46240234375, - "learning_rate": 4.937563665500321e-05, - "loss": 142.0072, - "step": 20520 - }, - { - "epoch": 0.1658869253953248, - "grad_norm": 939.9627685546875, - "learning_rate": 4.9374085202331354e-05, - "loss": 145.0303, - "step": 20530 - }, - { - "epoch": 0.16596772759960893, - "grad_norm": 789.8377075195312, - "learning_rate": 4.9372531848918145e-05, - "loss": 128.0519, - "step": 20540 - }, - { - "epoch": 0.16604852980389306, - "grad_norm": 774.9410400390625, - "learning_rate": 4.9370976594884723e-05, - "loss": 149.1509, - "step": 20550 - }, - { - "epoch": 0.16612933200817717, - "grad_norm": 1172.1417236328125, - "learning_rate": 4.936941944035237e-05, - "loss": 158.9689, - "step": 20560 - }, - { - "epoch": 0.1662101342124613, - "grad_norm": 607.3966674804688, - "learning_rate": 4.936786038544251e-05, - "loss": 87.294, - "step": 20570 - }, - { - "epoch": 0.16629093641674544, - "grad_norm": 837.52685546875, - "learning_rate": 4.936629943027672e-05, - "loss": 134.8621, - "step": 20580 - }, - { - "epoch": 0.16637173862102958, - "grad_norm": 1001.40673828125, - "learning_rate": 4.9364736574976736e-05, - "loss": 142.8577, - "step": 20590 - }, - { - "epoch": 0.16645254082531372, - "grad_norm": 4562.16357421875, - "learning_rate": 4.9363171819664434e-05, - "loss": 177.6516, - "step": 20600 - }, - { - "epoch": 0.16653334302959785, - "grad_norm": 462.9619445800781, - "learning_rate": 4.936160516446182e-05, - "loss": 147.8353, - "step": 20610 - }, - { - "epoch": 0.166614145233882, - "grad_norm": 1223.6934814453125, - "learning_rate": 4.936003660949108e-05, - "loss": 127.2198, - "step": 20620 - }, - { - "epoch": 0.16669494743816612, - "grad_norm": 562.229248046875, - "learning_rate": 4.935846615487453e-05, - "loss": 120.6695, - "step": 20630 - }, - { - "epoch": 0.16677574964245023, - "grad_norm": 2545.493408203125, - "learning_rate": 4.935689380073464e-05, - "loss": 243.0539, - "step": 20640 - }, - { - "epoch": 0.16685655184673437, - "grad_norm": 941.8065795898438, - "learning_rate": 4.9355319547194014e-05, - "loss": 151.3856, - "step": 20650 - }, - { - "epoch": 0.1669373540510185, - "grad_norm": 892.9322509765625, - "learning_rate": 4.935374339437543e-05, - "loss": 129.7287, - "step": 20660 - }, - { - "epoch": 0.16701815625530264, - "grad_norm": 1112.4737548828125, - "learning_rate": 4.935216534240179e-05, - "loss": 163.487, - "step": 20670 - }, - { - "epoch": 0.16709895845958678, - "grad_norm": 1596.5882568359375, - "learning_rate": 4.935058539139615e-05, - "loss": 154.1062, - "step": 20680 - }, - { - "epoch": 0.1671797606638709, - "grad_norm": 867.439453125, - "learning_rate": 4.934900354148173e-05, - "loss": 133.4915, - "step": 20690 - }, - { - "epoch": 0.16726056286815505, - "grad_norm": 1456.6492919921875, - "learning_rate": 4.9347419792781876e-05, - "loss": 166.9409, - "step": 20700 - }, - { - "epoch": 0.16734136507243919, - "grad_norm": 1217.3944091796875, - "learning_rate": 4.934583414542011e-05, - "loss": 126.2898, - "step": 20710 - }, - { - "epoch": 0.16742216727672332, - "grad_norm": 1088.0697021484375, - "learning_rate": 4.934424659952006e-05, - "loss": 134.1592, - "step": 20720 - }, - { - "epoch": 0.16750296948100743, - "grad_norm": 789.9591674804688, - "learning_rate": 4.934265715520553e-05, - "loss": 138.9029, - "step": 20730 - }, - { - "epoch": 0.16758377168529157, - "grad_norm": 3209.039794921875, - "learning_rate": 4.934106581260049e-05, - "loss": 169.6751, - "step": 20740 - }, - { - "epoch": 0.1676645738895757, - "grad_norm": 630.1481323242188, - "learning_rate": 4.933947257182901e-05, - "loss": 114.2374, - "step": 20750 - }, - { - "epoch": 0.16774537609385984, - "grad_norm": 1119.7962646484375, - "learning_rate": 4.933787743301534e-05, - "loss": 126.1659, - "step": 20760 - }, - { - "epoch": 0.16782617829814397, - "grad_norm": 1032.967041015625, - "learning_rate": 4.933628039628389e-05, - "loss": 143.448, - "step": 20770 - }, - { - "epoch": 0.1679069805024281, - "grad_norm": 1353.9566650390625, - "learning_rate": 4.933468146175918e-05, - "loss": 159.5805, - "step": 20780 - }, - { - "epoch": 0.16798778270671225, - "grad_norm": 861.2966918945312, - "learning_rate": 4.933308062956591e-05, - "loss": 181.934, - "step": 20790 - }, - { - "epoch": 0.16806858491099638, - "grad_norm": 950.9557495117188, - "learning_rate": 4.93314778998289e-05, - "loss": 134.8973, - "step": 20800 - }, - { - "epoch": 0.16814938711528052, - "grad_norm": 684.4946899414062, - "learning_rate": 4.932987327267316e-05, - "loss": 153.327, - "step": 20810 - }, - { - "epoch": 0.16823018931956463, - "grad_norm": 1124.705078125, - "learning_rate": 4.93282667482238e-05, - "loss": 126.8134, - "step": 20820 - }, - { - "epoch": 0.16831099152384876, - "grad_norm": 1403.6356201171875, - "learning_rate": 4.9326658326606114e-05, - "loss": 136.8704, - "step": 20830 - }, - { - "epoch": 0.1683917937281329, - "grad_norm": 1115.89501953125, - "learning_rate": 4.9325048007945526e-05, - "loss": 168.8359, - "step": 20840 - }, - { - "epoch": 0.16847259593241704, - "grad_norm": 931.3789672851562, - "learning_rate": 4.93234357923676e-05, - "loss": 124.9794, - "step": 20850 - }, - { - "epoch": 0.16855339813670117, - "grad_norm": 596.74169921875, - "learning_rate": 4.9321821679998074e-05, - "loss": 163.0768, - "step": 20860 - }, - { - "epoch": 0.1686342003409853, - "grad_norm": 350.2550354003906, - "learning_rate": 4.9320205670962814e-05, - "loss": 162.3876, - "step": 20870 - }, - { - "epoch": 0.16871500254526944, - "grad_norm": 805.6371459960938, - "learning_rate": 4.9318587765387845e-05, - "loss": 144.4133, - "step": 20880 - }, - { - "epoch": 0.16879580474955358, - "grad_norm": 1130.451171875, - "learning_rate": 4.9316967963399335e-05, - "loss": 129.5721, - "step": 20890 - }, - { - "epoch": 0.1688766069538377, - "grad_norm": 1024.6787109375, - "learning_rate": 4.9315346265123594e-05, - "loss": 123.6477, - "step": 20900 - }, - { - "epoch": 0.16895740915812182, - "grad_norm": 908.877685546875, - "learning_rate": 4.931372267068708e-05, - "loss": 122.209, - "step": 20910 - }, - { - "epoch": 0.16903821136240596, - "grad_norm": 922.0499267578125, - "learning_rate": 4.9312097180216414e-05, - "loss": 137.9385, - "step": 20920 - }, - { - "epoch": 0.1691190135666901, - "grad_norm": 8524.0908203125, - "learning_rate": 4.931046979383835e-05, - "loss": 175.6334, - "step": 20930 - }, - { - "epoch": 0.16919981577097423, - "grad_norm": 1868.6783447265625, - "learning_rate": 4.9308840511679804e-05, - "loss": 133.4029, - "step": 20940 - }, - { - "epoch": 0.16928061797525837, - "grad_norm": 868.6856689453125, - "learning_rate": 4.930720933386782e-05, - "loss": 130.7314, - "step": 20950 - }, - { - "epoch": 0.1693614201795425, - "grad_norm": 766.35302734375, - "learning_rate": 4.9305576260529607e-05, - "loss": 145.356, - "step": 20960 - }, - { - "epoch": 0.16944222238382664, - "grad_norm": 1678.6231689453125, - "learning_rate": 4.930394129179251e-05, - "loss": 127.5629, - "step": 20970 - }, - { - "epoch": 0.16952302458811078, - "grad_norm": 1218.351806640625, - "learning_rate": 4.930230442778403e-05, - "loss": 154.7323, - "step": 20980 - }, - { - "epoch": 0.16960382679239489, - "grad_norm": 776.2579956054688, - "learning_rate": 4.930066566863182e-05, - "loss": 160.9235, - "step": 20990 - }, - { - "epoch": 0.16968462899667902, - "grad_norm": 749.9014282226562, - "learning_rate": 4.929902501446366e-05, - "loss": 151.9128, - "step": 21000 - }, - { - "epoch": 0.16976543120096316, - "grad_norm": 394.6796569824219, - "learning_rate": 4.92973824654075e-05, - "loss": 133.4124, - "step": 21010 - }, - { - "epoch": 0.1698462334052473, - "grad_norm": 4559.64892578125, - "learning_rate": 4.929573802159143e-05, - "loss": 130.0193, - "step": 21020 - }, - { - "epoch": 0.16992703560953143, - "grad_norm": 807.999755859375, - "learning_rate": 4.929409168314368e-05, - "loss": 111.1172, - "step": 21030 - }, - { - "epoch": 0.17000783781381557, - "grad_norm": 1046.11181640625, - "learning_rate": 4.9292443450192645e-05, - "loss": 140.0229, - "step": 21040 - }, - { - "epoch": 0.1700886400180997, - "grad_norm": 896.216552734375, - "learning_rate": 4.929079332286685e-05, - "loss": 124.7365, - "step": 21050 - }, - { - "epoch": 0.17016944222238384, - "grad_norm": 1904.4871826171875, - "learning_rate": 4.928914130129498e-05, - "loss": 116.5897, - "step": 21060 - }, - { - "epoch": 0.17025024442666795, - "grad_norm": 841.46826171875, - "learning_rate": 4.928748738560586e-05, - "loss": 149.4489, - "step": 21070 - }, - { - "epoch": 0.17033104663095208, - "grad_norm": 939.8714599609375, - "learning_rate": 4.9285831575928465e-05, - "loss": 118.6469, - "step": 21080 - }, - { - "epoch": 0.17041184883523622, - "grad_norm": 1241.536865234375, - "learning_rate": 4.9284173872391925e-05, - "loss": 142.6615, - "step": 21090 - }, - { - "epoch": 0.17049265103952035, - "grad_norm": 927.803955078125, - "learning_rate": 4.92825142751255e-05, - "loss": 185.2253, - "step": 21100 - }, - { - "epoch": 0.1705734532438045, - "grad_norm": 1294.554443359375, - "learning_rate": 4.9280852784258624e-05, - "loss": 159.8822, - "step": 21110 - }, - { - "epoch": 0.17065425544808863, - "grad_norm": 1053.62548828125, - "learning_rate": 4.9279189399920844e-05, - "loss": 137.9816, - "step": 21120 - }, - { - "epoch": 0.17073505765237276, - "grad_norm": 1249.134521484375, - "learning_rate": 4.9277524122241894e-05, - "loss": 146.3467, - "step": 21130 - }, - { - "epoch": 0.1708158598566569, - "grad_norm": 923.3999633789062, - "learning_rate": 4.927585695135162e-05, - "loss": 158.8083, - "step": 21140 - }, - { - "epoch": 0.17089666206094103, - "grad_norm": 1657.6719970703125, - "learning_rate": 4.927418788738004e-05, - "loss": 184.0047, - "step": 21150 - }, - { - "epoch": 0.17097746426522514, - "grad_norm": 719.60205078125, - "learning_rate": 4.9272516930457314e-05, - "loss": 134.7453, - "step": 21160 - }, - { - "epoch": 0.17105826646950928, - "grad_norm": 1066.7142333984375, - "learning_rate": 4.927084408071373e-05, - "loss": 120.2492, - "step": 21170 - }, - { - "epoch": 0.17113906867379342, - "grad_norm": 896.5006103515625, - "learning_rate": 4.9269169338279766e-05, - "loss": 149.4413, - "step": 21180 - }, - { - "epoch": 0.17121987087807755, - "grad_norm": 779.187255859375, - "learning_rate": 4.9267492703286e-05, - "loss": 182.3433, - "step": 21190 - }, - { - "epoch": 0.1713006730823617, - "grad_norm": 807.4977416992188, - "learning_rate": 4.9265814175863186e-05, - "loss": 135.4947, - "step": 21200 - }, - { - "epoch": 0.17138147528664582, - "grad_norm": 819.343017578125, - "learning_rate": 4.9264133756142224e-05, - "loss": 152.1436, - "step": 21210 - }, - { - "epoch": 0.17146227749092996, - "grad_norm": 571.0609741210938, - "learning_rate": 4.926245144425415e-05, - "loss": 149.8402, - "step": 21220 - }, - { - "epoch": 0.1715430796952141, - "grad_norm": 794.7405395507812, - "learning_rate": 4.926076724033016e-05, - "loss": 169.8673, - "step": 21230 - }, - { - "epoch": 0.17162388189949823, - "grad_norm": 868.8946533203125, - "learning_rate": 4.925908114450158e-05, - "loss": 131.4613, - "step": 21240 - }, - { - "epoch": 0.17170468410378234, - "grad_norm": 887.6416625976562, - "learning_rate": 4.925739315689991e-05, - "loss": 125.9924, - "step": 21250 - }, - { - "epoch": 0.17178548630806648, - "grad_norm": 1365.545166015625, - "learning_rate": 4.925570327765678e-05, - "loss": 166.8732, - "step": 21260 - }, - { - "epoch": 0.1718662885123506, - "grad_norm": 606.7777709960938, - "learning_rate": 4.9254011506903963e-05, - "loss": 108.1067, - "step": 21270 - }, - { - "epoch": 0.17194709071663475, - "grad_norm": 795.8794555664062, - "learning_rate": 4.925231784477339e-05, - "loss": 131.4976, - "step": 21280 - }, - { - "epoch": 0.17202789292091888, - "grad_norm": 2665.93115234375, - "learning_rate": 4.925062229139714e-05, - "loss": 145.1969, - "step": 21290 - }, - { - "epoch": 0.17210869512520302, - "grad_norm": 995.7269897460938, - "learning_rate": 4.924892484690743e-05, - "loss": 133.3504, - "step": 21300 - }, - { - "epoch": 0.17218949732948716, - "grad_norm": 635.3858032226562, - "learning_rate": 4.924722551143664e-05, - "loss": 116.0752, - "step": 21310 - }, - { - "epoch": 0.1722702995337713, - "grad_norm": 733.6871948242188, - "learning_rate": 4.9245524285117274e-05, - "loss": 149.4348, - "step": 21320 - }, - { - "epoch": 0.1723511017380554, - "grad_norm": 1482.4849853515625, - "learning_rate": 4.924382116808201e-05, - "loss": 162.5618, - "step": 21330 - }, - { - "epoch": 0.17243190394233954, - "grad_norm": 899.9449462890625, - "learning_rate": 4.924211616046365e-05, - "loss": 129.7874, - "step": 21340 - }, - { - "epoch": 0.17251270614662367, - "grad_norm": 1402.91162109375, - "learning_rate": 4.924040926239515e-05, - "loss": 149.3731, - "step": 21350 - }, - { - "epoch": 0.1725935083509078, - "grad_norm": 954.703857421875, - "learning_rate": 4.923870047400964e-05, - "loss": 151.7146, - "step": 21360 - }, - { - "epoch": 0.17267431055519195, - "grad_norm": 8040.89697265625, - "learning_rate": 4.9236989795440346e-05, - "loss": 161.528, - "step": 21370 - }, - { - "epoch": 0.17275511275947608, - "grad_norm": 1106.1502685546875, - "learning_rate": 4.9235277226820695e-05, - "loss": 154.1534, - "step": 21380 - }, - { - "epoch": 0.17283591496376022, - "grad_norm": 979.8186645507812, - "learning_rate": 4.9233562768284225e-05, - "loss": 151.1577, - "step": 21390 - }, - { - "epoch": 0.17291671716804435, - "grad_norm": 655.7888793945312, - "learning_rate": 4.923184641996463e-05, - "loss": 133.3721, - "step": 21400 - }, - { - "epoch": 0.1729975193723285, - "grad_norm": 2349.8193359375, - "learning_rate": 4.923012818199576e-05, - "loss": 147.665, - "step": 21410 - }, - { - "epoch": 0.1730783215766126, - "grad_norm": 1052.4195556640625, - "learning_rate": 4.922840805451161e-05, - "loss": 176.3396, - "step": 21420 - }, - { - "epoch": 0.17315912378089673, - "grad_norm": 487.7714538574219, - "learning_rate": 4.9226686037646314e-05, - "loss": 139.5684, - "step": 21430 - }, - { - "epoch": 0.17323992598518087, - "grad_norm": 1023.9089965820312, - "learning_rate": 4.922496213153416e-05, - "loss": 128.5465, - "step": 21440 - }, - { - "epoch": 0.173320728189465, - "grad_norm": 1058.539794921875, - "learning_rate": 4.922323633630958e-05, - "loss": 77.237, - "step": 21450 - }, - { - "epoch": 0.17340153039374914, - "grad_norm": 836.1824951171875, - "learning_rate": 4.922150865210715e-05, - "loss": 184.4454, - "step": 21460 - }, - { - "epoch": 0.17348233259803328, - "grad_norm": 1106.0242919921875, - "learning_rate": 4.92197790790616e-05, - "loss": 155.3629, - "step": 21470 - }, - { - "epoch": 0.17356313480231741, - "grad_norm": 2159.267822265625, - "learning_rate": 4.9218047617307824e-05, - "loss": 162.3173, - "step": 21480 - }, - { - "epoch": 0.17364393700660155, - "grad_norm": 965.2177124023438, - "learning_rate": 4.9216314266980824e-05, - "loss": 113.7321, - "step": 21490 - }, - { - "epoch": 0.17372473921088566, - "grad_norm": 540.6083374023438, - "learning_rate": 4.9214579028215776e-05, - "loss": 140.3737, - "step": 21500 - }, - { - "epoch": 0.1738055414151698, - "grad_norm": 1578.821044921875, - "learning_rate": 4.9212841901148e-05, - "loss": 128.743, - "step": 21510 - }, - { - "epoch": 0.17388634361945393, - "grad_norm": 1013.1769409179688, - "learning_rate": 4.9211102885912965e-05, - "loss": 145.901, - "step": 21520 - }, - { - "epoch": 0.17396714582373807, - "grad_norm": 842.1300659179688, - "learning_rate": 4.920936198264627e-05, - "loss": 178.5723, - "step": 21530 - }, - { - "epoch": 0.1740479480280222, - "grad_norm": 1803.3353271484375, - "learning_rate": 4.920761919148369e-05, - "loss": 167.2774, - "step": 21540 - }, - { - "epoch": 0.17412875023230634, - "grad_norm": 1111.0732421875, - "learning_rate": 4.9205874512561115e-05, - "loss": 128.0251, - "step": 21550 - }, - { - "epoch": 0.17420955243659048, - "grad_norm": 1892.2427978515625, - "learning_rate": 4.920412794601461e-05, - "loss": 133.7254, - "step": 21560 - }, - { - "epoch": 0.1742903546408746, - "grad_norm": 1495.9288330078125, - "learning_rate": 4.920237949198037e-05, - "loss": 176.4551, - "step": 21570 - }, - { - "epoch": 0.17437115684515875, - "grad_norm": 876.3128662109375, - "learning_rate": 4.9200629150594744e-05, - "loss": 124.5628, - "step": 21580 - }, - { - "epoch": 0.17445195904944286, - "grad_norm": 832.6767578125, - "learning_rate": 4.919887692199423e-05, - "loss": 130.3372, - "step": 21590 - }, - { - "epoch": 0.174532761253727, - "grad_norm": 1795.543212890625, - "learning_rate": 4.919712280631547e-05, - "loss": 154.757, - "step": 21600 - }, - { - "epoch": 0.17461356345801113, - "grad_norm": 1359.8138427734375, - "learning_rate": 4.919536680369525e-05, - "loss": 130.8899, - "step": 21610 - }, - { - "epoch": 0.17469436566229526, - "grad_norm": 1081.87451171875, - "learning_rate": 4.9193608914270515e-05, - "loss": 134.2298, - "step": 21620 - }, - { - "epoch": 0.1747751678665794, - "grad_norm": 1072.954345703125, - "learning_rate": 4.9191849138178334e-05, - "loss": 140.3875, - "step": 21630 - }, - { - "epoch": 0.17485597007086354, - "grad_norm": 1131.3258056640625, - "learning_rate": 4.9190087475555955e-05, - "loss": 166.9897, - "step": 21640 - }, - { - "epoch": 0.17493677227514767, - "grad_norm": 656.448486328125, - "learning_rate": 4.9188323926540746e-05, - "loss": 110.9589, - "step": 21650 - }, - { - "epoch": 0.1750175744794318, - "grad_norm": 776.7743530273438, - "learning_rate": 4.918655849127024e-05, - "loss": 103.8045, - "step": 21660 - }, - { - "epoch": 0.17509837668371594, - "grad_norm": 511.572509765625, - "learning_rate": 4.91847911698821e-05, - "loss": 151.9778, - "step": 21670 - }, - { - "epoch": 0.17517917888800005, - "grad_norm": 1167.0135498046875, - "learning_rate": 4.918302196251415e-05, - "loss": 146.2524, - "step": 21680 - }, - { - "epoch": 0.1752599810922842, - "grad_norm": 1311.80810546875, - "learning_rate": 4.918125086930435e-05, - "loss": 157.445, - "step": 21690 - }, - { - "epoch": 0.17534078329656833, - "grad_norm": 842.0478515625, - "learning_rate": 4.9179477890390825e-05, - "loss": 149.0061, - "step": 21700 - }, - { - "epoch": 0.17542158550085246, - "grad_norm": 948.2142944335938, - "learning_rate": 4.9177703025911825e-05, - "loss": 101.6154, - "step": 21710 - }, - { - "epoch": 0.1755023877051366, - "grad_norm": 1002.8952026367188, - "learning_rate": 4.917592627600577e-05, - "loss": 138.3118, - "step": 21720 - }, - { - "epoch": 0.17558318990942073, - "grad_norm": 911.0323486328125, - "learning_rate": 4.91741476408112e-05, - "loss": 172.1703, - "step": 21730 - }, - { - "epoch": 0.17566399211370487, - "grad_norm": 1388.8275146484375, - "learning_rate": 4.917236712046682e-05, - "loss": 137.3608, - "step": 21740 - }, - { - "epoch": 0.175744794317989, - "grad_norm": 1317.82421875, - "learning_rate": 4.917058471511149e-05, - "loss": 132.1369, - "step": 21750 - }, - { - "epoch": 0.17582559652227311, - "grad_norm": 549.3938598632812, - "learning_rate": 4.916880042488419e-05, - "loss": 121.9319, - "step": 21760 - }, - { - "epoch": 0.17590639872655725, - "grad_norm": 1055.351318359375, - "learning_rate": 4.9167014249924075e-05, - "loss": 133.7714, - "step": 21770 - }, - { - "epoch": 0.1759872009308414, - "grad_norm": 618.5696411132812, - "learning_rate": 4.916522619037043e-05, - "loss": 121.4759, - "step": 21780 - }, - { - "epoch": 0.17606800313512552, - "grad_norm": 814.7097778320312, - "learning_rate": 4.916343624636269e-05, - "loss": 167.2455, - "step": 21790 - }, - { - "epoch": 0.17614880533940966, - "grad_norm": 950.5711059570312, - "learning_rate": 4.916164441804044e-05, - "loss": 119.8782, - "step": 21800 - }, - { - "epoch": 0.1762296075436938, - "grad_norm": 1106.2169189453125, - "learning_rate": 4.915985070554341e-05, - "loss": 141.9731, - "step": 21810 - }, - { - "epoch": 0.17631040974797793, - "grad_norm": 761.6017456054688, - "learning_rate": 4.915805510901148e-05, - "loss": 147.6505, - "step": 21820 - }, - { - "epoch": 0.17639121195226207, - "grad_norm": 1542.261962890625, - "learning_rate": 4.915625762858467e-05, - "loss": 146.4995, - "step": 21830 - }, - { - "epoch": 0.1764720141565462, - "grad_norm": 840.79443359375, - "learning_rate": 4.915445826440316e-05, - "loss": 136.0519, - "step": 21840 - }, - { - "epoch": 0.1765528163608303, - "grad_norm": 2079.904052734375, - "learning_rate": 4.915265701660726e-05, - "loss": 126.3828, - "step": 21850 - }, - { - "epoch": 0.17663361856511445, - "grad_norm": 3114.213134765625, - "learning_rate": 4.9150853885337426e-05, - "loss": 173.1625, - "step": 21860 - }, - { - "epoch": 0.17671442076939858, - "grad_norm": 824.01025390625, - "learning_rate": 4.9149048870734296e-05, - "loss": 130.2869, - "step": 21870 - }, - { - "epoch": 0.17679522297368272, - "grad_norm": 1139.847900390625, - "learning_rate": 4.9147241972938596e-05, - "loss": 165.5629, - "step": 21880 - }, - { - "epoch": 0.17687602517796686, - "grad_norm": 3070.7890625, - "learning_rate": 4.914543319209126e-05, - "loss": 121.7948, - "step": 21890 - }, - { - "epoch": 0.176956827382251, - "grad_norm": 1102.478759765625, - "learning_rate": 4.914362252833332e-05, - "loss": 148.5109, - "step": 21900 - }, - { - "epoch": 0.17703762958653513, - "grad_norm": 975.4558715820312, - "learning_rate": 4.9141809981805995e-05, - "loss": 157.0883, - "step": 21910 - }, - { - "epoch": 0.17711843179081926, - "grad_norm": 769.4059448242188, - "learning_rate": 4.913999555265062e-05, - "loss": 111.9922, - "step": 21920 - }, - { - "epoch": 0.1771992339951034, - "grad_norm": 905.8001708984375, - "learning_rate": 4.913817924100869e-05, - "loss": 143.9434, - "step": 21930 - }, - { - "epoch": 0.1772800361993875, - "grad_norm": 787.67626953125, - "learning_rate": 4.913636104702183e-05, - "loss": 173.4965, - "step": 21940 - }, - { - "epoch": 0.17736083840367164, - "grad_norm": 1215.833251953125, - "learning_rate": 4.913454097083185e-05, - "loss": 129.2083, - "step": 21950 - }, - { - "epoch": 0.17744164060795578, - "grad_norm": 743.689697265625, - "learning_rate": 4.913271901258067e-05, - "loss": 87.8352, - "step": 21960 - }, - { - "epoch": 0.17752244281223992, - "grad_norm": 1281.1605224609375, - "learning_rate": 4.913089517241037e-05, - "loss": 126.6587, - "step": 21970 - }, - { - "epoch": 0.17760324501652405, - "grad_norm": 714.2695922851562, - "learning_rate": 4.9129069450463186e-05, - "loss": 146.0166, - "step": 21980 - }, - { - "epoch": 0.1776840472208082, - "grad_norm": 860.091796875, - "learning_rate": 4.912724184688149e-05, - "loss": 121.8889, - "step": 21990 - }, - { - "epoch": 0.17776484942509233, - "grad_norm": 1348.712158203125, - "learning_rate": 4.912541236180779e-05, - "loss": 140.7912, - "step": 22000 - }, - { - "epoch": 0.17784565162937646, - "grad_norm": 2976.09228515625, - "learning_rate": 4.912358099538476e-05, - "loss": 144.4217, - "step": 22010 - }, - { - "epoch": 0.17792645383366057, - "grad_norm": 1267.32568359375, - "learning_rate": 4.912174774775522e-05, - "loss": 148.4144, - "step": 22020 - }, - { - "epoch": 0.1780072560379447, - "grad_norm": 1446.2003173828125, - "learning_rate": 4.911991261906212e-05, - "loss": 133.0471, - "step": 22030 - }, - { - "epoch": 0.17808805824222884, - "grad_norm": 1262.1279296875, - "learning_rate": 4.911807560944858e-05, - "loss": 137.762, - "step": 22040 - }, - { - "epoch": 0.17816886044651298, - "grad_norm": 2324.279296875, - "learning_rate": 4.911623671905784e-05, - "loss": 278.9191, - "step": 22050 - }, - { - "epoch": 0.1782496626507971, - "grad_norm": 858.7052612304688, - "learning_rate": 4.9114395948033296e-05, - "loss": 115.8173, - "step": 22060 - }, - { - "epoch": 0.17833046485508125, - "grad_norm": 1069.30029296875, - "learning_rate": 4.911255329651851e-05, - "loss": 162.8984, - "step": 22070 - }, - { - "epoch": 0.17841126705936539, - "grad_norm": 1153.4527587890625, - "learning_rate": 4.911070876465719e-05, - "loss": 145.4523, - "step": 22080 - }, - { - "epoch": 0.17849206926364952, - "grad_norm": 786.6443481445312, - "learning_rate": 4.910886235259314e-05, - "loss": 104.419, - "step": 22090 - }, - { - "epoch": 0.17857287146793366, - "grad_norm": 1044.1168212890625, - "learning_rate": 4.910701406047037e-05, - "loss": 124.9817, - "step": 22100 - }, - { - "epoch": 0.17865367367221777, - "grad_norm": 1491.7518310546875, - "learning_rate": 4.9105163888433e-05, - "loss": 185.6069, - "step": 22110 - }, - { - "epoch": 0.1787344758765019, - "grad_norm": 995.5963134765625, - "learning_rate": 4.910331183662533e-05, - "loss": 120.7781, - "step": 22120 - }, - { - "epoch": 0.17881527808078604, - "grad_norm": 948.0015869140625, - "learning_rate": 4.9101457905191774e-05, - "loss": 138.4682, - "step": 22130 - }, - { - "epoch": 0.17889608028507017, - "grad_norm": 946.0897216796875, - "learning_rate": 4.90996020942769e-05, - "loss": 201.7487, - "step": 22140 - }, - { - "epoch": 0.1789768824893543, - "grad_norm": 1335.8580322265625, - "learning_rate": 4.9097744404025435e-05, - "loss": 125.0365, - "step": 22150 - }, - { - "epoch": 0.17905768469363845, - "grad_norm": 8589.5595703125, - "learning_rate": 4.909588483458225e-05, - "loss": 153.7895, - "step": 22160 - }, - { - "epoch": 0.17913848689792258, - "grad_norm": 677.1741333007812, - "learning_rate": 4.909402338609236e-05, - "loss": 139.8887, - "step": 22170 - }, - { - "epoch": 0.17921928910220672, - "grad_norm": 741.7691040039062, - "learning_rate": 4.90921600587009e-05, - "loss": 155.9494, - "step": 22180 - }, - { - "epoch": 0.17930009130649083, - "grad_norm": 809.6107177734375, - "learning_rate": 4.909029485255321e-05, - "loss": 134.5942, - "step": 22190 - }, - { - "epoch": 0.17938089351077496, - "grad_norm": 1508.6513671875, - "learning_rate": 4.908842776779472e-05, - "loss": 119.9722, - "step": 22200 - }, - { - "epoch": 0.1794616957150591, - "grad_norm": 1333.4815673828125, - "learning_rate": 4.9086558804571034e-05, - "loss": 169.0476, - "step": 22210 - }, - { - "epoch": 0.17954249791934324, - "grad_norm": 1323.66943359375, - "learning_rate": 4.9084687963027894e-05, - "loss": 159.4181, - "step": 22220 - }, - { - "epoch": 0.17962330012362737, - "grad_norm": 961.7247314453125, - "learning_rate": 4.908281524331121e-05, - "loss": 132.897, - "step": 22230 - }, - { - "epoch": 0.1797041023279115, - "grad_norm": 764.607421875, - "learning_rate": 4.9080940645567e-05, - "loss": 109.619, - "step": 22240 - }, - { - "epoch": 0.17978490453219564, - "grad_norm": 1257.1748046875, - "learning_rate": 4.907906416994146e-05, - "loss": 126.2801, - "step": 22250 - }, - { - "epoch": 0.17986570673647978, - "grad_norm": 1149.43359375, - "learning_rate": 4.907718581658091e-05, - "loss": 173.8987, - "step": 22260 - }, - { - "epoch": 0.17994650894076392, - "grad_norm": 697.0416259765625, - "learning_rate": 4.9075305585631845e-05, - "loss": 124.0838, - "step": 22270 - }, - { - "epoch": 0.18002731114504802, - "grad_norm": 1058.27490234375, - "learning_rate": 4.907342347724087e-05, - "loss": 100.3941, - "step": 22280 - }, - { - "epoch": 0.18010811334933216, - "grad_norm": 940.3408813476562, - "learning_rate": 4.907153949155479e-05, - "loss": 162.0708, - "step": 22290 - }, - { - "epoch": 0.1801889155536163, - "grad_norm": 357.243896484375, - "learning_rate": 4.906965362872047e-05, - "loss": 120.6409, - "step": 22300 - }, - { - "epoch": 0.18026971775790043, - "grad_norm": 1092.06103515625, - "learning_rate": 4.906776588888502e-05, - "loss": 147.0123, - "step": 22310 - }, - { - "epoch": 0.18035051996218457, - "grad_norm": 922.9295654296875, - "learning_rate": 4.906587627219562e-05, - "loss": 154.2565, - "step": 22320 - }, - { - "epoch": 0.1804313221664687, - "grad_norm": 2096.77099609375, - "learning_rate": 4.9063984778799645e-05, - "loss": 163.1471, - "step": 22330 - }, - { - "epoch": 0.18051212437075284, - "grad_norm": 3129.79736328125, - "learning_rate": 4.906209140884459e-05, - "loss": 100.585, - "step": 22340 - }, - { - "epoch": 0.18059292657503698, - "grad_norm": 2392.3466796875, - "learning_rate": 4.90601961624781e-05, - "loss": 146.9845, - "step": 22350 - }, - { - "epoch": 0.1806737287793211, - "grad_norm": 1031.2666015625, - "learning_rate": 4.9058299039847975e-05, - "loss": 148.7232, - "step": 22360 - }, - { - "epoch": 0.18075453098360522, - "grad_norm": 1056.4869384765625, - "learning_rate": 4.905640004110216e-05, - "loss": 142.2597, - "step": 22370 - }, - { - "epoch": 0.18083533318788936, - "grad_norm": 1285.865234375, - "learning_rate": 4.905449916638873e-05, - "loss": 118.8351, - "step": 22380 - }, - { - "epoch": 0.1809161353921735, - "grad_norm": 799.2218627929688, - "learning_rate": 4.905259641585594e-05, - "loss": 114.5493, - "step": 22390 - }, - { - "epoch": 0.18099693759645763, - "grad_norm": 585.7321166992188, - "learning_rate": 4.905069178965215e-05, - "loss": 179.0986, - "step": 22400 - }, - { - "epoch": 0.18107773980074177, - "grad_norm": 506.531005859375, - "learning_rate": 4.9048785287925895e-05, - "loss": 107.9276, - "step": 22410 - }, - { - "epoch": 0.1811585420050259, - "grad_norm": 846.5719604492188, - "learning_rate": 4.904687691082585e-05, - "loss": 119.0826, - "step": 22420 - }, - { - "epoch": 0.18123934420931004, - "grad_norm": 801.5440673828125, - "learning_rate": 4.904496665850084e-05, - "loss": 121.0874, - "step": 22430 - }, - { - "epoch": 0.18132014641359417, - "grad_norm": 943.4314575195312, - "learning_rate": 4.904305453109981e-05, - "loss": 166.2178, - "step": 22440 - }, - { - "epoch": 0.18140094861787828, - "grad_norm": 1088.8685302734375, - "learning_rate": 4.904114052877188e-05, - "loss": 142.0752, - "step": 22450 - }, - { - "epoch": 0.18148175082216242, - "grad_norm": 2138.867919921875, - "learning_rate": 4.9039224651666325e-05, - "loss": 96.4502, - "step": 22460 - }, - { - "epoch": 0.18156255302644655, - "grad_norm": 1140.4361572265625, - "learning_rate": 4.903730689993253e-05, - "loss": 130.6906, - "step": 22470 - }, - { - "epoch": 0.1816433552307307, - "grad_norm": 1211.7607421875, - "learning_rate": 4.903538727372005e-05, - "loss": 104.0139, - "step": 22480 - }, - { - "epoch": 0.18172415743501483, - "grad_norm": 1496.7322998046875, - "learning_rate": 4.903346577317859e-05, - "loss": 107.0491, - "step": 22490 - }, - { - "epoch": 0.18180495963929896, - "grad_norm": 1010.459716796875, - "learning_rate": 4.9031542398457974e-05, - "loss": 122.5571, - "step": 22500 - }, - { - "epoch": 0.1818857618435831, - "grad_norm": 1192.0123291015625, - "learning_rate": 4.902961714970821e-05, - "loss": 160.2323, - "step": 22510 - }, - { - "epoch": 0.18196656404786724, - "grad_norm": 965.9971313476562, - "learning_rate": 4.902769002707942e-05, - "loss": 113.0196, - "step": 22520 - }, - { - "epoch": 0.18204736625215137, - "grad_norm": 1351.7047119140625, - "learning_rate": 4.902576103072189e-05, - "loss": 139.1058, - "step": 22530 - }, - { - "epoch": 0.18212816845643548, - "grad_norm": 2100.35546875, - "learning_rate": 4.902383016078605e-05, - "loss": 168.1084, - "step": 22540 - }, - { - "epoch": 0.18220897066071962, - "grad_norm": 696.5603637695312, - "learning_rate": 4.902189741742247e-05, - "loss": 147.7759, - "step": 22550 - }, - { - "epoch": 0.18228977286500375, - "grad_norm": 959.859375, - "learning_rate": 4.901996280078186e-05, - "loss": 155.8262, - "step": 22560 - }, - { - "epoch": 0.1823705750692879, - "grad_norm": 881.1931762695312, - "learning_rate": 4.901802631101511e-05, - "loss": 104.5248, - "step": 22570 - }, - { - "epoch": 0.18245137727357202, - "grad_norm": 1599.84033203125, - "learning_rate": 4.90160879482732e-05, - "loss": 140.668, - "step": 22580 - }, - { - "epoch": 0.18253217947785616, - "grad_norm": 1187.47265625, - "learning_rate": 4.9014147712707316e-05, - "loss": 112.8435, - "step": 22590 - }, - { - "epoch": 0.1826129816821403, - "grad_norm": 781.869873046875, - "learning_rate": 4.9012205604468744e-05, - "loss": 178.4853, - "step": 22600 - }, - { - "epoch": 0.18269378388642443, - "grad_norm": 604.9210205078125, - "learning_rate": 4.9010261623708944e-05, - "loss": 102.6262, - "step": 22610 - }, - { - "epoch": 0.18277458609070854, - "grad_norm": 941.179931640625, - "learning_rate": 4.90083157705795e-05, - "loss": 126.6365, - "step": 22620 - }, - { - "epoch": 0.18285538829499268, - "grad_norm": 1073.9114990234375, - "learning_rate": 4.900636804523217e-05, - "loss": 119.8991, - "step": 22630 - }, - { - "epoch": 0.1829361904992768, - "grad_norm": 1057.618896484375, - "learning_rate": 4.9004418447818815e-05, - "loss": 165.4442, - "step": 22640 - }, - { - "epoch": 0.18301699270356095, - "grad_norm": 964.536865234375, - "learning_rate": 4.90024669784915e-05, - "loss": 120.3884, - "step": 22650 - }, - { - "epoch": 0.18309779490784509, - "grad_norm": 1178.237548828125, - "learning_rate": 4.900051363740238e-05, - "loss": 111.4157, - "step": 22660 - }, - { - "epoch": 0.18317859711212922, - "grad_norm": 1019.692138671875, - "learning_rate": 4.89985584247038e-05, - "loss": 133.637, - "step": 22670 - }, - { - "epoch": 0.18325939931641336, - "grad_norm": 1401.4071044921875, - "learning_rate": 4.8996601340548215e-05, - "loss": 173.2201, - "step": 22680 - }, - { - "epoch": 0.1833402015206975, - "grad_norm": 1275.5079345703125, - "learning_rate": 4.899464238508825e-05, - "loss": 110.9955, - "step": 22690 - }, - { - "epoch": 0.18342100372498163, - "grad_norm": 1765.3773193359375, - "learning_rate": 4.899268155847667e-05, - "loss": 165.5508, - "step": 22700 - }, - { - "epoch": 0.18350180592926574, - "grad_norm": 675.7591552734375, - "learning_rate": 4.899071886086638e-05, - "loss": 138.9387, - "step": 22710 - }, - { - "epoch": 0.18358260813354987, - "grad_norm": 620.6054077148438, - "learning_rate": 4.898875429241044e-05, - "loss": 97.6903, - "step": 22720 - }, - { - "epoch": 0.183663410337834, - "grad_norm": 3295.2353515625, - "learning_rate": 4.898678785326205e-05, - "loss": 161.919, - "step": 22730 - }, - { - "epoch": 0.18374421254211815, - "grad_norm": 844.0809936523438, - "learning_rate": 4.898481954357455e-05, - "loss": 148.4699, - "step": 22740 - }, - { - "epoch": 0.18382501474640228, - "grad_norm": 1163.939697265625, - "learning_rate": 4.898284936350144e-05, - "loss": 159.9641, - "step": 22750 - }, - { - "epoch": 0.18390581695068642, - "grad_norm": 793.322509765625, - "learning_rate": 4.898087731319636e-05, - "loss": 109.0563, - "step": 22760 - }, - { - "epoch": 0.18398661915497055, - "grad_norm": 1573.4891357421875, - "learning_rate": 4.897890339281309e-05, - "loss": 141.0338, - "step": 22770 - }, - { - "epoch": 0.1840674213592547, - "grad_norm": 1555.74951171875, - "learning_rate": 4.897692760250556e-05, - "loss": 157.2911, - "step": 22780 - }, - { - "epoch": 0.18414822356353883, - "grad_norm": 705.9891357421875, - "learning_rate": 4.897494994242785e-05, - "loss": 161.7765, - "step": 22790 - }, - { - "epoch": 0.18422902576782293, - "grad_norm": 1178.649658203125, - "learning_rate": 4.8972970412734176e-05, - "loss": 135.4897, - "step": 22800 - }, - { - "epoch": 0.18430982797210707, - "grad_norm": 3464.89404296875, - "learning_rate": 4.897098901357891e-05, - "loss": 153.2055, - "step": 22810 - }, - { - "epoch": 0.1843906301763912, - "grad_norm": 1479.21875, - "learning_rate": 4.896900574511657e-05, - "loss": 177.4828, - "step": 22820 - }, - { - "epoch": 0.18447143238067534, - "grad_norm": 1141.8677978515625, - "learning_rate": 4.896702060750181e-05, - "loss": 127.3061, - "step": 22830 - }, - { - "epoch": 0.18455223458495948, - "grad_norm": 1337.0460205078125, - "learning_rate": 4.8965033600889435e-05, - "loss": 118.231, - "step": 22840 - }, - { - "epoch": 0.18463303678924362, - "grad_norm": 804.4263916015625, - "learning_rate": 4.89630447254344e-05, - "loss": 125.1156, - "step": 22850 - }, - { - "epoch": 0.18471383899352775, - "grad_norm": 1429.87548828125, - "learning_rate": 4.8961053981291795e-05, - "loss": 101.913, - "step": 22860 - }, - { - "epoch": 0.1847946411978119, - "grad_norm": 865.4014282226562, - "learning_rate": 4.8959061368616863e-05, - "loss": 102.9141, - "step": 22870 - }, - { - "epoch": 0.184875443402096, - "grad_norm": 866.8425903320312, - "learning_rate": 4.8957066887565e-05, - "loss": 150.5124, - "step": 22880 - }, - { - "epoch": 0.18495624560638013, - "grad_norm": 1073.4912109375, - "learning_rate": 4.8955070538291735e-05, - "loss": 116.8165, - "step": 22890 - }, - { - "epoch": 0.18503704781066427, - "grad_norm": 670.2167358398438, - "learning_rate": 4.8953072320952745e-05, - "loss": 113.9903, - "step": 22900 - }, - { - "epoch": 0.1851178500149484, - "grad_norm": 1252.5771484375, - "learning_rate": 4.8951072235703855e-05, - "loss": 180.2601, - "step": 22910 - }, - { - "epoch": 0.18519865221923254, - "grad_norm": 1627.02294921875, - "learning_rate": 4.8949070282701034e-05, - "loss": 117.2664, - "step": 22920 - }, - { - "epoch": 0.18527945442351668, - "grad_norm": 1428.931884765625, - "learning_rate": 4.894706646210041e-05, - "loss": 139.5075, - "step": 22930 - }, - { - "epoch": 0.1853602566278008, - "grad_norm": 1004.9000854492188, - "learning_rate": 4.894506077405824e-05, - "loss": 117.6594, - "step": 22940 - }, - { - "epoch": 0.18544105883208495, - "grad_norm": 751.6952514648438, - "learning_rate": 4.894305321873092e-05, - "loss": 101.807, - "step": 22950 - }, - { - "epoch": 0.18552186103636908, - "grad_norm": 690.0746459960938, - "learning_rate": 4.8941043796275015e-05, - "loss": 138.0377, - "step": 22960 - }, - { - "epoch": 0.1856026632406532, - "grad_norm": 978.247314453125, - "learning_rate": 4.8939032506847224e-05, - "loss": 155.4623, - "step": 22970 - }, - { - "epoch": 0.18568346544493733, - "grad_norm": 993.5946655273438, - "learning_rate": 4.893701935060439e-05, - "loss": 151.6691, - "step": 22980 - }, - { - "epoch": 0.18576426764922147, - "grad_norm": 925.807861328125, - "learning_rate": 4.893500432770349e-05, - "loss": 129.7521, - "step": 22990 - }, - { - "epoch": 0.1858450698535056, - "grad_norm": 892.179931640625, - "learning_rate": 4.893298743830168e-05, - "loss": 122.6083, - "step": 23000 - }, - { - "epoch": 0.18592587205778974, - "grad_norm": 964.4226684570312, - "learning_rate": 4.8930968682556234e-05, - "loss": 100.5539, - "step": 23010 - }, - { - "epoch": 0.18600667426207387, - "grad_norm": 1463.74072265625, - "learning_rate": 4.892894806062458e-05, - "loss": 110.3409, - "step": 23020 - }, - { - "epoch": 0.186087476466358, - "grad_norm": 1081.3094482421875, - "learning_rate": 4.892692557266429e-05, - "loss": 156.8142, - "step": 23030 - }, - { - "epoch": 0.18616827867064215, - "grad_norm": 790.9652099609375, - "learning_rate": 4.892490121883306e-05, - "loss": 167.5007, - "step": 23040 - }, - { - "epoch": 0.18624908087492628, - "grad_norm": 836.5440673828125, - "learning_rate": 4.892287499928879e-05, - "loss": 141.922, - "step": 23050 - }, - { - "epoch": 0.1863298830792104, - "grad_norm": 1207.8095703125, - "learning_rate": 4.892084691418947e-05, - "loss": 168.4551, - "step": 23060 - }, - { - "epoch": 0.18641068528349453, - "grad_norm": 1673.3968505859375, - "learning_rate": 4.891881696369325e-05, - "loss": 119.3171, - "step": 23070 - }, - { - "epoch": 0.18649148748777866, - "grad_norm": 1483.446044921875, - "learning_rate": 4.891678514795843e-05, - "loss": 132.2212, - "step": 23080 - }, - { - "epoch": 0.1865722896920628, - "grad_norm": 654.51611328125, - "learning_rate": 4.891475146714347e-05, - "loss": 160.2236, - "step": 23090 - }, - { - "epoch": 0.18665309189634693, - "grad_norm": 1243.8936767578125, - "learning_rate": 4.891271592140695e-05, - "loss": 154.3544, - "step": 23100 - }, - { - "epoch": 0.18673389410063107, - "grad_norm": 899.3131713867188, - "learning_rate": 4.8910678510907606e-05, - "loss": 213.0602, - "step": 23110 - }, - { - "epoch": 0.1868146963049152, - "grad_norm": 1691.8236083984375, - "learning_rate": 4.8908639235804324e-05, - "loss": 155.3828, - "step": 23120 - }, - { - "epoch": 0.18689549850919934, - "grad_norm": 1439.4566650390625, - "learning_rate": 4.890659809625612e-05, - "loss": 187.1953, - "step": 23130 - }, - { - "epoch": 0.18697630071348345, - "grad_norm": 627.9845581054688, - "learning_rate": 4.890455509242218e-05, - "loss": 121.08, - "step": 23140 - }, - { - "epoch": 0.1870571029177676, - "grad_norm": 778.0470581054688, - "learning_rate": 4.890251022446181e-05, - "loss": 129.215, - "step": 23150 - }, - { - "epoch": 0.18713790512205172, - "grad_norm": 930.5742797851562, - "learning_rate": 4.890046349253448e-05, - "loss": 125.8854, - "step": 23160 - }, - { - "epoch": 0.18721870732633586, - "grad_norm": 930.9414672851562, - "learning_rate": 4.88984148967998e-05, - "loss": 83.4641, - "step": 23170 - }, - { - "epoch": 0.18729950953062, - "grad_norm": 948.5119018554688, - "learning_rate": 4.889636443741752e-05, - "loss": 137.5488, - "step": 23180 - }, - { - "epoch": 0.18738031173490413, - "grad_norm": 1084.151611328125, - "learning_rate": 4.8894312114547535e-05, - "loss": 147.1522, - "step": 23190 - }, - { - "epoch": 0.18746111393918827, - "grad_norm": 702.95263671875, - "learning_rate": 4.889225792834991e-05, - "loss": 117.0958, - "step": 23200 - }, - { - "epoch": 0.1875419161434724, - "grad_norm": 770.4234008789062, - "learning_rate": 4.8890201878984796e-05, - "loss": 162.5534, - "step": 23210 - }, - { - "epoch": 0.18762271834775654, - "grad_norm": 875.0130615234375, - "learning_rate": 4.888814396661256e-05, - "loss": 111.7798, - "step": 23220 - }, - { - "epoch": 0.18770352055204065, - "grad_norm": 753.2984008789062, - "learning_rate": 4.8886084191393677e-05, - "loss": 132.3068, - "step": 23230 - }, - { - "epoch": 0.18778432275632478, - "grad_norm": 848.7901611328125, - "learning_rate": 4.888402255348876e-05, - "loss": 160.3534, - "step": 23240 - }, - { - "epoch": 0.18786512496060892, - "grad_norm": 916.5767211914062, - "learning_rate": 4.888195905305859e-05, - "loss": 109.6705, - "step": 23250 - }, - { - "epoch": 0.18794592716489306, - "grad_norm": 1194.894287109375, - "learning_rate": 4.887989369026409e-05, - "loss": 87.2658, - "step": 23260 - }, - { - "epoch": 0.1880267293691772, - "grad_norm": 628.4296264648438, - "learning_rate": 4.887782646526631e-05, - "loss": 120.2924, - "step": 23270 - }, - { - "epoch": 0.18810753157346133, - "grad_norm": 698.8002319335938, - "learning_rate": 4.887575737822645e-05, - "loss": 100.7729, - "step": 23280 - }, - { - "epoch": 0.18818833377774546, - "grad_norm": 1819.4241943359375, - "learning_rate": 4.887368642930588e-05, - "loss": 164.9432, - "step": 23290 - }, - { - "epoch": 0.1882691359820296, - "grad_norm": 1597.739013671875, - "learning_rate": 4.887161361866608e-05, - "loss": 131.7205, - "step": 23300 - }, - { - "epoch": 0.1883499381863137, - "grad_norm": 1857.836669921875, - "learning_rate": 4.8869538946468694e-05, - "loss": 132.2548, - "step": 23310 - }, - { - "epoch": 0.18843074039059785, - "grad_norm": 867.8088989257812, - "learning_rate": 4.8867462412875526e-05, - "loss": 144.9032, - "step": 23320 - }, - { - "epoch": 0.18851154259488198, - "grad_norm": 579.5343017578125, - "learning_rate": 4.8865384018048494e-05, - "loss": 131.9404, - "step": 23330 - }, - { - "epoch": 0.18859234479916612, - "grad_norm": 1134.3775634765625, - "learning_rate": 4.886330376214968e-05, - "loss": 119.8966, - "step": 23340 - }, - { - "epoch": 0.18867314700345025, - "grad_norm": 641.1632080078125, - "learning_rate": 4.886122164534131e-05, - "loss": 107.3825, - "step": 23350 - }, - { - "epoch": 0.1887539492077344, - "grad_norm": 2375.10546875, - "learning_rate": 4.8859137667785735e-05, - "loss": 197.8785, - "step": 23360 - }, - { - "epoch": 0.18883475141201853, - "grad_norm": 1088.5643310546875, - "learning_rate": 4.8857051829645485e-05, - "loss": 111.5751, - "step": 23370 - }, - { - "epoch": 0.18891555361630266, - "grad_norm": 636.4822387695312, - "learning_rate": 4.88549641310832e-05, - "loss": 117.8517, - "step": 23380 - }, - { - "epoch": 0.1889963558205868, - "grad_norm": 2928.708251953125, - "learning_rate": 4.885287457226172e-05, - "loss": 172.9106, - "step": 23390 - }, - { - "epoch": 0.1890771580248709, - "grad_norm": 968.6165161132812, - "learning_rate": 4.885078315334395e-05, - "loss": 139.9877, - "step": 23400 - }, - { - "epoch": 0.18915796022915504, - "grad_norm": 1029.96826171875, - "learning_rate": 4.884868987449301e-05, - "loss": 124.8321, - "step": 23410 - }, - { - "epoch": 0.18923876243343918, - "grad_norm": 1649.3917236328125, - "learning_rate": 4.884659473587213e-05, - "loss": 136.1436, - "step": 23420 - }, - { - "epoch": 0.18931956463772331, - "grad_norm": 1318.7757568359375, - "learning_rate": 4.884449773764469e-05, - "loss": 120.2704, - "step": 23430 - }, - { - "epoch": 0.18940036684200745, - "grad_norm": 1546.4759521484375, - "learning_rate": 4.884239887997423e-05, - "loss": 131.8311, - "step": 23440 - }, - { - "epoch": 0.1894811690462916, - "grad_norm": 930.0927734375, - "learning_rate": 4.88402981630244e-05, - "loss": 147.8972, - "step": 23450 - }, - { - "epoch": 0.18956197125057572, - "grad_norm": 1559.9215087890625, - "learning_rate": 4.8838195586959046e-05, - "loss": 130.2989, - "step": 23460 - }, - { - "epoch": 0.18964277345485986, - "grad_norm": 1102.040283203125, - "learning_rate": 4.883609115194211e-05, - "loss": 125.5286, - "step": 23470 - }, - { - "epoch": 0.189723575659144, - "grad_norm": 975.6123657226562, - "learning_rate": 4.8833984858137715e-05, - "loss": 132.8176, - "step": 23480 - }, - { - "epoch": 0.1898043778634281, - "grad_norm": 1110.8406982421875, - "learning_rate": 4.88318767057101e-05, - "loss": 110.7206, - "step": 23490 - }, - { - "epoch": 0.18988518006771224, - "grad_norm": 1832.242919921875, - "learning_rate": 4.882976669482367e-05, - "loss": 132.6625, - "step": 23500 - }, - { - "epoch": 0.18996598227199638, - "grad_norm": 1305.0228271484375, - "learning_rate": 4.882765482564298e-05, - "loss": 138.548, - "step": 23510 - }, - { - "epoch": 0.1900467844762805, - "grad_norm": 1140.833740234375, - "learning_rate": 4.8825541098332706e-05, - "loss": 115.6035, - "step": 23520 - }, - { - "epoch": 0.19012758668056465, - "grad_norm": 1267.034912109375, - "learning_rate": 4.8823425513057674e-05, - "loss": 124.995, - "step": 23530 - }, - { - "epoch": 0.19020838888484878, - "grad_norm": 755.327880859375, - "learning_rate": 4.8821308069982867e-05, - "loss": 117.9264, - "step": 23540 - }, - { - "epoch": 0.19028919108913292, - "grad_norm": 827.6521606445312, - "learning_rate": 4.8819188769273414e-05, - "loss": 121.6657, - "step": 23550 - }, - { - "epoch": 0.19036999329341706, - "grad_norm": 1207.525634765625, - "learning_rate": 4.881706761109458e-05, - "loss": 104.6301, - "step": 23560 - }, - { - "epoch": 0.19045079549770116, - "grad_norm": 559.6730346679688, - "learning_rate": 4.8814944595611776e-05, - "loss": 164.8614, - "step": 23570 - }, - { - "epoch": 0.1905315977019853, - "grad_norm": 911.3177490234375, - "learning_rate": 4.881281972299055e-05, - "loss": 159.3027, - "step": 23580 - }, - { - "epoch": 0.19061239990626944, - "grad_norm": 733.9683837890625, - "learning_rate": 4.881069299339662e-05, - "loss": 116.521, - "step": 23590 - }, - { - "epoch": 0.19069320211055357, - "grad_norm": 1349.188232421875, - "learning_rate": 4.880856440699582e-05, - "loss": 115.2912, - "step": 23600 - }, - { - "epoch": 0.1907740043148377, - "grad_norm": 986.3001098632812, - "learning_rate": 4.8806433963954154e-05, - "loss": 156.8348, - "step": 23610 - }, - { - "epoch": 0.19085480651912184, - "grad_norm": 1179.6162109375, - "learning_rate": 4.880430166443775e-05, - "loss": 160.6499, - "step": 23620 - }, - { - "epoch": 0.19093560872340598, - "grad_norm": 754.2844848632812, - "learning_rate": 4.880216750861288e-05, - "loss": 107.2217, - "step": 23630 - }, - { - "epoch": 0.19101641092769012, - "grad_norm": 555.248291015625, - "learning_rate": 4.880003149664599e-05, - "loss": 136.5164, - "step": 23640 - }, - { - "epoch": 0.19109721313197425, - "grad_norm": 950.3463745117188, - "learning_rate": 4.8797893628703635e-05, - "loss": 134.2859, - "step": 23650 - }, - { - "epoch": 0.19117801533625836, - "grad_norm": 547.9365234375, - "learning_rate": 4.8795753904952534e-05, - "loss": 127.5815, - "step": 23660 - }, - { - "epoch": 0.1912588175405425, - "grad_norm": 1058.050537109375, - "learning_rate": 4.879361232555956e-05, - "loss": 115.3421, - "step": 23670 - }, - { - "epoch": 0.19133961974482663, - "grad_norm": 830.857177734375, - "learning_rate": 4.8791468890691696e-05, - "loss": 141.186, - "step": 23680 - }, - { - "epoch": 0.19142042194911077, - "grad_norm": 882.3456420898438, - "learning_rate": 4.8789323600516104e-05, - "loss": 129.0883, - "step": 23690 - }, - { - "epoch": 0.1915012241533949, - "grad_norm": 794.2307739257812, - "learning_rate": 4.878717645520008e-05, - "loss": 127.3695, - "step": 23700 - }, - { - "epoch": 0.19158202635767904, - "grad_norm": 600.991943359375, - "learning_rate": 4.878502745491106e-05, - "loss": 117.6174, - "step": 23710 - }, - { - "epoch": 0.19166282856196318, - "grad_norm": 774.2867431640625, - "learning_rate": 4.878287659981662e-05, - "loss": 126.4504, - "step": 23720 - }, - { - "epoch": 0.1917436307662473, - "grad_norm": 1183.3017578125, - "learning_rate": 4.87807238900845e-05, - "loss": 131.0533, - "step": 23730 - }, - { - "epoch": 0.19182443297053145, - "grad_norm": 970.2349243164062, - "learning_rate": 4.877856932588257e-05, - "loss": 177.7648, - "step": 23740 - }, - { - "epoch": 0.19190523517481556, - "grad_norm": 754.4205932617188, - "learning_rate": 4.877641290737884e-05, - "loss": 140.9191, - "step": 23750 - }, - { - "epoch": 0.1919860373790997, - "grad_norm": 612.7131958007812, - "learning_rate": 4.877425463474148e-05, - "loss": 153.4565, - "step": 23760 - }, - { - "epoch": 0.19206683958338383, - "grad_norm": 698.0365600585938, - "learning_rate": 4.8772094508138796e-05, - "loss": 152.0398, - "step": 23770 - }, - { - "epoch": 0.19214764178766797, - "grad_norm": 417.0340576171875, - "learning_rate": 4.8769932527739225e-05, - "loss": 110.5045, - "step": 23780 - }, - { - "epoch": 0.1922284439919521, - "grad_norm": 1337.1680908203125, - "learning_rate": 4.876776869371139e-05, - "loss": 134.4096, - "step": 23790 - }, - { - "epoch": 0.19230924619623624, - "grad_norm": 1037.9677734375, - "learning_rate": 4.8765603006224006e-05, - "loss": 149.5955, - "step": 23800 - }, - { - "epoch": 0.19239004840052037, - "grad_norm": 917.897705078125, - "learning_rate": 4.8763435465445964e-05, - "loss": 168.2867, - "step": 23810 - }, - { - "epoch": 0.1924708506048045, - "grad_norm": 825.4429321289062, - "learning_rate": 4.87612660715463e-05, - "loss": 115.5069, - "step": 23820 - }, - { - "epoch": 0.19255165280908862, - "grad_norm": 1952.914794921875, - "learning_rate": 4.8759094824694184e-05, - "loss": 143.909, - "step": 23830 - }, - { - "epoch": 0.19263245501337276, - "grad_norm": 745.7085571289062, - "learning_rate": 4.8756921725058934e-05, - "loss": 125.5558, - "step": 23840 - }, - { - "epoch": 0.1927132572176569, - "grad_norm": 1505.0206298828125, - "learning_rate": 4.875474677281002e-05, - "loss": 142.473, - "step": 23850 - }, - { - "epoch": 0.19279405942194103, - "grad_norm": 1388.0023193359375, - "learning_rate": 4.875256996811703e-05, - "loss": 109.5966, - "step": 23860 - }, - { - "epoch": 0.19287486162622516, - "grad_norm": 874.41845703125, - "learning_rate": 4.875039131114975e-05, - "loss": 145.326, - "step": 23870 - }, - { - "epoch": 0.1929556638305093, - "grad_norm": 807.219970703125, - "learning_rate": 4.874821080207803e-05, - "loss": 126.3294, - "step": 23880 - }, - { - "epoch": 0.19303646603479344, - "grad_norm": 2252.218505859375, - "learning_rate": 4.8746028441071943e-05, - "loss": 129.4108, - "step": 23890 - }, - { - "epoch": 0.19311726823907757, - "grad_norm": 814.71630859375, - "learning_rate": 4.874384422830167e-05, - "loss": 104.119, - "step": 23900 - }, - { - "epoch": 0.1931980704433617, - "grad_norm": 957.2908325195312, - "learning_rate": 4.874165816393754e-05, - "loss": 114.3851, - "step": 23910 - }, - { - "epoch": 0.19327887264764582, - "grad_norm": 2235.653076171875, - "learning_rate": 4.873947024815002e-05, - "loss": 170.2336, - "step": 23920 - }, - { - "epoch": 0.19335967485192995, - "grad_norm": 1687.2611083984375, - "learning_rate": 4.8737280481109724e-05, - "loss": 170.5733, - "step": 23930 - }, - { - "epoch": 0.1934404770562141, - "grad_norm": 785.6593017578125, - "learning_rate": 4.873508886298743e-05, - "loss": 127.0746, - "step": 23940 - }, - { - "epoch": 0.19352127926049822, - "grad_norm": 1444.524169921875, - "learning_rate": 4.8732895393954036e-05, - "loss": 132.862, - "step": 23950 - }, - { - "epoch": 0.19360208146478236, - "grad_norm": 2346.66796875, - "learning_rate": 4.873070007418059e-05, - "loss": 147.9806, - "step": 23960 - }, - { - "epoch": 0.1936828836690665, - "grad_norm": 932.36376953125, - "learning_rate": 4.8728502903838295e-05, - "loss": 164.3039, - "step": 23970 - }, - { - "epoch": 0.19376368587335063, - "grad_norm": 852.3303833007812, - "learning_rate": 4.872630388309849e-05, - "loss": 143.2625, - "step": 23980 - }, - { - "epoch": 0.19384448807763477, - "grad_norm": 1062.9395751953125, - "learning_rate": 4.872410301213265e-05, - "loss": 124.879, - "step": 23990 - }, - { - "epoch": 0.19392529028191888, - "grad_norm": 1774.3748779296875, - "learning_rate": 4.8721900291112415e-05, - "loss": 170.4284, - "step": 24000 - }, - { - "epoch": 0.194006092486203, - "grad_norm": 791.8446655273438, - "learning_rate": 4.871969572020955e-05, - "loss": 100.5316, - "step": 24010 - }, - { - "epoch": 0.19408689469048715, - "grad_norm": 2738.352783203125, - "learning_rate": 4.871748929959598e-05, - "loss": 182.8135, - "step": 24020 - }, - { - "epoch": 0.19416769689477129, - "grad_norm": 654.8862915039062, - "learning_rate": 4.871528102944376e-05, - "loss": 137.2547, - "step": 24030 - }, - { - "epoch": 0.19424849909905542, - "grad_norm": 507.965087890625, - "learning_rate": 4.8713070909925094e-05, - "loss": 144.5703, - "step": 24040 - }, - { - "epoch": 0.19432930130333956, - "grad_norm": 1416.873291015625, - "learning_rate": 4.871085894121233e-05, - "loss": 142.8553, - "step": 24050 - }, - { - "epoch": 0.1944101035076237, - "grad_norm": 1289.441650390625, - "learning_rate": 4.870864512347797e-05, - "loss": 103.1509, - "step": 24060 - }, - { - "epoch": 0.19449090571190783, - "grad_norm": 1232.321044921875, - "learning_rate": 4.870642945689465e-05, - "loss": 112.1742, - "step": 24070 - }, - { - "epoch": 0.19457170791619197, - "grad_norm": 1055.31201171875, - "learning_rate": 4.870421194163515e-05, - "loss": 146.1467, - "step": 24080 - }, - { - "epoch": 0.19465251012047607, - "grad_norm": 1095.24658203125, - "learning_rate": 4.87019925778724e-05, - "loss": 138.6979, - "step": 24090 - }, - { - "epoch": 0.1947333123247602, - "grad_norm": 1802.641357421875, - "learning_rate": 4.8699771365779453e-05, - "loss": 120.9011, - "step": 24100 - }, - { - "epoch": 0.19481411452904435, - "grad_norm": 1350.2144775390625, - "learning_rate": 4.869754830552956e-05, - "loss": 129.1392, - "step": 24110 - }, - { - "epoch": 0.19489491673332848, - "grad_norm": 776.7698364257812, - "learning_rate": 4.8695323397296044e-05, - "loss": 129.7124, - "step": 24120 - }, - { - "epoch": 0.19497571893761262, - "grad_norm": 1315.6859130859375, - "learning_rate": 4.8693096641252424e-05, - "loss": 112.2434, - "step": 24130 - }, - { - "epoch": 0.19505652114189675, - "grad_norm": 1351.2559814453125, - "learning_rate": 4.8690868037572346e-05, - "loss": 98.5212, - "step": 24140 - }, - { - "epoch": 0.1951373233461809, - "grad_norm": 2370.138427734375, - "learning_rate": 4.8688637586429595e-05, - "loss": 133.2732, - "step": 24150 - }, - { - "epoch": 0.19521812555046503, - "grad_norm": 2544.447509765625, - "learning_rate": 4.8686405287998116e-05, - "loss": 163.5476, - "step": 24160 - }, - { - "epoch": 0.19529892775474916, - "grad_norm": 1037.4510498046875, - "learning_rate": 4.8684171142451986e-05, - "loss": 113.5169, - "step": 24170 - }, - { - "epoch": 0.19537972995903327, - "grad_norm": 711.7839965820312, - "learning_rate": 4.8681935149965416e-05, - "loss": 149.0609, - "step": 24180 - }, - { - "epoch": 0.1954605321633174, - "grad_norm": 972.8963623046875, - "learning_rate": 4.867969731071279e-05, - "loss": 144.4308, - "step": 24190 - }, - { - "epoch": 0.19554133436760154, - "grad_norm": 2454.304443359375, - "learning_rate": 4.867745762486861e-05, - "loss": 134.2548, - "step": 24200 - }, - { - "epoch": 0.19562213657188568, - "grad_norm": 1283.4901123046875, - "learning_rate": 4.867521609260754e-05, - "loss": 126.5963, - "step": 24210 - }, - { - "epoch": 0.19570293877616982, - "grad_norm": 786.9524536132812, - "learning_rate": 4.8672972714104357e-05, - "loss": 113.1498, - "step": 24220 - }, - { - "epoch": 0.19578374098045395, - "grad_norm": 1864.1240234375, - "learning_rate": 4.8670727489534034e-05, - "loss": 157.5092, - "step": 24230 - }, - { - "epoch": 0.1958645431847381, - "grad_norm": 1075.14111328125, - "learning_rate": 4.866848041907164e-05, - "loss": 162.1051, - "step": 24240 - }, - { - "epoch": 0.19594534538902222, - "grad_norm": 806.90576171875, - "learning_rate": 4.8666231502892415e-05, - "loss": 137.7023, - "step": 24250 - }, - { - "epoch": 0.19602614759330633, - "grad_norm": 721.8324584960938, - "learning_rate": 4.8663980741171724e-05, - "loss": 128.3464, - "step": 24260 - }, - { - "epoch": 0.19610694979759047, - "grad_norm": 1365.617919921875, - "learning_rate": 4.86617281340851e-05, - "loss": 122.2167, - "step": 24270 - }, - { - "epoch": 0.1961877520018746, - "grad_norm": 1113.8824462890625, - "learning_rate": 4.865947368180818e-05, - "loss": 162.3049, - "step": 24280 - }, - { - "epoch": 0.19626855420615874, - "grad_norm": 893.8717651367188, - "learning_rate": 4.86572173845168e-05, - "loss": 140.1659, - "step": 24290 - }, - { - "epoch": 0.19634935641044288, - "grad_norm": 750.738525390625, - "learning_rate": 4.8654959242386896e-05, - "loss": 173.5381, - "step": 24300 - }, - { - "epoch": 0.196430158614727, - "grad_norm": 1939.13427734375, - "learning_rate": 4.865269925559457e-05, - "loss": 151.1877, - "step": 24310 - }, - { - "epoch": 0.19651096081901115, - "grad_norm": 920.9683837890625, - "learning_rate": 4.865043742431605e-05, - "loss": 143.5133, - "step": 24320 - }, - { - "epoch": 0.19659176302329529, - "grad_norm": 1179.9365234375, - "learning_rate": 4.864817374872773e-05, - "loss": 144.763, - "step": 24330 - }, - { - "epoch": 0.19667256522757942, - "grad_norm": 730.5543823242188, - "learning_rate": 4.8645908229006135e-05, - "loss": 82.3449, - "step": 24340 - }, - { - "epoch": 0.19675336743186353, - "grad_norm": 1409.7437744140625, - "learning_rate": 4.864364086532792e-05, - "loss": 93.6332, - "step": 24350 - }, - { - "epoch": 0.19683416963614767, - "grad_norm": 932.4425048828125, - "learning_rate": 4.8641371657869916e-05, - "loss": 154.6392, - "step": 24360 - }, - { - "epoch": 0.1969149718404318, - "grad_norm": 1059.22705078125, - "learning_rate": 4.863910060680907e-05, - "loss": 144.6276, - "step": 24370 - }, - { - "epoch": 0.19699577404471594, - "grad_norm": 5171.248046875, - "learning_rate": 4.863682771232248e-05, - "loss": 197.1616, - "step": 24380 - }, - { - "epoch": 0.19707657624900007, - "grad_norm": 902.489990234375, - "learning_rate": 4.8634552974587414e-05, - "loss": 138.603, - "step": 24390 - }, - { - "epoch": 0.1971573784532842, - "grad_norm": 3003.037353515625, - "learning_rate": 4.863227639378124e-05, - "loss": 174.8408, - "step": 24400 - }, - { - "epoch": 0.19723818065756835, - "grad_norm": 1119.216796875, - "learning_rate": 4.862999797008149e-05, - "loss": 122.6092, - "step": 24410 - }, - { - "epoch": 0.19731898286185248, - "grad_norm": 1428.86669921875, - "learning_rate": 4.862771770366584e-05, - "loss": 160.3341, - "step": 24420 - }, - { - "epoch": 0.1973997850661366, - "grad_norm": 959.92333984375, - "learning_rate": 4.862543559471212e-05, - "loss": 74.3368, - "step": 24430 - }, - { - "epoch": 0.19748058727042073, - "grad_norm": 978.4984741210938, - "learning_rate": 4.862315164339829e-05, - "loss": 118.756, - "step": 24440 - }, - { - "epoch": 0.19756138947470486, - "grad_norm": 815.65234375, - "learning_rate": 4.8620865849902456e-05, - "loss": 99.8532, - "step": 24450 - }, - { - "epoch": 0.197642191678989, - "grad_norm": 1076.5560302734375, - "learning_rate": 4.861857821440287e-05, - "loss": 107.3878, - "step": 24460 - }, - { - "epoch": 0.19772299388327313, - "grad_norm": 1435.7271728515625, - "learning_rate": 4.861628873707792e-05, - "loss": 124.2357, - "step": 24470 - }, - { - "epoch": 0.19780379608755727, - "grad_norm": 1026.2786865234375, - "learning_rate": 4.861399741810615e-05, - "loss": 144.9332, - "step": 24480 - }, - { - "epoch": 0.1978845982918414, - "grad_norm": 1504.50390625, - "learning_rate": 4.861170425766625e-05, - "loss": 119.4577, - "step": 24490 - }, - { - "epoch": 0.19796540049612554, - "grad_norm": 1465.103271484375, - "learning_rate": 4.860940925593703e-05, - "loss": 170.8604, - "step": 24500 - }, - { - "epoch": 0.19804620270040968, - "grad_norm": 1746.6812744140625, - "learning_rate": 4.8607112413097464e-05, - "loss": 133.7556, - "step": 24510 - }, - { - "epoch": 0.1981270049046938, - "grad_norm": 1001.8831176757812, - "learning_rate": 4.860481372932667e-05, - "loss": 129.3315, - "step": 24520 - }, - { - "epoch": 0.19820780710897792, - "grad_norm": 1171.9906005859375, - "learning_rate": 4.8602513204803896e-05, - "loss": 163.0627, - "step": 24530 - }, - { - "epoch": 0.19828860931326206, - "grad_norm": 1638.5550537109375, - "learning_rate": 4.860021083970855e-05, - "loss": 124.2955, - "step": 24540 - }, - { - "epoch": 0.1983694115175462, - "grad_norm": 734.9754028320312, - "learning_rate": 4.859790663422016e-05, - "loss": 125.2358, - "step": 24550 - }, - { - "epoch": 0.19845021372183033, - "grad_norm": 928.3885498046875, - "learning_rate": 4.859560058851844e-05, - "loss": 93.3567, - "step": 24560 - }, - { - "epoch": 0.19853101592611447, - "grad_norm": 1441.7003173828125, - "learning_rate": 4.859329270278319e-05, - "loss": 119.0235, - "step": 24570 - }, - { - "epoch": 0.1986118181303986, - "grad_norm": 1037.3463134765625, - "learning_rate": 4.85909829771944e-05, - "loss": 164.7315, - "step": 24580 - }, - { - "epoch": 0.19869262033468274, - "grad_norm": 4888.283203125, - "learning_rate": 4.858867141193219e-05, - "loss": 151.8262, - "step": 24590 - }, - { - "epoch": 0.19877342253896688, - "grad_norm": 1644.477783203125, - "learning_rate": 4.858635800717681e-05, - "loss": 124.393, - "step": 24600 - }, - { - "epoch": 0.19885422474325098, - "grad_norm": 712.972412109375, - "learning_rate": 4.8584042763108675e-05, - "loss": 108.2681, - "step": 24610 - }, - { - "epoch": 0.19893502694753512, - "grad_norm": 912.517578125, - "learning_rate": 4.8581725679908317e-05, - "loss": 204.6492, - "step": 24620 - }, - { - "epoch": 0.19901582915181926, - "grad_norm": 756.3255615234375, - "learning_rate": 4.8579406757756455e-05, - "loss": 122.0328, - "step": 24630 - }, - { - "epoch": 0.1990966313561034, - "grad_norm": 1632.58203125, - "learning_rate": 4.857708599683389e-05, - "loss": 120.6596, - "step": 24640 - }, - { - "epoch": 0.19917743356038753, - "grad_norm": 889.685546875, - "learning_rate": 4.8574763397321614e-05, - "loss": 86.9638, - "step": 24650 - }, - { - "epoch": 0.19925823576467167, - "grad_norm": 1112.0284423828125, - "learning_rate": 4.857243895940076e-05, - "loss": 109.0597, - "step": 24660 - }, - { - "epoch": 0.1993390379689558, - "grad_norm": 1187.57666015625, - "learning_rate": 4.8570112683252565e-05, - "loss": 142.5116, - "step": 24670 - }, - { - "epoch": 0.19941984017323994, - "grad_norm": 460.8227844238281, - "learning_rate": 4.856778456905846e-05, - "loss": 129.4077, - "step": 24680 - }, - { - "epoch": 0.19950064237752405, - "grad_norm": 909.6550903320312, - "learning_rate": 4.8565454617e-05, - "loss": 99.7407, - "step": 24690 - }, - { - "epoch": 0.19958144458180818, - "grad_norm": 1926.100341796875, - "learning_rate": 4.856312282725886e-05, - "loss": 186.8667, - "step": 24700 - }, - { - "epoch": 0.19966224678609232, - "grad_norm": 1169.4407958984375, - "learning_rate": 4.8560789200016884e-05, - "loss": 115.1627, - "step": 24710 - }, - { - "epoch": 0.19974304899037645, - "grad_norm": 653.35302734375, - "learning_rate": 4.855845373545605e-05, - "loss": 193.9355, - "step": 24720 - }, - { - "epoch": 0.1998238511946606, - "grad_norm": 980.9020385742188, - "learning_rate": 4.85561164337585e-05, - "loss": 118.0105, - "step": 24730 - }, - { - "epoch": 0.19990465339894473, - "grad_norm": 852.4310302734375, - "learning_rate": 4.855377729510648e-05, - "loss": 97.2902, - "step": 24740 - }, - { - "epoch": 0.19998545560322886, - "grad_norm": 904.9118041992188, - "learning_rate": 4.855143631968242e-05, - "loss": 96.693, - "step": 24750 - }, - { - "epoch": 0.200066257807513, - "grad_norm": 658.8388671875, - "learning_rate": 4.8549093507668865e-05, - "loss": 145.2285, - "step": 24760 - }, - { - "epoch": 0.20014706001179713, - "grad_norm": 874.3348388671875, - "learning_rate": 4.85467488592485e-05, - "loss": 103.5924, - "step": 24770 - }, - { - "epoch": 0.20022786221608124, - "grad_norm": 3024.259033203125, - "learning_rate": 4.854440237460418e-05, - "loss": 160.1678, - "step": 24780 - }, - { - "epoch": 0.20030866442036538, - "grad_norm": 567.3709716796875, - "learning_rate": 4.85420540539189e-05, - "loss": 121.92, - "step": 24790 - }, - { - "epoch": 0.20038946662464951, - "grad_norm": 2176.835205078125, - "learning_rate": 4.8539703897375755e-05, - "loss": 129.3314, - "step": 24800 - }, - { - "epoch": 0.20047026882893365, - "grad_norm": 1329.7113037109375, - "learning_rate": 4.853735190515804e-05, - "loss": 144.1131, - "step": 24810 - }, - { - "epoch": 0.2005510710332178, - "grad_norm": 690.2374877929688, - "learning_rate": 4.853499807744916e-05, - "loss": 114.6445, - "step": 24820 - }, - { - "epoch": 0.20063187323750192, - "grad_norm": 1003.6576538085938, - "learning_rate": 4.8532642414432674e-05, - "loss": 125.5531, - "step": 24830 - }, - { - "epoch": 0.20071267544178606, - "grad_norm": 1643.6593017578125, - "learning_rate": 4.853028491629228e-05, - "loss": 129.8056, - "step": 24840 - }, - { - "epoch": 0.2007934776460702, - "grad_norm": 1115.5286865234375, - "learning_rate": 4.852792558321182e-05, - "loss": 117.099, - "step": 24850 - }, - { - "epoch": 0.20087427985035433, - "grad_norm": 869.8193359375, - "learning_rate": 4.852556441537528e-05, - "loss": 137.8783, - "step": 24860 - }, - { - "epoch": 0.20095508205463844, - "grad_norm": 1346.8846435546875, - "learning_rate": 4.852320141296679e-05, - "loss": 178.0168, - "step": 24870 - }, - { - "epoch": 0.20103588425892258, - "grad_norm": 6745.25146484375, - "learning_rate": 4.852083657617061e-05, - "loss": 162.7681, - "step": 24880 - }, - { - "epoch": 0.2011166864632067, - "grad_norm": 822.9663696289062, - "learning_rate": 4.851846990517118e-05, - "loss": 172.4344, - "step": 24890 - }, - { - "epoch": 0.20119748866749085, - "grad_norm": 992.9334106445312, - "learning_rate": 4.851610140015304e-05, - "loss": 151.3646, - "step": 24900 - }, - { - "epoch": 0.20127829087177498, - "grad_norm": 1492.8782958984375, - "learning_rate": 4.8513731061300887e-05, - "loss": 156.8174, - "step": 24910 - }, - { - "epoch": 0.20135909307605912, - "grad_norm": 555.8761596679688, - "learning_rate": 4.851135888879958e-05, - "loss": 93.0914, - "step": 24920 - }, - { - "epoch": 0.20143989528034326, - "grad_norm": 540.625244140625, - "learning_rate": 4.85089848828341e-05, - "loss": 148.2333, - "step": 24930 - }, - { - "epoch": 0.2015206974846274, - "grad_norm": 930.197509765625, - "learning_rate": 4.850660904358956e-05, - "loss": 84.9851, - "step": 24940 - }, - { - "epoch": 0.2016014996889115, - "grad_norm": 919.1648559570312, - "learning_rate": 4.8504231371251255e-05, - "loss": 102.8788, - "step": 24950 - }, - { - "epoch": 0.20168230189319564, - "grad_norm": 779.4244384765625, - "learning_rate": 4.85018518660046e-05, - "loss": 156.6919, - "step": 24960 - }, - { - "epoch": 0.20176310409747977, - "grad_norm": 1119.236328125, - "learning_rate": 4.849947052803514e-05, - "loss": 137.3522, - "step": 24970 - }, - { - "epoch": 0.2018439063017639, - "grad_norm": 1217.1375732421875, - "learning_rate": 4.849708735752859e-05, - "loss": 154.3695, - "step": 24980 - }, - { - "epoch": 0.20192470850604805, - "grad_norm": 909.9269409179688, - "learning_rate": 4.849470235467078e-05, - "loss": 126.6971, - "step": 24990 - }, - { - "epoch": 0.20200551071033218, - "grad_norm": 771.6845092773438, - "learning_rate": 4.849231551964771e-05, - "loss": 168.5038, - "step": 25000 - }, - { - "epoch": 0.20208631291461632, - "grad_norm": 486.8782958984375, - "learning_rate": 4.8489926852645505e-05, - "loss": 141.0097, - "step": 25010 - }, - { - "epoch": 0.20216711511890045, - "grad_norm": 1233.391845703125, - "learning_rate": 4.8487536353850444e-05, - "loss": 163.7804, - "step": 25020 - }, - { - "epoch": 0.2022479173231846, - "grad_norm": 755.8713989257812, - "learning_rate": 4.8485144023448936e-05, - "loss": 114.9309, - "step": 25030 - }, - { - "epoch": 0.2023287195274687, - "grad_norm": 1288.98876953125, - "learning_rate": 4.848274986162754e-05, - "loss": 128.1256, - "step": 25040 - }, - { - "epoch": 0.20240952173175283, - "grad_norm": 1058.4481201171875, - "learning_rate": 4.848035386857296e-05, - "loss": 108.0794, - "step": 25050 - }, - { - "epoch": 0.20249032393603697, - "grad_norm": 884.2367553710938, - "learning_rate": 4.847795604447204e-05, - "loss": 159.4138, - "step": 25060 - }, - { - "epoch": 0.2025711261403211, - "grad_norm": 1407.5733642578125, - "learning_rate": 4.847555638951177e-05, - "loss": 175.659, - "step": 25070 - }, - { - "epoch": 0.20265192834460524, - "grad_norm": 1126.87158203125, - "learning_rate": 4.8473154903879276e-05, - "loss": 147.1258, - "step": 25080 - }, - { - "epoch": 0.20273273054888938, - "grad_norm": 783.553955078125, - "learning_rate": 4.847075158776183e-05, - "loss": 114.147, - "step": 25090 - }, - { - "epoch": 0.20281353275317351, - "grad_norm": 1147.4105224609375, - "learning_rate": 4.846834644134686e-05, - "loss": 118.9104, - "step": 25100 - }, - { - "epoch": 0.20289433495745765, - "grad_norm": 1978.75634765625, - "learning_rate": 4.84659394648219e-05, - "loss": 138.3229, - "step": 25110 - }, - { - "epoch": 0.20297513716174176, - "grad_norm": 920.2445068359375, - "learning_rate": 4.846353065837467e-05, - "loss": 192.1806, - "step": 25120 - }, - { - "epoch": 0.2030559393660259, - "grad_norm": 868.54052734375, - "learning_rate": 4.846112002219301e-05, - "loss": 104.2504, - "step": 25130 - }, - { - "epoch": 0.20313674157031003, - "grad_norm": 860.5809936523438, - "learning_rate": 4.845870755646491e-05, - "loss": 126.0064, - "step": 25140 - }, - { - "epoch": 0.20321754377459417, - "grad_norm": 959.9432983398438, - "learning_rate": 4.845629326137849e-05, - "loss": 92.2342, - "step": 25150 - }, - { - "epoch": 0.2032983459788783, - "grad_norm": 1110.1014404296875, - "learning_rate": 4.845387713712203e-05, - "loss": 140.1007, - "step": 25160 - }, - { - "epoch": 0.20337914818316244, - "grad_norm": 635.9019165039062, - "learning_rate": 4.845145918388393e-05, - "loss": 120.0254, - "step": 25170 - }, - { - "epoch": 0.20345995038744658, - "grad_norm": 734.4845581054688, - "learning_rate": 4.844903940185276e-05, - "loss": 110.2926, - "step": 25180 - }, - { - "epoch": 0.2035407525917307, - "grad_norm": 912.1077270507812, - "learning_rate": 4.844661779121723e-05, - "loss": 126.2406, - "step": 25190 - }, - { - "epoch": 0.20362155479601485, - "grad_norm": 1300.3662109375, - "learning_rate": 4.844419435216615e-05, - "loss": 101.5818, - "step": 25200 - }, - { - "epoch": 0.20370235700029896, - "grad_norm": 824.0321044921875, - "learning_rate": 4.8441769084888534e-05, - "loss": 117.3934, - "step": 25210 - }, - { - "epoch": 0.2037831592045831, - "grad_norm": 1310.4876708984375, - "learning_rate": 4.84393419895735e-05, - "loss": 128.4549, - "step": 25220 - }, - { - "epoch": 0.20386396140886723, - "grad_norm": 931.2530517578125, - "learning_rate": 4.8436913066410316e-05, - "loss": 124.6242, - "step": 25230 - }, - { - "epoch": 0.20394476361315136, - "grad_norm": 1500.2799072265625, - "learning_rate": 4.843448231558839e-05, - "loss": 194.1914, - "step": 25240 - }, - { - "epoch": 0.2040255658174355, - "grad_norm": 1374.7579345703125, - "learning_rate": 4.843204973729729e-05, - "loss": 120.8163, - "step": 25250 - }, - { - "epoch": 0.20410636802171964, - "grad_norm": 487.600341796875, - "learning_rate": 4.84296153317267e-05, - "loss": 106.4792, - "step": 25260 - }, - { - "epoch": 0.20418717022600377, - "grad_norm": 851.5857543945312, - "learning_rate": 4.842717909906647e-05, - "loss": 129.366, - "step": 25270 - }, - { - "epoch": 0.2042679724302879, - "grad_norm": 671.7532348632812, - "learning_rate": 4.8424741039506575e-05, - "loss": 132.9468, - "step": 25280 - }, - { - "epoch": 0.20434877463457204, - "grad_norm": 1049.375, - "learning_rate": 4.8422301153237145e-05, - "loss": 105.2185, - "step": 25290 - }, - { - "epoch": 0.20442957683885615, - "grad_norm": 1166.4991455078125, - "learning_rate": 4.841985944044845e-05, - "loss": 108.833, - "step": 25300 - }, - { - "epoch": 0.2045103790431403, - "grad_norm": 1032.887451171875, - "learning_rate": 4.8417415901330886e-05, - "loss": 137.9159, - "step": 25310 - }, - { - "epoch": 0.20459118124742443, - "grad_norm": 990.6446533203125, - "learning_rate": 4.8414970536075024e-05, - "loss": 130.2095, - "step": 25320 - }, - { - "epoch": 0.20467198345170856, - "grad_norm": 520.9937744140625, - "learning_rate": 4.841252334487154e-05, - "loss": 118.172, - "step": 25330 - }, - { - "epoch": 0.2047527856559927, - "grad_norm": 1301.862060546875, - "learning_rate": 4.841007432791129e-05, - "loss": 99.7393, - "step": 25340 - }, - { - "epoch": 0.20483358786027683, - "grad_norm": 2645.5419921875, - "learning_rate": 4.8407623485385234e-05, - "loss": 175.9606, - "step": 25350 - }, - { - "epoch": 0.20491439006456097, - "grad_norm": 1080.4825439453125, - "learning_rate": 4.8405170817484515e-05, - "loss": 138.9016, - "step": 25360 - }, - { - "epoch": 0.2049951922688451, - "grad_norm": 929.4873657226562, - "learning_rate": 4.840271632440038e-05, - "loss": 100.8917, - "step": 25370 - }, - { - "epoch": 0.20507599447312921, - "grad_norm": 1421.6605224609375, - "learning_rate": 4.8400260006324235e-05, - "loss": 121.3161, - "step": 25380 - }, - { - "epoch": 0.20515679667741335, - "grad_norm": 2122.27734375, - "learning_rate": 4.8397801863447635e-05, - "loss": 151.1918, - "step": 25390 - }, - { - "epoch": 0.20523759888169749, - "grad_norm": 684.4984741210938, - "learning_rate": 4.839534189596228e-05, - "loss": 109.27, - "step": 25400 - }, - { - "epoch": 0.20531840108598162, - "grad_norm": 677.5164184570312, - "learning_rate": 4.839288010405998e-05, - "loss": 119.8391, - "step": 25410 - }, - { - "epoch": 0.20539920329026576, - "grad_norm": 1245.94775390625, - "learning_rate": 4.8390416487932733e-05, - "loss": 140.2648, - "step": 25420 - }, - { - "epoch": 0.2054800054945499, - "grad_norm": 712.5291137695312, - "learning_rate": 4.838795104777265e-05, - "loss": 151.7347, - "step": 25430 - }, - { - "epoch": 0.20556080769883403, - "grad_norm": 1312.07763671875, - "learning_rate": 4.8385483783771986e-05, - "loss": 141.6269, - "step": 25440 - }, - { - "epoch": 0.20564160990311817, - "grad_norm": 1205.7305908203125, - "learning_rate": 4.8383014696123144e-05, - "loss": 144.6157, - "step": 25450 - }, - { - "epoch": 0.2057224121074023, - "grad_norm": 907.8541870117188, - "learning_rate": 4.8380543785018677e-05, - "loss": 144.2005, - "step": 25460 - }, - { - "epoch": 0.2058032143116864, - "grad_norm": 2062.184326171875, - "learning_rate": 4.837807105065127e-05, - "loss": 175.2769, - "step": 25470 - }, - { - "epoch": 0.20588401651597055, - "grad_norm": 2498.91357421875, - "learning_rate": 4.837559649321374e-05, - "loss": 171.3787, - "step": 25480 - }, - { - "epoch": 0.20596481872025468, - "grad_norm": 1122.5225830078125, - "learning_rate": 4.837312011289907e-05, - "loss": 108.6965, - "step": 25490 - }, - { - "epoch": 0.20604562092453882, - "grad_norm": 956.6959228515625, - "learning_rate": 4.837064190990036e-05, - "loss": 126.9656, - "step": 25500 - }, - { - "epoch": 0.20612642312882296, - "grad_norm": 1437.0673828125, - "learning_rate": 4.836816188441089e-05, - "loss": 132.1063, - "step": 25510 - }, - { - "epoch": 0.2062072253331071, - "grad_norm": 1846.10107421875, - "learning_rate": 4.8365680036624026e-05, - "loss": 118.0759, - "step": 25520 - }, - { - "epoch": 0.20628802753739123, - "grad_norm": 1095.728759765625, - "learning_rate": 4.836319636673334e-05, - "loss": 164.069, - "step": 25530 - }, - { - "epoch": 0.20636882974167536, - "grad_norm": 790.3544921875, - "learning_rate": 4.8360710874932485e-05, - "loss": 139.6676, - "step": 25540 - }, - { - "epoch": 0.20644963194595947, - "grad_norm": 1117.2850341796875, - "learning_rate": 4.8358223561415304e-05, - "loss": 191.3539, - "step": 25550 - }, - { - "epoch": 0.2065304341502436, - "grad_norm": 864.4963989257812, - "learning_rate": 4.8355734426375753e-05, - "loss": 106.5522, - "step": 25560 - }, - { - "epoch": 0.20661123635452774, - "grad_norm": 1150.80419921875, - "learning_rate": 4.8353243470007944e-05, - "loss": 137.633, - "step": 25570 - }, - { - "epoch": 0.20669203855881188, - "grad_norm": 711.8440551757812, - "learning_rate": 4.835075069250613e-05, - "loss": 135.9585, - "step": 25580 - }, - { - "epoch": 0.20677284076309602, - "grad_norm": 834.6475219726562, - "learning_rate": 4.8348256094064695e-05, - "loss": 143.0081, - "step": 25590 - }, - { - "epoch": 0.20685364296738015, - "grad_norm": 890.3765869140625, - "learning_rate": 4.834575967487817e-05, - "loss": 136.8771, - "step": 25600 - }, - { - "epoch": 0.2069344451716643, - "grad_norm": 1103.1605224609375, - "learning_rate": 4.8343261435141244e-05, - "loss": 118.1462, - "step": 25610 - }, - { - "epoch": 0.20701524737594842, - "grad_norm": 793.703125, - "learning_rate": 4.834076137504873e-05, - "loss": 163.1901, - "step": 25620 - }, - { - "epoch": 0.20709604958023256, - "grad_norm": 1009.6405639648438, - "learning_rate": 4.833825949479558e-05, - "loss": 116.1526, - "step": 25630 - }, - { - "epoch": 0.20717685178451667, - "grad_norm": 1411.4498291015625, - "learning_rate": 4.833575579457691e-05, - "loss": 127.8893, - "step": 25640 - }, - { - "epoch": 0.2072576539888008, - "grad_norm": 1009.4644775390625, - "learning_rate": 4.833325027458795e-05, - "loss": 113.0567, - "step": 25650 - }, - { - "epoch": 0.20733845619308494, - "grad_norm": 1513.2891845703125, - "learning_rate": 4.83307429350241e-05, - "loss": 116.2465, - "step": 25660 - }, - { - "epoch": 0.20741925839736908, - "grad_norm": 983.2410888671875, - "learning_rate": 4.832823377608087e-05, - "loss": 125.5506, - "step": 25670 - }, - { - "epoch": 0.2075000606016532, - "grad_norm": 1268.5283203125, - "learning_rate": 4.8325722797953945e-05, - "loss": 117.4708, - "step": 25680 - }, - { - "epoch": 0.20758086280593735, - "grad_norm": 1357.982666015625, - "learning_rate": 4.8323210000839124e-05, - "loss": 135.2106, - "step": 25690 - }, - { - "epoch": 0.20766166501022149, - "grad_norm": 584.0740356445312, - "learning_rate": 4.832069538493237e-05, - "loss": 121.6634, - "step": 25700 - }, - { - "epoch": 0.20774246721450562, - "grad_norm": 1503.9981689453125, - "learning_rate": 4.831817895042977e-05, - "loss": 157.5844, - "step": 25710 - }, - { - "epoch": 0.20782326941878976, - "grad_norm": 731.34765625, - "learning_rate": 4.8315660697527566e-05, - "loss": 102.3727, - "step": 25720 - }, - { - "epoch": 0.20790407162307387, - "grad_norm": 1006.2964477539062, - "learning_rate": 4.8313140626422125e-05, - "loss": 117.4281, - "step": 25730 - }, - { - "epoch": 0.207984873827358, - "grad_norm": 1379.1805419921875, - "learning_rate": 4.831061873730999e-05, - "loss": 147.2082, - "step": 25740 - }, - { - "epoch": 0.20806567603164214, - "grad_norm": 709.5120849609375, - "learning_rate": 4.830809503038781e-05, - "loss": 140.2255, - "step": 25750 - }, - { - "epoch": 0.20814647823592627, - "grad_norm": 1492.2911376953125, - "learning_rate": 4.830556950585238e-05, - "loss": 136.4272, - "step": 25760 - }, - { - "epoch": 0.2082272804402104, - "grad_norm": 816.3794555664062, - "learning_rate": 4.830304216390066e-05, - "loss": 125.1439, - "step": 25770 - }, - { - "epoch": 0.20830808264449455, - "grad_norm": 710.224365234375, - "learning_rate": 4.8300513004729735e-05, - "loss": 107.5921, - "step": 25780 - }, - { - "epoch": 0.20838888484877868, - "grad_norm": 1284.1580810546875, - "learning_rate": 4.8297982028536826e-05, - "loss": 139.3743, - "step": 25790 - }, - { - "epoch": 0.20846968705306282, - "grad_norm": 1138.1129150390625, - "learning_rate": 4.829544923551931e-05, - "loss": 99.015, - "step": 25800 - }, - { - "epoch": 0.20855048925734693, - "grad_norm": 784.6410522460938, - "learning_rate": 4.82929146258747e-05, - "loss": 130.2286, - "step": 25810 - }, - { - "epoch": 0.20863129146163106, - "grad_norm": 1077.0406494140625, - "learning_rate": 4.829037819980065e-05, - "loss": 105.2139, - "step": 25820 - }, - { - "epoch": 0.2087120936659152, - "grad_norm": 825.64404296875, - "learning_rate": 4.828783995749495e-05, - "loss": 113.7267, - "step": 25830 - }, - { - "epoch": 0.20879289587019934, - "grad_norm": 977.2786865234375, - "learning_rate": 4.828529989915555e-05, - "loss": 106.4261, - "step": 25840 - }, - { - "epoch": 0.20887369807448347, - "grad_norm": 934.3041381835938, - "learning_rate": 4.828275802498051e-05, - "loss": 141.9984, - "step": 25850 - }, - { - "epoch": 0.2089545002787676, - "grad_norm": 699.7372436523438, - "learning_rate": 4.828021433516806e-05, - "loss": 106.2388, - "step": 25860 - }, - { - "epoch": 0.20903530248305174, - "grad_norm": 697.2062377929688, - "learning_rate": 4.827766882991657e-05, - "loss": 145.6762, - "step": 25870 - }, - { - "epoch": 0.20911610468733588, - "grad_norm": 1464.6607666015625, - "learning_rate": 4.827512150942454e-05, - "loss": 152.7667, - "step": 25880 - }, - { - "epoch": 0.20919690689162002, - "grad_norm": 1068.4786376953125, - "learning_rate": 4.82725723738906e-05, - "loss": 185.1981, - "step": 25890 - }, - { - "epoch": 0.20927770909590412, - "grad_norm": 995.4002685546875, - "learning_rate": 4.8270021423513554e-05, - "loss": 96.3248, - "step": 25900 - }, - { - "epoch": 0.20935851130018826, - "grad_norm": 974.28369140625, - "learning_rate": 4.8267468658492335e-05, - "loss": 119.5179, - "step": 25910 - }, - { - "epoch": 0.2094393135044724, - "grad_norm": 1118.69873046875, - "learning_rate": 4.826491407902599e-05, - "loss": 127.6396, - "step": 25920 - }, - { - "epoch": 0.20952011570875653, - "grad_norm": 879.0490112304688, - "learning_rate": 4.8262357685313754e-05, - "loss": 171.0668, - "step": 25930 - }, - { - "epoch": 0.20960091791304067, - "grad_norm": 814.1256713867188, - "learning_rate": 4.8259799477554965e-05, - "loss": 129.3575, - "step": 25940 - }, - { - "epoch": 0.2096817201173248, - "grad_norm": 588.3995971679688, - "learning_rate": 4.8257239455949124e-05, - "loss": 97.7855, - "step": 25950 - }, - { - "epoch": 0.20976252232160894, - "grad_norm": 932.3455200195312, - "learning_rate": 4.825467762069585e-05, - "loss": 172.8172, - "step": 25960 - }, - { - "epoch": 0.20984332452589308, - "grad_norm": 867.6666870117188, - "learning_rate": 4.825211397199495e-05, - "loss": 128.0637, - "step": 25970 - }, - { - "epoch": 0.2099241267301772, - "grad_norm": 826.07275390625, - "learning_rate": 4.824954851004633e-05, - "loss": 123.4536, - "step": 25980 - }, - { - "epoch": 0.21000492893446132, - "grad_norm": 2970.945556640625, - "learning_rate": 4.824698123505004e-05, - "loss": 122.9196, - "step": 25990 - }, - { - "epoch": 0.21008573113874546, - "grad_norm": 2031.5130615234375, - "learning_rate": 4.8244412147206284e-05, - "loss": 143.9969, - "step": 26000 - }, - { - "epoch": 0.2101665333430296, - "grad_norm": 788.4472045898438, - "learning_rate": 4.824184124671542e-05, - "loss": 115.8756, - "step": 26010 - }, - { - "epoch": 0.21024733554731373, - "grad_norm": 1001.9306030273438, - "learning_rate": 4.823926853377791e-05, - "loss": 115.5001, - "step": 26020 - }, - { - "epoch": 0.21032813775159787, - "grad_norm": 1247.80517578125, - "learning_rate": 4.8236694008594405e-05, - "loss": 110.787, - "step": 26030 - }, - { - "epoch": 0.210408939955882, - "grad_norm": 717.9345703125, - "learning_rate": 4.823411767136565e-05, - "loss": 124.2575, - "step": 26040 - }, - { - "epoch": 0.21048974216016614, - "grad_norm": 1611.0003662109375, - "learning_rate": 4.8231539522292564e-05, - "loss": 144.1445, - "step": 26050 - }, - { - "epoch": 0.21057054436445027, - "grad_norm": 1778.65283203125, - "learning_rate": 4.822895956157619e-05, - "loss": 127.7936, - "step": 26060 - }, - { - "epoch": 0.21065134656873438, - "grad_norm": 540.8101196289062, - "learning_rate": 4.822637778941772e-05, - "loss": 166.5897, - "step": 26070 - }, - { - "epoch": 0.21073214877301852, - "grad_norm": 1019.2218627929688, - "learning_rate": 4.822379420601849e-05, - "loss": 147.6264, - "step": 26080 - }, - { - "epoch": 0.21081295097730265, - "grad_norm": 3861.712646484375, - "learning_rate": 4.822120881157998e-05, - "loss": 149.0841, - "step": 26090 - }, - { - "epoch": 0.2108937531815868, - "grad_norm": 1602.623291015625, - "learning_rate": 4.821862160630378e-05, - "loss": 155.5116, - "step": 26100 - }, - { - "epoch": 0.21097455538587093, - "grad_norm": 1246.1142578125, - "learning_rate": 4.821603259039167e-05, - "loss": 147.062, - "step": 26110 - }, - { - "epoch": 0.21105535759015506, - "grad_norm": 675.6361083984375, - "learning_rate": 4.821344176404554e-05, - "loss": 128.0344, - "step": 26120 - }, - { - "epoch": 0.2111361597944392, - "grad_norm": 924.2776489257812, - "learning_rate": 4.821084912746742e-05, - "loss": 150.0589, - "step": 26130 - }, - { - "epoch": 0.21121696199872333, - "grad_norm": 1006.2265014648438, - "learning_rate": 4.8208254680859494e-05, - "loss": 116.4737, - "step": 26140 - }, - { - "epoch": 0.21129776420300747, - "grad_norm": 926.9533081054688, - "learning_rate": 4.820565842442408e-05, - "loss": 166.5869, - "step": 26150 - }, - { - "epoch": 0.21137856640729158, - "grad_norm": 1015.1038208007812, - "learning_rate": 4.820306035836365e-05, - "loss": 133.3756, - "step": 26160 - }, - { - "epoch": 0.21145936861157572, - "grad_norm": 871.2703247070312, - "learning_rate": 4.82004604828808e-05, - "loss": 113.9867, - "step": 26170 - }, - { - "epoch": 0.21154017081585985, - "grad_norm": 1194.6673583984375, - "learning_rate": 4.819785879817827e-05, - "loss": 159.1594, - "step": 26180 - }, - { - "epoch": 0.211620973020144, - "grad_norm": 1223.5343017578125, - "learning_rate": 4.8195255304458945e-05, - "loss": 183.4047, - "step": 26190 - }, - { - "epoch": 0.21170177522442812, - "grad_norm": 892.5057373046875, - "learning_rate": 4.8192650001925855e-05, - "loss": 116.7209, - "step": 26200 - }, - { - "epoch": 0.21178257742871226, - "grad_norm": 797.1787109375, - "learning_rate": 4.819004289078217e-05, - "loss": 149.5302, - "step": 26210 - }, - { - "epoch": 0.2118633796329964, - "grad_norm": 1227.952880859375, - "learning_rate": 4.818743397123119e-05, - "loss": 166.3836, - "step": 26220 - }, - { - "epoch": 0.21194418183728053, - "grad_norm": 1853.9193115234375, - "learning_rate": 4.8184823243476364e-05, - "loss": 108.0865, - "step": 26230 - }, - { - "epoch": 0.21202498404156464, - "grad_norm": 995.2237548828125, - "learning_rate": 4.8182210707721284e-05, - "loss": 104.2278, - "step": 26240 - }, - { - "epoch": 0.21210578624584878, - "grad_norm": 978.8897705078125, - "learning_rate": 4.817959636416969e-05, - "loss": 114.6236, - "step": 26250 - }, - { - "epoch": 0.2121865884501329, - "grad_norm": 1148.7147216796875, - "learning_rate": 4.8176980213025434e-05, - "loss": 144.7827, - "step": 26260 - }, - { - "epoch": 0.21226739065441705, - "grad_norm": 924.343017578125, - "learning_rate": 4.817436225449255e-05, - "loss": 107.1034, - "step": 26270 - }, - { - "epoch": 0.21234819285870118, - "grad_norm": 1518.0150146484375, - "learning_rate": 4.817174248877518e-05, - "loss": 128.9722, - "step": 26280 - }, - { - "epoch": 0.21242899506298532, - "grad_norm": 3798.2607421875, - "learning_rate": 4.816912091607762e-05, - "loss": 135.2327, - "step": 26290 - }, - { - "epoch": 0.21250979726726946, - "grad_norm": 1386.0970458984375, - "learning_rate": 4.81664975366043e-05, - "loss": 133.4084, - "step": 26300 - }, - { - "epoch": 0.2125905994715536, - "grad_norm": 813.0361328125, - "learning_rate": 4.8163872350559816e-05, - "loss": 128.386, - "step": 26310 - }, - { - "epoch": 0.21267140167583773, - "grad_norm": 1119.896484375, - "learning_rate": 4.8161245358148866e-05, - "loss": 162.116, - "step": 26320 - }, - { - "epoch": 0.21275220388012184, - "grad_norm": 1359.4432373046875, - "learning_rate": 4.815861655957632e-05, - "loss": 137.6646, - "step": 26330 - }, - { - "epoch": 0.21283300608440597, - "grad_norm": 689.84814453125, - "learning_rate": 4.815598595504717e-05, - "loss": 177.9907, - "step": 26340 - }, - { - "epoch": 0.2129138082886901, - "grad_norm": 1362.3492431640625, - "learning_rate": 4.8153353544766553e-05, - "loss": 142.146, - "step": 26350 - }, - { - "epoch": 0.21299461049297425, - "grad_norm": 652.8076171875, - "learning_rate": 4.8150719328939755e-05, - "loss": 102.2647, - "step": 26360 - }, - { - "epoch": 0.21307541269725838, - "grad_norm": 1369.128173828125, - "learning_rate": 4.81480833077722e-05, - "loss": 131.6185, - "step": 26370 - }, - { - "epoch": 0.21315621490154252, - "grad_norm": 1084.37744140625, - "learning_rate": 4.814544548146945e-05, - "loss": 142.2492, - "step": 26380 - }, - { - "epoch": 0.21323701710582665, - "grad_norm": 1083.496826171875, - "learning_rate": 4.814280585023721e-05, - "loss": 124.4456, - "step": 26390 - }, - { - "epoch": 0.2133178193101108, - "grad_norm": 1242.36376953125, - "learning_rate": 4.8140164414281306e-05, - "loss": 159.9246, - "step": 26400 - }, - { - "epoch": 0.21339862151439493, - "grad_norm": 1229.6868896484375, - "learning_rate": 4.813752117380774e-05, - "loss": 112.1197, - "step": 26410 - }, - { - "epoch": 0.21347942371867903, - "grad_norm": 1286.7855224609375, - "learning_rate": 4.813487612902264e-05, - "loss": 131.2134, - "step": 26420 - }, - { - "epoch": 0.21356022592296317, - "grad_norm": 693.5035400390625, - "learning_rate": 4.813222928013226e-05, - "loss": 103.498, - "step": 26430 - }, - { - "epoch": 0.2136410281272473, - "grad_norm": 1097.181640625, - "learning_rate": 4.812958062734302e-05, - "loss": 118.1474, - "step": 26440 - }, - { - "epoch": 0.21372183033153144, - "grad_norm": 632.88232421875, - "learning_rate": 4.812693017086145e-05, - "loss": 133.1473, - "step": 26450 - }, - { - "epoch": 0.21380263253581558, - "grad_norm": 922.8646850585938, - "learning_rate": 4.812427791089426e-05, - "loss": 149.3375, - "step": 26460 - }, - { - "epoch": 0.21388343474009971, - "grad_norm": 608.972900390625, - "learning_rate": 4.812162384764826e-05, - "loss": 121.0625, - "step": 26470 - }, - { - "epoch": 0.21396423694438385, - "grad_norm": 1129.250732421875, - "learning_rate": 4.811896798133042e-05, - "loss": 139.7497, - "step": 26480 - }, - { - "epoch": 0.214045039148668, - "grad_norm": 1012.6525268554688, - "learning_rate": 4.811631031214786e-05, - "loss": 129.4566, - "step": 26490 - }, - { - "epoch": 0.2141258413529521, - "grad_norm": 880.7243041992188, - "learning_rate": 4.8113650840307834e-05, - "loss": 137.6255, - "step": 26500 - }, - { - "epoch": 0.21420664355723623, - "grad_norm": 2069.876708984375, - "learning_rate": 4.8110989566017716e-05, - "loss": 127.9559, - "step": 26510 - }, - { - "epoch": 0.21428744576152037, - "grad_norm": 751.013916015625, - "learning_rate": 4.810832648948505e-05, - "loss": 107.5302, - "step": 26520 - }, - { - "epoch": 0.2143682479658045, - "grad_norm": 2043.9150390625, - "learning_rate": 4.810566161091751e-05, - "loss": 135.8175, - "step": 26530 - }, - { - "epoch": 0.21444905017008864, - "grad_norm": 986.4581909179688, - "learning_rate": 4.810299493052289e-05, - "loss": 144.5186, - "step": 26540 - }, - { - "epoch": 0.21452985237437278, - "grad_norm": 734.1927490234375, - "learning_rate": 4.810032644850917e-05, - "loss": 108.4408, - "step": 26550 - }, - { - "epoch": 0.2146106545786569, - "grad_norm": 1744.6483154296875, - "learning_rate": 4.809765616508443e-05, - "loss": 149.5238, - "step": 26560 - }, - { - "epoch": 0.21469145678294105, - "grad_norm": 1414.4942626953125, - "learning_rate": 4.8094984080456904e-05, - "loss": 113.2241, - "step": 26570 - }, - { - "epoch": 0.21477225898722518, - "grad_norm": 929.7769165039062, - "learning_rate": 4.809231019483497e-05, - "loss": 173.2804, - "step": 26580 - }, - { - "epoch": 0.2148530611915093, - "grad_norm": 1248.590576171875, - "learning_rate": 4.808963450842713e-05, - "loss": 107.3851, - "step": 26590 - }, - { - "epoch": 0.21493386339579343, - "grad_norm": 1366.1041259765625, - "learning_rate": 4.808695702144206e-05, - "loss": 141.0534, - "step": 26600 - }, - { - "epoch": 0.21501466560007756, - "grad_norm": 1050.7523193359375, - "learning_rate": 4.8084277734088544e-05, - "loss": 122.0444, - "step": 26610 - }, - { - "epoch": 0.2150954678043617, - "grad_norm": 684.057861328125, - "learning_rate": 4.808159664657552e-05, - "loss": 121.2964, - "step": 26620 - }, - { - "epoch": 0.21517627000864584, - "grad_norm": 1231.421875, - "learning_rate": 4.8078913759112066e-05, - "loss": 129.7365, - "step": 26630 - }, - { - "epoch": 0.21525707221292997, - "grad_norm": 691.0001831054688, - "learning_rate": 4.8076229071907397e-05, - "loss": 96.1693, - "step": 26640 - }, - { - "epoch": 0.2153378744172141, - "grad_norm": 790.1228637695312, - "learning_rate": 4.8073542585170877e-05, - "loss": 124.796, - "step": 26650 - }, - { - "epoch": 0.21541867662149825, - "grad_norm": 1218.955078125, - "learning_rate": 4.8070854299111994e-05, - "loss": 168.5747, - "step": 26660 - }, - { - "epoch": 0.21549947882578235, - "grad_norm": 600.30859375, - "learning_rate": 4.8068164213940393e-05, - "loss": 122.3959, - "step": 26670 - }, - { - "epoch": 0.2155802810300665, - "grad_norm": 1296.98681640625, - "learning_rate": 4.8065472329865854e-05, - "loss": 134.1798, - "step": 26680 - }, - { - "epoch": 0.21566108323435063, - "grad_norm": 927.8102416992188, - "learning_rate": 4.8062778647098284e-05, - "loss": 118.0701, - "step": 26690 - }, - { - "epoch": 0.21574188543863476, - "grad_norm": 955.7800903320312, - "learning_rate": 4.8060083165847754e-05, - "loss": 137.0455, - "step": 26700 - }, - { - "epoch": 0.2158226876429189, - "grad_norm": 981.5982055664062, - "learning_rate": 4.805738588632446e-05, - "loss": 170.17, - "step": 26710 - }, - { - "epoch": 0.21590348984720303, - "grad_norm": 687.5528564453125, - "learning_rate": 4.805468680873874e-05, - "loss": 134.7222, - "step": 26720 - }, - { - "epoch": 0.21598429205148717, - "grad_norm": 676.2338256835938, - "learning_rate": 4.805198593330107e-05, - "loss": 120.8275, - "step": 26730 - }, - { - "epoch": 0.2160650942557713, - "grad_norm": 725.0357055664062, - "learning_rate": 4.8049283260222075e-05, - "loss": 117.501, - "step": 26740 - }, - { - "epoch": 0.21614589646005544, - "grad_norm": 910.6663818359375, - "learning_rate": 4.8046578789712515e-05, - "loss": 139.2863, - "step": 26750 - }, - { - "epoch": 0.21622669866433955, - "grad_norm": 784.3283081054688, - "learning_rate": 4.8043872521983294e-05, - "loss": 122.1468, - "step": 26760 - }, - { - "epoch": 0.2163075008686237, - "grad_norm": 807.3689575195312, - "learning_rate": 4.804116445724543e-05, - "loss": 139.193, - "step": 26770 - }, - { - "epoch": 0.21638830307290782, - "grad_norm": 1669.650390625, - "learning_rate": 4.803845459571014e-05, - "loss": 146.6025, - "step": 26780 - }, - { - "epoch": 0.21646910527719196, - "grad_norm": 655.7134399414062, - "learning_rate": 4.8035742937588724e-05, - "loss": 104.5035, - "step": 26790 - }, - { - "epoch": 0.2165499074814761, - "grad_norm": 1027.62060546875, - "learning_rate": 4.803302948309264e-05, - "loss": 130.3401, - "step": 26800 - }, - { - "epoch": 0.21663070968576023, - "grad_norm": 1076.3868408203125, - "learning_rate": 4.803031423243349e-05, - "loss": 128.1202, - "step": 26810 - }, - { - "epoch": 0.21671151189004437, - "grad_norm": 900.8707885742188, - "learning_rate": 4.8027597185823016e-05, - "loss": 172.1492, - "step": 26820 - }, - { - "epoch": 0.2167923140943285, - "grad_norm": 1780.7559814453125, - "learning_rate": 4.802487834347311e-05, - "loss": 129.1261, - "step": 26830 - }, - { - "epoch": 0.21687311629861264, - "grad_norm": 1499.35546875, - "learning_rate": 4.802215770559577e-05, - "loss": 131.0699, - "step": 26840 - }, - { - "epoch": 0.21695391850289675, - "grad_norm": 878.696044921875, - "learning_rate": 4.801943527240318e-05, - "loss": 132.4833, - "step": 26850 - }, - { - "epoch": 0.21703472070718088, - "grad_norm": 1091.121337890625, - "learning_rate": 4.801671104410763e-05, - "loss": 155.2405, - "step": 26860 - }, - { - "epoch": 0.21711552291146502, - "grad_norm": 923.06494140625, - "learning_rate": 4.801398502092156e-05, - "loss": 137.8175, - "step": 26870 - }, - { - "epoch": 0.21719632511574916, - "grad_norm": 794.525634765625, - "learning_rate": 4.8011257203057556e-05, - "loss": 97.5109, - "step": 26880 - }, - { - "epoch": 0.2172771273200333, - "grad_norm": 877.32275390625, - "learning_rate": 4.800852759072833e-05, - "loss": 110.9471, - "step": 26890 - }, - { - "epoch": 0.21735792952431743, - "grad_norm": 1468.381591796875, - "learning_rate": 4.800579618414676e-05, - "loss": 153.6146, - "step": 26900 - }, - { - "epoch": 0.21743873172860156, - "grad_norm": 880.5524291992188, - "learning_rate": 4.800306298352583e-05, - "loss": 135.6863, - "step": 26910 - }, - { - "epoch": 0.2175195339328857, - "grad_norm": 934.380859375, - "learning_rate": 4.800032798907869e-05, - "loss": 203.4798, - "step": 26920 - }, - { - "epoch": 0.2176003361371698, - "grad_norm": 907.826171875, - "learning_rate": 4.799759120101861e-05, - "loss": 134.1106, - "step": 26930 - }, - { - "epoch": 0.21768113834145394, - "grad_norm": 1250.1951904296875, - "learning_rate": 4.7994852619559016e-05, - "loss": 119.1052, - "step": 26940 - }, - { - "epoch": 0.21776194054573808, - "grad_norm": 1387.033203125, - "learning_rate": 4.799211224491348e-05, - "loss": 132.6458, - "step": 26950 - }, - { - "epoch": 0.21784274275002222, - "grad_norm": 1170.08203125, - "learning_rate": 4.798937007729568e-05, - "loss": 90.3192, - "step": 26960 - }, - { - "epoch": 0.21792354495430635, - "grad_norm": 2206.882568359375, - "learning_rate": 4.798662611691947e-05, - "loss": 143.2918, - "step": 26970 - }, - { - "epoch": 0.2180043471585905, - "grad_norm": 1047.681396484375, - "learning_rate": 4.798388036399883e-05, - "loss": 94.9052, - "step": 26980 - }, - { - "epoch": 0.21808514936287463, - "grad_norm": 1360.6759033203125, - "learning_rate": 4.7981132818747876e-05, - "loss": 124.3164, - "step": 26990 - }, - { - "epoch": 0.21816595156715876, - "grad_norm": 985.3904418945312, - "learning_rate": 4.797838348138086e-05, - "loss": 100.3646, - "step": 27000 - }, - { - "epoch": 0.2182467537714429, - "grad_norm": 967.2728881835938, - "learning_rate": 4.7975632352112195e-05, - "loss": 138.4862, - "step": 27010 - }, - { - "epoch": 0.218327555975727, - "grad_norm": 1026.8212890625, - "learning_rate": 4.797287943115641e-05, - "loss": 102.4904, - "step": 27020 - }, - { - "epoch": 0.21840835818001114, - "grad_norm": 752.27294921875, - "learning_rate": 4.7970124718728193e-05, - "loss": 152.0597, - "step": 27030 - }, - { - "epoch": 0.21848916038429528, - "grad_norm": 1311.3223876953125, - "learning_rate": 4.796736821504235e-05, - "loss": 120.0449, - "step": 27040 - }, - { - "epoch": 0.21856996258857941, - "grad_norm": 1012.3485717773438, - "learning_rate": 4.796460992031385e-05, - "loss": 134.3791, - "step": 27050 - }, - { - "epoch": 0.21865076479286355, - "grad_norm": 1459.735595703125, - "learning_rate": 4.7961849834757786e-05, - "loss": 184.9713, - "step": 27060 - }, - { - "epoch": 0.2187315669971477, - "grad_norm": 831.3568115234375, - "learning_rate": 4.7959087958589386e-05, - "loss": 140.2156, - "step": 27070 - }, - { - "epoch": 0.21881236920143182, - "grad_norm": 1032.8001708984375, - "learning_rate": 4.795632429202405e-05, - "loss": 115.3522, - "step": 27080 - }, - { - "epoch": 0.21889317140571596, - "grad_norm": 1081.9893798828125, - "learning_rate": 4.795355883527727e-05, - "loss": 112.2656, - "step": 27090 - }, - { - "epoch": 0.2189739736100001, - "grad_norm": 1388.781494140625, - "learning_rate": 4.79507915885647e-05, - "loss": 123.8324, - "step": 27100 - }, - { - "epoch": 0.2190547758142842, - "grad_norm": 1004.865478515625, - "learning_rate": 4.794802255210217e-05, - "loss": 120.2996, - "step": 27110 - }, - { - "epoch": 0.21913557801856834, - "grad_norm": 672.1980590820312, - "learning_rate": 4.794525172610558e-05, - "loss": 131.216, - "step": 27120 - }, - { - "epoch": 0.21921638022285247, - "grad_norm": 1661.7108154296875, - "learning_rate": 4.7942479110791015e-05, - "loss": 148.4288, - "step": 27130 - }, - { - "epoch": 0.2192971824271366, - "grad_norm": 881.720947265625, - "learning_rate": 4.793970470637469e-05, - "loss": 138.7089, - "step": 27140 - }, - { - "epoch": 0.21937798463142075, - "grad_norm": 1953.3299560546875, - "learning_rate": 4.7936928513072964e-05, - "loss": 116.5807, - "step": 27150 - }, - { - "epoch": 0.21945878683570488, - "grad_norm": 918.8439331054688, - "learning_rate": 4.793415053110233e-05, - "loss": 91.1989, - "step": 27160 - }, - { - "epoch": 0.21953958903998902, - "grad_norm": 993.3352661132812, - "learning_rate": 4.793137076067942e-05, - "loss": 114.7209, - "step": 27170 - }, - { - "epoch": 0.21962039124427316, - "grad_norm": 1023.3936157226562, - "learning_rate": 4.792858920202099e-05, - "loss": 134.3778, - "step": 27180 - }, - { - "epoch": 0.21970119344855726, - "grad_norm": 792.4373779296875, - "learning_rate": 4.7925805855343975e-05, - "loss": 142.2858, - "step": 27190 - }, - { - "epoch": 0.2197819956528414, - "grad_norm": 613.7577514648438, - "learning_rate": 4.7923020720865414e-05, - "loss": 80.3069, - "step": 27200 - }, - { - "epoch": 0.21986279785712554, - "grad_norm": 1159.410400390625, - "learning_rate": 4.792023379880249e-05, - "loss": 161.284, - "step": 27210 - }, - { - "epoch": 0.21994360006140967, - "grad_norm": 836.266845703125, - "learning_rate": 4.791744508937256e-05, - "loss": 135.635, - "step": 27220 - }, - { - "epoch": 0.2200244022656938, - "grad_norm": 890.9221801757812, - "learning_rate": 4.7914654592793065e-05, - "loss": 115.7453, - "step": 27230 - }, - { - "epoch": 0.22010520446997794, - "grad_norm": 1169.57080078125, - "learning_rate": 4.791186230928163e-05, - "loss": 109.5156, - "step": 27240 - }, - { - "epoch": 0.22018600667426208, - "grad_norm": 754.0150146484375, - "learning_rate": 4.790906823905599e-05, - "loss": 158.3504, - "step": 27250 - }, - { - "epoch": 0.22026680887854622, - "grad_norm": 1584.8555908203125, - "learning_rate": 4.790627238233405e-05, - "loss": 108.4285, - "step": 27260 - }, - { - "epoch": 0.22034761108283035, - "grad_norm": 578.4071044921875, - "learning_rate": 4.790347473933382e-05, - "loss": 110.3285, - "step": 27270 - }, - { - "epoch": 0.22042841328711446, - "grad_norm": 1166.0626220703125, - "learning_rate": 4.7900675310273466e-05, - "loss": 133.5697, - "step": 27280 - }, - { - "epoch": 0.2205092154913986, - "grad_norm": 739.291015625, - "learning_rate": 4.789787409537131e-05, - "loss": 108.0604, - "step": 27290 - }, - { - "epoch": 0.22059001769568273, - "grad_norm": 1197.711181640625, - "learning_rate": 4.789507109484579e-05, - "loss": 179.8366, - "step": 27300 - }, - { - "epoch": 0.22067081989996687, - "grad_norm": 1448.0531005859375, - "learning_rate": 4.789226630891548e-05, - "loss": 111.0121, - "step": 27310 - }, - { - "epoch": 0.220751622104251, - "grad_norm": 1181.0787353515625, - "learning_rate": 4.78894597377991e-05, - "loss": 142.5279, - "step": 27320 - }, - { - "epoch": 0.22083242430853514, - "grad_norm": 1266.8050537109375, - "learning_rate": 4.788665138171553e-05, - "loss": 134.3341, - "step": 27330 - }, - { - "epoch": 0.22091322651281928, - "grad_norm": 1248.870361328125, - "learning_rate": 4.7883841240883766e-05, - "loss": 154.5194, - "step": 27340 - }, - { - "epoch": 0.2209940287171034, - "grad_norm": 1024.0941162109375, - "learning_rate": 4.788102931552294e-05, - "loss": 101.4173, - "step": 27350 - }, - { - "epoch": 0.22107483092138752, - "grad_norm": 1009.3390502929688, - "learning_rate": 4.7878215605852336e-05, - "loss": 130.9474, - "step": 27360 - }, - { - "epoch": 0.22115563312567166, - "grad_norm": 551.6019897460938, - "learning_rate": 4.787540011209138e-05, - "loss": 126.8904, - "step": 27370 - }, - { - "epoch": 0.2212364353299558, - "grad_norm": 4834.69482421875, - "learning_rate": 4.787258283445962e-05, - "loss": 159.343, - "step": 27380 - }, - { - "epoch": 0.22131723753423993, - "grad_norm": 696.3270263671875, - "learning_rate": 4.7869763773176756e-05, - "loss": 129.8717, - "step": 27390 - }, - { - "epoch": 0.22139803973852407, - "grad_norm": 1361.56005859375, - "learning_rate": 4.7866942928462625e-05, - "loss": 137.8948, - "step": 27400 - }, - { - "epoch": 0.2214788419428082, - "grad_norm": 1086.3304443359375, - "learning_rate": 4.7864120300537206e-05, - "loss": 123.468, - "step": 27410 - }, - { - "epoch": 0.22155964414709234, - "grad_norm": 812.8670654296875, - "learning_rate": 4.786129588962061e-05, - "loss": 124.0909, - "step": 27420 - }, - { - "epoch": 0.22164044635137647, - "grad_norm": 867.7353515625, - "learning_rate": 4.785846969593308e-05, - "loss": 142.0064, - "step": 27430 - }, - { - "epoch": 0.2217212485556606, - "grad_norm": 744.391845703125, - "learning_rate": 4.7855641719695023e-05, - "loss": 127.3905, - "step": 27440 - }, - { - "epoch": 0.22180205075994472, - "grad_norm": 703.9810180664062, - "learning_rate": 4.785281196112698e-05, - "loss": 125.9367, - "step": 27450 - }, - { - "epoch": 0.22188285296422885, - "grad_norm": 1147.4344482421875, - "learning_rate": 4.7849980420449594e-05, - "loss": 106.5464, - "step": 27460 - }, - { - "epoch": 0.221963655168513, - "grad_norm": 901.2579345703125, - "learning_rate": 4.784714709788368e-05, - "loss": 168.9338, - "step": 27470 - }, - { - "epoch": 0.22204445737279713, - "grad_norm": 668.369140625, - "learning_rate": 4.7844311993650205e-05, - "loss": 107.8617, - "step": 27480 - }, - { - "epoch": 0.22212525957708126, - "grad_norm": 1104.68701171875, - "learning_rate": 4.7841475107970244e-05, - "loss": 147.408, - "step": 27490 - }, - { - "epoch": 0.2222060617813654, - "grad_norm": 2415.625, - "learning_rate": 4.783863644106502e-05, - "loss": 150.1873, - "step": 27500 - }, - { - "epoch": 0.22228686398564954, - "grad_norm": 1233.0235595703125, - "learning_rate": 4.783579599315591e-05, - "loss": 114.4763, - "step": 27510 - }, - { - "epoch": 0.22236766618993367, - "grad_norm": 756.919189453125, - "learning_rate": 4.7832953764464405e-05, - "loss": 139.5418, - "step": 27520 - }, - { - "epoch": 0.2224484683942178, - "grad_norm": 1683.7320556640625, - "learning_rate": 4.783010975521216e-05, - "loss": 114.8215, - "step": 27530 - }, - { - "epoch": 0.22252927059850192, - "grad_norm": 962.6278686523438, - "learning_rate": 4.782726396562094e-05, - "loss": 91.6996, - "step": 27540 - }, - { - "epoch": 0.22261007280278605, - "grad_norm": 963.0123291015625, - "learning_rate": 4.7824416395912686e-05, - "loss": 144.6082, - "step": 27550 - }, - { - "epoch": 0.2226908750070702, - "grad_norm": 1041.0611572265625, - "learning_rate": 4.782156704630944e-05, - "loss": 126.3812, - "step": 27560 - }, - { - "epoch": 0.22277167721135432, - "grad_norm": 886.5843505859375, - "learning_rate": 4.781871591703341e-05, - "loss": 119.2066, - "step": 27570 - }, - { - "epoch": 0.22285247941563846, - "grad_norm": 1099.4410400390625, - "learning_rate": 4.781586300830693e-05, - "loss": 161.6945, - "step": 27580 - }, - { - "epoch": 0.2229332816199226, - "grad_norm": 1446.4217529296875, - "learning_rate": 4.781300832035247e-05, - "loss": 114.2895, - "step": 27590 - }, - { - "epoch": 0.22301408382420673, - "grad_norm": 1160.1856689453125, - "learning_rate": 4.781015185339266e-05, - "loss": 138.825, - "step": 27600 - }, - { - "epoch": 0.22309488602849087, - "grad_norm": 1239.45751953125, - "learning_rate": 4.780729360765024e-05, - "loss": 99.6512, - "step": 27610 - }, - { - "epoch": 0.22317568823277498, - "grad_norm": 728.0436401367188, - "learning_rate": 4.78044335833481e-05, - "loss": 109.644, - "step": 27620 - }, - { - "epoch": 0.2232564904370591, - "grad_norm": 808.1099853515625, - "learning_rate": 4.780157178070928e-05, - "loss": 128.03, - "step": 27630 - }, - { - "epoch": 0.22333729264134325, - "grad_norm": 1795.611328125, - "learning_rate": 4.779870819995694e-05, - "loss": 167.6255, - "step": 27640 - }, - { - "epoch": 0.22341809484562739, - "grad_norm": 1749.26171875, - "learning_rate": 4.77958428413144e-05, - "loss": 119.4188, - "step": 27650 - }, - { - "epoch": 0.22349889704991152, - "grad_norm": 1491.797607421875, - "learning_rate": 4.779297570500509e-05, - "loss": 144.7149, - "step": 27660 - }, - { - "epoch": 0.22357969925419566, - "grad_norm": 1133.6943359375, - "learning_rate": 4.7790106791252614e-05, - "loss": 121.6786, - "step": 27670 - }, - { - "epoch": 0.2236605014584798, - "grad_norm": 1069.1954345703125, - "learning_rate": 4.7787236100280685e-05, - "loss": 162.1701, - "step": 27680 - }, - { - "epoch": 0.22374130366276393, - "grad_norm": 788.477783203125, - "learning_rate": 4.7784363632313166e-05, - "loss": 123.729, - "step": 27690 - }, - { - "epoch": 0.22382210586704807, - "grad_norm": 1368.757080078125, - "learning_rate": 4.778148938757406e-05, - "loss": 119.5443, - "step": 27700 - }, - { - "epoch": 0.22390290807133217, - "grad_norm": 862.5368041992188, - "learning_rate": 4.7778613366287505e-05, - "loss": 141.0422, - "step": 27710 - }, - { - "epoch": 0.2239837102756163, - "grad_norm": 1794.826416015625, - "learning_rate": 4.7775735568677775e-05, - "loss": 117.5449, - "step": 27720 - }, - { - "epoch": 0.22406451247990045, - "grad_norm": 569.207763671875, - "learning_rate": 4.777285599496929e-05, - "loss": 98.2038, - "step": 27730 - }, - { - "epoch": 0.22414531468418458, - "grad_norm": 1018.3662109375, - "learning_rate": 4.776997464538662e-05, - "loss": 131.7552, - "step": 27740 - }, - { - "epoch": 0.22422611688846872, - "grad_norm": 3829.990966796875, - "learning_rate": 4.776709152015443e-05, - "loss": 145.4028, - "step": 27750 - }, - { - "epoch": 0.22430691909275285, - "grad_norm": 1251.864501953125, - "learning_rate": 4.776420661949758e-05, - "loss": 149.958, - "step": 27760 - }, - { - "epoch": 0.224387721297037, - "grad_norm": 1502.33154296875, - "learning_rate": 4.776131994364102e-05, - "loss": 130.1318, - "step": 27770 - }, - { - "epoch": 0.22446852350132113, - "grad_norm": 868.755859375, - "learning_rate": 4.775843149280986e-05, - "loss": 130.6259, - "step": 27780 - }, - { - "epoch": 0.22454932570560526, - "grad_norm": 742.2652587890625, - "learning_rate": 4.775554126722935e-05, - "loss": 101.3733, - "step": 27790 - }, - { - "epoch": 0.22463012790988937, - "grad_norm": 1067.2916259765625, - "learning_rate": 4.775264926712489e-05, - "loss": 123.7865, - "step": 27800 - }, - { - "epoch": 0.2247109301141735, - "grad_norm": 700.7488403320312, - "learning_rate": 4.774975549272199e-05, - "loss": 108.8524, - "step": 27810 - }, - { - "epoch": 0.22479173231845764, - "grad_norm": 746.498779296875, - "learning_rate": 4.7746859944246325e-05, - "loss": 143.1128, - "step": 27820 - }, - { - "epoch": 0.22487253452274178, - "grad_norm": 1054.9879150390625, - "learning_rate": 4.7743962621923674e-05, - "loss": 156.6024, - "step": 27830 - }, - { - "epoch": 0.22495333672702592, - "grad_norm": 787.2058715820312, - "learning_rate": 4.7741063525980004e-05, - "loss": 123.9928, - "step": 27840 - }, - { - "epoch": 0.22503413893131005, - "grad_norm": 2773.7568359375, - "learning_rate": 4.773816265664136e-05, - "loss": 170.3954, - "step": 27850 - }, - { - "epoch": 0.2251149411355942, - "grad_norm": 1088.312744140625, - "learning_rate": 4.7735260014133986e-05, - "loss": 141.3007, - "step": 27860 - }, - { - "epoch": 0.22519574333987832, - "grad_norm": 802.5028686523438, - "learning_rate": 4.773235559868422e-05, - "loss": 124.4787, - "step": 27870 - }, - { - "epoch": 0.22527654554416243, - "grad_norm": 1310.7073974609375, - "learning_rate": 4.772944941051856e-05, - "loss": 131.9987, - "step": 27880 - }, - { - "epoch": 0.22535734774844657, - "grad_norm": 1221.5269775390625, - "learning_rate": 4.772654144986364e-05, - "loss": 132.4321, - "step": 27890 - }, - { - "epoch": 0.2254381499527307, - "grad_norm": 567.3670043945312, - "learning_rate": 4.772363171694622e-05, - "loss": 120.118, - "step": 27900 - }, - { - "epoch": 0.22551895215701484, - "grad_norm": 598.8217163085938, - "learning_rate": 4.772072021199321e-05, - "loss": 120.206, - "step": 27910 - }, - { - "epoch": 0.22559975436129898, - "grad_norm": 1259.5390625, - "learning_rate": 4.7717806935231665e-05, - "loss": 127.624, - "step": 27920 - }, - { - "epoch": 0.2256805565655831, - "grad_norm": 1135.011962890625, - "learning_rate": 4.7714891886888756e-05, - "loss": 117.8361, - "step": 27930 - }, - { - "epoch": 0.22576135876986725, - "grad_norm": 992.67138671875, - "learning_rate": 4.771197506719181e-05, - "loss": 143.1624, - "step": 27940 - }, - { - "epoch": 0.22584216097415138, - "grad_norm": 1266.8870849609375, - "learning_rate": 4.770905647636828e-05, - "loss": 132.7521, - "step": 27950 - }, - { - "epoch": 0.22592296317843552, - "grad_norm": 1239.60986328125, - "learning_rate": 4.770613611464577e-05, - "loss": 111.2638, - "step": 27960 - }, - { - "epoch": 0.22600376538271963, - "grad_norm": 700.7466430664062, - "learning_rate": 4.7703213982252016e-05, - "loss": 132.109, - "step": 27970 - }, - { - "epoch": 0.22608456758700377, - "grad_norm": 1049.5318603515625, - "learning_rate": 4.7700290079414896e-05, - "loss": 135.4894, - "step": 27980 - }, - { - "epoch": 0.2261653697912879, - "grad_norm": 1853.1363525390625, - "learning_rate": 4.769736440636241e-05, - "loss": 98.3203, - "step": 27990 - }, - { - "epoch": 0.22624617199557204, - "grad_norm": 481.7191467285156, - "learning_rate": 4.769443696332272e-05, - "loss": 107.8204, - "step": 28000 - }, - { - "epoch": 0.22632697419985617, - "grad_norm": 935.1353149414062, - "learning_rate": 4.769150775052411e-05, - "loss": 114.9302, - "step": 28010 - }, - { - "epoch": 0.2264077764041403, - "grad_norm": 1012.4119262695312, - "learning_rate": 4.7688576768194994e-05, - "loss": 107.6247, - "step": 28020 - }, - { - "epoch": 0.22648857860842445, - "grad_norm": 1244.735595703125, - "learning_rate": 4.7685644016563956e-05, - "loss": 148.6265, - "step": 28030 - }, - { - "epoch": 0.22656938081270858, - "grad_norm": 638.5089111328125, - "learning_rate": 4.768270949585968e-05, - "loss": 138.1391, - "step": 28040 - }, - { - "epoch": 0.2266501830169927, - "grad_norm": 1371.5699462890625, - "learning_rate": 4.767977320631103e-05, - "loss": 118.9907, - "step": 28050 - }, - { - "epoch": 0.22673098522127683, - "grad_norm": 871.253173828125, - "learning_rate": 4.767683514814696e-05, - "loss": 89.4048, - "step": 28060 - }, - { - "epoch": 0.22681178742556096, - "grad_norm": 1172.1558837890625, - "learning_rate": 4.767389532159659e-05, - "loss": 114.1923, - "step": 28070 - }, - { - "epoch": 0.2268925896298451, - "grad_norm": 1259.46337890625, - "learning_rate": 4.767095372688918e-05, - "loss": 125.5203, - "step": 28080 - }, - { - "epoch": 0.22697339183412923, - "grad_norm": 1035.0037841796875, - "learning_rate": 4.7668010364254124e-05, - "loss": 132.2316, - "step": 28090 - }, - { - "epoch": 0.22705419403841337, - "grad_norm": 1134.89013671875, - "learning_rate": 4.7665065233920945e-05, - "loss": 143.1766, - "step": 28100 - }, - { - "epoch": 0.2271349962426975, - "grad_norm": 1254.23046875, - "learning_rate": 4.766211833611931e-05, - "loss": 108.7819, - "step": 28110 - }, - { - "epoch": 0.22721579844698164, - "grad_norm": 735.98046875, - "learning_rate": 4.765916967107903e-05, - "loss": 110.1949, - "step": 28120 - }, - { - "epoch": 0.22729660065126578, - "grad_norm": 1707.35693359375, - "learning_rate": 4.7656219239030046e-05, - "loss": 117.764, - "step": 28130 - }, - { - "epoch": 0.2273774028555499, - "grad_norm": 2147.052978515625, - "learning_rate": 4.7653267040202436e-05, - "loss": 120.4257, - "step": 28140 - }, - { - "epoch": 0.22745820505983402, - "grad_norm": 1066.6827392578125, - "learning_rate": 4.7650313074826425e-05, - "loss": 119.0304, - "step": 28150 - }, - { - "epoch": 0.22753900726411816, - "grad_norm": 850.0482177734375, - "learning_rate": 4.764735734313236e-05, - "loss": 139.2531, - "step": 28160 - }, - { - "epoch": 0.2276198094684023, - "grad_norm": 1067.7423095703125, - "learning_rate": 4.764439984535074e-05, - "loss": 111.0478, - "step": 28170 - }, - { - "epoch": 0.22770061167268643, - "grad_norm": 880.06591796875, - "learning_rate": 4.764144058171219e-05, - "loss": 97.5755, - "step": 28180 - }, - { - "epoch": 0.22778141387697057, - "grad_norm": 978.5946655273438, - "learning_rate": 4.763847955244749e-05, - "loss": 133.401, - "step": 28190 - }, - { - "epoch": 0.2278622160812547, - "grad_norm": 1116.4317626953125, - "learning_rate": 4.763551675778755e-05, - "loss": 148.2698, - "step": 28200 - }, - { - "epoch": 0.22794301828553884, - "grad_norm": 793.1322631835938, - "learning_rate": 4.76325521979634e-05, - "loss": 122.9886, - "step": 28210 - }, - { - "epoch": 0.22802382048982298, - "grad_norm": 1084.8419189453125, - "learning_rate": 4.7629585873206226e-05, - "loss": 135.7087, - "step": 28220 - }, - { - "epoch": 0.22810462269410708, - "grad_norm": 958.3575439453125, - "learning_rate": 4.7626617783747364e-05, - "loss": 128.169, - "step": 28230 - }, - { - "epoch": 0.22818542489839122, - "grad_norm": 1227.17822265625, - "learning_rate": 4.762364792981825e-05, - "loss": 132.7136, - "step": 28240 - }, - { - "epoch": 0.22826622710267536, - "grad_norm": 1051.424072265625, - "learning_rate": 4.762067631165049e-05, - "loss": 208.9576, - "step": 28250 - }, - { - "epoch": 0.2283470293069595, - "grad_norm": 930.3882446289062, - "learning_rate": 4.761770292947582e-05, - "loss": 147.4814, - "step": 28260 - }, - { - "epoch": 0.22842783151124363, - "grad_norm": 728.6051025390625, - "learning_rate": 4.76147277835261e-05, - "loss": 139.1829, - "step": 28270 - }, - { - "epoch": 0.22850863371552776, - "grad_norm": 1714.36279296875, - "learning_rate": 4.7611750874033356e-05, - "loss": 123.814, - "step": 28280 - }, - { - "epoch": 0.2285894359198119, - "grad_norm": 735.8311767578125, - "learning_rate": 4.760877220122971e-05, - "loss": 109.679, - "step": 28290 - }, - { - "epoch": 0.22867023812409604, - "grad_norm": 1783.2615966796875, - "learning_rate": 4.760579176534747e-05, - "loss": 126.1644, - "step": 28300 - }, - { - "epoch": 0.22875104032838015, - "grad_norm": 1475.021728515625, - "learning_rate": 4.760280956661903e-05, - "loss": 108.4211, - "step": 28310 - }, - { - "epoch": 0.22883184253266428, - "grad_norm": 991.5218505859375, - "learning_rate": 4.759982560527698e-05, - "loss": 151.8106, - "step": 28320 - }, - { - "epoch": 0.22891264473694842, - "grad_norm": 809.6456298828125, - "learning_rate": 4.7596839881553976e-05, - "loss": 135.7544, - "step": 28330 - }, - { - "epoch": 0.22899344694123255, - "grad_norm": 989.7752075195312, - "learning_rate": 4.759385239568289e-05, - "loss": 126.3401, - "step": 28340 - }, - { - "epoch": 0.2290742491455167, - "grad_norm": 1251.4195556640625, - "learning_rate": 4.7590863147896666e-05, - "loss": 101.4083, - "step": 28350 - }, - { - "epoch": 0.22915505134980083, - "grad_norm": 1239.4796142578125, - "learning_rate": 4.758787213842842e-05, - "loss": 135.2459, - "step": 28360 - }, - { - "epoch": 0.22923585355408496, - "grad_norm": 889.4104614257812, - "learning_rate": 4.7584879367511395e-05, - "loss": 118.2603, - "step": 28370 - }, - { - "epoch": 0.2293166557583691, - "grad_norm": 1136.5712890625, - "learning_rate": 4.758188483537898e-05, - "loss": 159.7417, - "step": 28380 - }, - { - "epoch": 0.22939745796265323, - "grad_norm": 1187.0328369140625, - "learning_rate": 4.7578888542264686e-05, - "loss": 120.4386, - "step": 28390 - }, - { - "epoch": 0.22947826016693734, - "grad_norm": 865.9318237304688, - "learning_rate": 4.7575890488402185e-05, - "loss": 88.3281, - "step": 28400 - }, - { - "epoch": 0.22955906237122148, - "grad_norm": 1016.8959350585938, - "learning_rate": 4.757289067402525e-05, - "loss": 121.7153, - "step": 28410 - }, - { - "epoch": 0.22963986457550561, - "grad_norm": 750.9944458007812, - "learning_rate": 4.7569889099367824e-05, - "loss": 127.007, - "step": 28420 - }, - { - "epoch": 0.22972066677978975, - "grad_norm": 2341.662109375, - "learning_rate": 4.756688576466398e-05, - "loss": 154.873, - "step": 28430 - }, - { - "epoch": 0.2298014689840739, - "grad_norm": 1157.162353515625, - "learning_rate": 4.756388067014792e-05, - "loss": 150.8823, - "step": 28440 - }, - { - "epoch": 0.22988227118835802, - "grad_norm": 979.4419555664062, - "learning_rate": 4.7560873816053984e-05, - "loss": 121.463, - "step": 28450 - }, - { - "epoch": 0.22996307339264216, - "grad_norm": 837.5225219726562, - "learning_rate": 4.7557865202616656e-05, - "loss": 112.0781, - "step": 28460 - }, - { - "epoch": 0.2300438755969263, - "grad_norm": 1255.1954345703125, - "learning_rate": 4.755485483007056e-05, - "loss": 172.7516, - "step": 28470 - }, - { - "epoch": 0.2301246778012104, - "grad_norm": 1812.2564697265625, - "learning_rate": 4.7551842698650436e-05, - "loss": 149.2239, - "step": 28480 - }, - { - "epoch": 0.23020548000549454, - "grad_norm": 1386.5208740234375, - "learning_rate": 4.7548828808591195e-05, - "loss": 134.5843, - "step": 28490 - }, - { - "epoch": 0.23028628220977868, - "grad_norm": 604.06298828125, - "learning_rate": 4.754581316012785e-05, - "loss": 114.6172, - "step": 28500 - }, - { - "epoch": 0.2303670844140628, - "grad_norm": 766.1400146484375, - "learning_rate": 4.7542795753495574e-05, - "loss": 109.8613, - "step": 28510 - }, - { - "epoch": 0.23044788661834695, - "grad_norm": 995.7777709960938, - "learning_rate": 4.753977658892967e-05, - "loss": 112.1638, - "step": 28520 - }, - { - "epoch": 0.23052868882263108, - "grad_norm": 902.699951171875, - "learning_rate": 4.753675566666558e-05, - "loss": 138.5148, - "step": 28530 - }, - { - "epoch": 0.23060949102691522, - "grad_norm": 1247.79052734375, - "learning_rate": 4.753373298693888e-05, - "loss": 164.4383, - "step": 28540 - }, - { - "epoch": 0.23069029323119936, - "grad_norm": 1117.5296630859375, - "learning_rate": 4.7530708549985287e-05, - "loss": 83.1662, - "step": 28550 - }, - { - "epoch": 0.2307710954354835, - "grad_norm": 1614.6134033203125, - "learning_rate": 4.752768235604065e-05, - "loss": 137.1905, - "step": 28560 - }, - { - "epoch": 0.2308518976397676, - "grad_norm": 1606.5859375, - "learning_rate": 4.752465440534096e-05, - "loss": 142.3019, - "step": 28570 - }, - { - "epoch": 0.23093269984405174, - "grad_norm": 1640.67724609375, - "learning_rate": 4.752162469812234e-05, - "loss": 120.2609, - "step": 28580 - }, - { - "epoch": 0.23101350204833587, - "grad_norm": 1339.604736328125, - "learning_rate": 4.7518593234621056e-05, - "loss": 128.9936, - "step": 28590 - }, - { - "epoch": 0.23109430425262, - "grad_norm": 1168.5211181640625, - "learning_rate": 4.7515560015073514e-05, - "loss": 117.2202, - "step": 28600 - }, - { - "epoch": 0.23117510645690414, - "grad_norm": 1296.6881103515625, - "learning_rate": 4.751252503971624e-05, - "loss": 146.2023, - "step": 28610 - }, - { - "epoch": 0.23125590866118828, - "grad_norm": 1589.9830322265625, - "learning_rate": 4.7509488308785905e-05, - "loss": 147.3195, - "step": 28620 - }, - { - "epoch": 0.23133671086547242, - "grad_norm": 867.3526000976562, - "learning_rate": 4.750644982251933e-05, - "loss": 100.155, - "step": 28630 - }, - { - "epoch": 0.23141751306975655, - "grad_norm": 686.5694580078125, - "learning_rate": 4.750340958115346e-05, - "loss": 141.5142, - "step": 28640 - }, - { - "epoch": 0.2314983152740407, - "grad_norm": 1165.619384765625, - "learning_rate": 4.750036758492537e-05, - "loss": 131.7647, - "step": 28650 - }, - { - "epoch": 0.2315791174783248, - "grad_norm": 1237.009033203125, - "learning_rate": 4.749732383407229e-05, - "loss": 175.7308, - "step": 28660 - }, - { - "epoch": 0.23165991968260893, - "grad_norm": 1052.8013916015625, - "learning_rate": 4.7494278328831584e-05, - "loss": 147.8926, - "step": 28670 - }, - { - "epoch": 0.23174072188689307, - "grad_norm": 1372.200927734375, - "learning_rate": 4.749123106944073e-05, - "loss": 134.1946, - "step": 28680 - }, - { - "epoch": 0.2318215240911772, - "grad_norm": 1009.112060546875, - "learning_rate": 4.7488182056137374e-05, - "loss": 113.4072, - "step": 28690 - }, - { - "epoch": 0.23190232629546134, - "grad_norm": 1030.7261962890625, - "learning_rate": 4.7485131289159276e-05, - "loss": 139.543, - "step": 28700 - }, - { - "epoch": 0.23198312849974548, - "grad_norm": 879.779541015625, - "learning_rate": 4.7482078768744345e-05, - "loss": 151.9607, - "step": 28710 - }, - { - "epoch": 0.23206393070402961, - "grad_norm": 1353.03369140625, - "learning_rate": 4.747902449513063e-05, - "loss": 112.0666, - "step": 28720 - }, - { - "epoch": 0.23214473290831375, - "grad_norm": 916.7716674804688, - "learning_rate": 4.7475968468556295e-05, - "loss": 144.2921, - "step": 28730 - }, - { - "epoch": 0.23222553511259786, - "grad_norm": 1142.4591064453125, - "learning_rate": 4.7472910689259655e-05, - "loss": 149.8234, - "step": 28740 - }, - { - "epoch": 0.232306337316882, - "grad_norm": 1392.4619140625, - "learning_rate": 4.7469851157479177e-05, - "loss": 133.1573, - "step": 28750 - }, - { - "epoch": 0.23238713952116613, - "grad_norm": 1067.5828857421875, - "learning_rate": 4.7466789873453444e-05, - "loss": 142.0523, - "step": 28760 - }, - { - "epoch": 0.23246794172545027, - "grad_norm": 586.7354125976562, - "learning_rate": 4.746372683742117e-05, - "loss": 115.9923, - "step": 28770 - }, - { - "epoch": 0.2325487439297344, - "grad_norm": 1031.025390625, - "learning_rate": 4.746066204962123e-05, - "loss": 127.9179, - "step": 28780 - }, - { - "epoch": 0.23262954613401854, - "grad_norm": 756.3115844726562, - "learning_rate": 4.745759551029261e-05, - "loss": 159.1566, - "step": 28790 - }, - { - "epoch": 0.23271034833830267, - "grad_norm": 802.1080322265625, - "learning_rate": 4.745452721967446e-05, - "loss": 130.4161, - "step": 28800 - }, - { - "epoch": 0.2327911505425868, - "grad_norm": 2123.100830078125, - "learning_rate": 4.745145717800605e-05, - "loss": 142.188, - "step": 28810 - }, - { - "epoch": 0.23287195274687095, - "grad_norm": 1362.030029296875, - "learning_rate": 4.744838538552677e-05, - "loss": 118.0047, - "step": 28820 - }, - { - "epoch": 0.23295275495115506, - "grad_norm": 1351.5509033203125, - "learning_rate": 4.744531184247619e-05, - "loss": 122.6793, - "step": 28830 - }, - { - "epoch": 0.2330335571554392, - "grad_norm": 2164.05224609375, - "learning_rate": 4.744223654909397e-05, - "loss": 152.711, - "step": 28840 - }, - { - "epoch": 0.23311435935972333, - "grad_norm": 1015.6570434570312, - "learning_rate": 4.743915950561994e-05, - "loss": 133.6094, - "step": 28850 - }, - { - "epoch": 0.23319516156400746, - "grad_norm": 2003.78662109375, - "learning_rate": 4.743608071229405e-05, - "loss": 118.8274, - "step": 28860 - }, - { - "epoch": 0.2332759637682916, - "grad_norm": 2532.417236328125, - "learning_rate": 4.743300016935639e-05, - "loss": 128.9759, - "step": 28870 - }, - { - "epoch": 0.23335676597257574, - "grad_norm": 867.998046875, - "learning_rate": 4.742991787704719e-05, - "loss": 106.416, - "step": 28880 - }, - { - "epoch": 0.23343756817685987, - "grad_norm": 1680.5518798828125, - "learning_rate": 4.7426833835606806e-05, - "loss": 163.9655, - "step": 28890 - }, - { - "epoch": 0.233518370381144, - "grad_norm": 747.9049682617188, - "learning_rate": 4.742374804527575e-05, - "loss": 177.0644, - "step": 28900 - }, - { - "epoch": 0.23359917258542814, - "grad_norm": 1135.5902099609375, - "learning_rate": 4.742066050629465e-05, - "loss": 137.9824, - "step": 28910 - }, - { - "epoch": 0.23367997478971225, - "grad_norm": 575.887451171875, - "learning_rate": 4.741757121890428e-05, - "loss": 128.3113, - "step": 28920 - }, - { - "epoch": 0.2337607769939964, - "grad_norm": 1768.8681640625, - "learning_rate": 4.741448018334555e-05, - "loss": 174.0802, - "step": 28930 - }, - { - "epoch": 0.23384157919828052, - "grad_norm": 870.5559692382812, - "learning_rate": 4.741138739985951e-05, - "loss": 116.884, - "step": 28940 - }, - { - "epoch": 0.23392238140256466, - "grad_norm": 927.0210571289062, - "learning_rate": 4.740829286868733e-05, - "loss": 128.0433, - "step": 28950 - }, - { - "epoch": 0.2340031836068488, - "grad_norm": 899.3349609375, - "learning_rate": 4.740519659007033e-05, - "loss": 123.8409, - "step": 28960 - }, - { - "epoch": 0.23408398581113293, - "grad_norm": 1515.8046875, - "learning_rate": 4.7402098564249974e-05, - "loss": 148.106, - "step": 28970 - }, - { - "epoch": 0.23416478801541707, - "grad_norm": 1051.9295654296875, - "learning_rate": 4.739899879146785e-05, - "loss": 98.266, - "step": 28980 - }, - { - "epoch": 0.2342455902197012, - "grad_norm": 934.8062744140625, - "learning_rate": 4.739589727196568e-05, - "loss": 101.263, - "step": 28990 - }, - { - "epoch": 0.2343263924239853, - "grad_norm": 1219.4715576171875, - "learning_rate": 4.7392794005985326e-05, - "loss": 123.3057, - "step": 29000 - }, - { - "epoch": 0.23440719462826945, - "grad_norm": 1818.0650634765625, - "learning_rate": 4.7389688993768786e-05, - "loss": 136.0193, - "step": 29010 - }, - { - "epoch": 0.23448799683255359, - "grad_norm": 1399.0958251953125, - "learning_rate": 4.7386582235558205e-05, - "loss": 136.5638, - "step": 29020 - }, - { - "epoch": 0.23456879903683772, - "grad_norm": 920.4732666015625, - "learning_rate": 4.738347373159585e-05, - "loss": 132.8821, - "step": 29030 - }, - { - "epoch": 0.23464960124112186, - "grad_norm": 1033.1845703125, - "learning_rate": 4.738036348212412e-05, - "loss": 140.0217, - "step": 29040 - }, - { - "epoch": 0.234730403445406, - "grad_norm": 711.9341430664062, - "learning_rate": 4.737725148738557e-05, - "loss": 140.1641, - "step": 29050 - }, - { - "epoch": 0.23481120564969013, - "grad_norm": 688.1538696289062, - "learning_rate": 4.737413774762287e-05, - "loss": 77.516, - "step": 29060 - }, - { - "epoch": 0.23489200785397427, - "grad_norm": 703.8880615234375, - "learning_rate": 4.737102226307884e-05, - "loss": 104.5649, - "step": 29070 - }, - { - "epoch": 0.2349728100582584, - "grad_norm": 1385.0677490234375, - "learning_rate": 4.7367905033996445e-05, - "loss": 129.1833, - "step": 29080 - }, - { - "epoch": 0.2350536122625425, - "grad_norm": 704.5531005859375, - "learning_rate": 4.736478606061875e-05, - "loss": 118.4485, - "step": 29090 - }, - { - "epoch": 0.23513441446682665, - "grad_norm": 1189.0513916015625, - "learning_rate": 4.7361665343189e-05, - "loss": 113.3991, - "step": 29100 - }, - { - "epoch": 0.23521521667111078, - "grad_norm": 1045.0181884765625, - "learning_rate": 4.735854288195054e-05, - "loss": 127.079, - "step": 29110 - }, - { - "epoch": 0.23529601887539492, - "grad_norm": 1753.1556396484375, - "learning_rate": 4.735541867714687e-05, - "loss": 153.1194, - "step": 29120 - }, - { - "epoch": 0.23537682107967905, - "grad_norm": 1342.6064453125, - "learning_rate": 4.735229272902162e-05, - "loss": 220.2364, - "step": 29130 - }, - { - "epoch": 0.2354576232839632, - "grad_norm": 1594.6724853515625, - "learning_rate": 4.734916503781856e-05, - "loss": 110.0318, - "step": 29140 - }, - { - "epoch": 0.23553842548824733, - "grad_norm": 4604.31787109375, - "learning_rate": 4.73460356037816e-05, - "loss": 137.5645, - "step": 29150 - }, - { - "epoch": 0.23561922769253146, - "grad_norm": 1225.521240234375, - "learning_rate": 4.7342904427154766e-05, - "loss": 105.8574, - "step": 29160 - }, - { - "epoch": 0.23570002989681557, - "grad_norm": 1182.8037109375, - "learning_rate": 4.733977150818225e-05, - "loss": 136.7931, - "step": 29170 - }, - { - "epoch": 0.2357808321010997, - "grad_norm": 1909.0870361328125, - "learning_rate": 4.733663684710835e-05, - "loss": 141.2888, - "step": 29180 - }, - { - "epoch": 0.23586163430538384, - "grad_norm": 1045.3524169921875, - "learning_rate": 4.733350044417752e-05, - "loss": 133.0453, - "step": 29190 - }, - { - "epoch": 0.23594243650966798, - "grad_norm": 867.0052490234375, - "learning_rate": 4.733036229963435e-05, - "loss": 147.6335, - "step": 29200 - }, - { - "epoch": 0.23602323871395212, - "grad_norm": 1676.390625, - "learning_rate": 4.7327222413723536e-05, - "loss": 129.1215, - "step": 29210 - }, - { - "epoch": 0.23610404091823625, - "grad_norm": 694.4156494140625, - "learning_rate": 4.732408078668995e-05, - "loss": 120.3144, - "step": 29220 - }, - { - "epoch": 0.2361848431225204, - "grad_norm": 1407.293212890625, - "learning_rate": 4.732093741877859e-05, - "loss": 132.2767, - "step": 29230 - }, - { - "epoch": 0.23626564532680452, - "grad_norm": 604.6437377929688, - "learning_rate": 4.731779231023456e-05, - "loss": 101.5957, - "step": 29240 - }, - { - "epoch": 0.23634644753108866, - "grad_norm": 1028.5404052734375, - "learning_rate": 4.731464546130314e-05, - "loss": 111.1335, - "step": 29250 - }, - { - "epoch": 0.23642724973537277, - "grad_norm": 1072.7413330078125, - "learning_rate": 4.731149687222972e-05, - "loss": 117.0833, - "step": 29260 - }, - { - "epoch": 0.2365080519396569, - "grad_norm": 5062.89013671875, - "learning_rate": 4.730834654325984e-05, - "loss": 188.8955, - "step": 29270 - }, - { - "epoch": 0.23658885414394104, - "grad_norm": 1982.9129638671875, - "learning_rate": 4.730519447463916e-05, - "loss": 139.1706, - "step": 29280 - }, - { - "epoch": 0.23666965634822518, - "grad_norm": 1061.4493408203125, - "learning_rate": 4.730204066661349e-05, - "loss": 168.8176, - "step": 29290 - }, - { - "epoch": 0.2367504585525093, - "grad_norm": 937.4547729492188, - "learning_rate": 4.7298885119428773e-05, - "loss": 187.8752, - "step": 29300 - }, - { - "epoch": 0.23683126075679345, - "grad_norm": 2171.254150390625, - "learning_rate": 4.729572783333108e-05, - "loss": 154.5713, - "step": 29310 - }, - { - "epoch": 0.23691206296107759, - "grad_norm": 680.1090698242188, - "learning_rate": 4.729256880856662e-05, - "loss": 124.9596, - "step": 29320 - }, - { - "epoch": 0.23699286516536172, - "grad_norm": 1162.4727783203125, - "learning_rate": 4.728940804538176e-05, - "loss": 111.2394, - "step": 29330 - }, - { - "epoch": 0.23707366736964586, - "grad_norm": 944.7142333984375, - "learning_rate": 4.728624554402295e-05, - "loss": 102.1601, - "step": 29340 - }, - { - "epoch": 0.23715446957392997, - "grad_norm": 980.4320678710938, - "learning_rate": 4.728308130473683e-05, - "loss": 97.2322, - "step": 29350 - }, - { - "epoch": 0.2372352717782141, - "grad_norm": 686.7937622070312, - "learning_rate": 4.7279915327770155e-05, - "loss": 112.1569, - "step": 29360 - }, - { - "epoch": 0.23731607398249824, - "grad_norm": 2041.260986328125, - "learning_rate": 4.727674761336981e-05, - "loss": 184.7061, - "step": 29370 - }, - { - "epoch": 0.23739687618678237, - "grad_norm": 912.5013427734375, - "learning_rate": 4.727357816178282e-05, - "loss": 133.7734, - "step": 29380 - }, - { - "epoch": 0.2374776783910665, - "grad_norm": 646.3809814453125, - "learning_rate": 4.727040697325634e-05, - "loss": 110.0211, - "step": 29390 - }, - { - "epoch": 0.23755848059535065, - "grad_norm": 1152.2921142578125, - "learning_rate": 4.7267234048037664e-05, - "loss": 138.353, - "step": 29400 - }, - { - "epoch": 0.23763928279963478, - "grad_norm": 696.5980224609375, - "learning_rate": 4.7264059386374236e-05, - "loss": 108.8602, - "step": 29410 - }, - { - "epoch": 0.23772008500391892, - "grad_norm": 1130.272705078125, - "learning_rate": 4.7260882988513624e-05, - "loss": 141.4464, - "step": 29420 - }, - { - "epoch": 0.23780088720820303, - "grad_norm": 889.0077514648438, - "learning_rate": 4.725770485470351e-05, - "loss": 123.6045, - "step": 29430 - }, - { - "epoch": 0.23788168941248716, - "grad_norm": 1103.80224609375, - "learning_rate": 4.725452498519175e-05, - "loss": 118.2007, - "step": 29440 - }, - { - "epoch": 0.2379624916167713, - "grad_norm": 914.1157836914062, - "learning_rate": 4.725134338022631e-05, - "loss": 117.6762, - "step": 29450 - }, - { - "epoch": 0.23804329382105544, - "grad_norm": 632.98828125, - "learning_rate": 4.7248160040055304e-05, - "loss": 125.2305, - "step": 29460 - }, - { - "epoch": 0.23812409602533957, - "grad_norm": 690.5091552734375, - "learning_rate": 4.7244974964926965e-05, - "loss": 117.8819, - "step": 29470 - }, - { - "epoch": 0.2382048982296237, - "grad_norm": 1017.4037475585938, - "learning_rate": 4.724178815508967e-05, - "loss": 117.0019, - "step": 29480 - }, - { - "epoch": 0.23828570043390784, - "grad_norm": 1045.8836669921875, - "learning_rate": 4.723859961079195e-05, - "loss": 122.3543, - "step": 29490 - }, - { - "epoch": 0.23836650263819198, - "grad_norm": 1498.415771484375, - "learning_rate": 4.723540933228244e-05, - "loss": 193.663, - "step": 29500 - }, - { - "epoch": 0.23844730484247612, - "grad_norm": 655.9730834960938, - "learning_rate": 4.723221731980993e-05, - "loss": 106.442, - "step": 29510 - }, - { - "epoch": 0.23852810704676022, - "grad_norm": 757.1959838867188, - "learning_rate": 4.722902357362333e-05, - "loss": 107.8532, - "step": 29520 - }, - { - "epoch": 0.23860890925104436, - "grad_norm": 911.9034423828125, - "learning_rate": 4.722582809397171e-05, - "loss": 106.3072, - "step": 29530 - }, - { - "epoch": 0.2386897114553285, - "grad_norm": 853.9882202148438, - "learning_rate": 4.722263088110426e-05, - "loss": 125.8871, - "step": 29540 - }, - { - "epoch": 0.23877051365961263, - "grad_norm": 978.88818359375, - "learning_rate": 4.721943193527029e-05, - "loss": 128.243, - "step": 29550 - }, - { - "epoch": 0.23885131586389677, - "grad_norm": 1487.79248046875, - "learning_rate": 4.721623125671927e-05, - "loss": 135.0843, - "step": 29560 - }, - { - "epoch": 0.2389321180681809, - "grad_norm": 903.4834594726562, - "learning_rate": 4.721302884570079e-05, - "loss": 131.1085, - "step": 29570 - }, - { - "epoch": 0.23901292027246504, - "grad_norm": 572.0838623046875, - "learning_rate": 4.720982470246459e-05, - "loss": 118.5991, - "step": 29580 - }, - { - "epoch": 0.23909372247674918, - "grad_norm": 958.1123657226562, - "learning_rate": 4.7206618827260534e-05, - "loss": 87.2824, - "step": 29590 - }, - { - "epoch": 0.23917452468103328, - "grad_norm": 631.7835693359375, - "learning_rate": 4.720341122033862e-05, - "loss": 128.0944, - "step": 29600 - }, - { - "epoch": 0.23925532688531742, - "grad_norm": 1153.185546875, - "learning_rate": 4.720020188194897e-05, - "loss": 112.3794, - "step": 29610 - }, - { - "epoch": 0.23933612908960156, - "grad_norm": 1256.6746826171875, - "learning_rate": 4.719699081234188e-05, - "loss": 124.2718, - "step": 29620 - }, - { - "epoch": 0.2394169312938857, - "grad_norm": 680.5394287109375, - "learning_rate": 4.719377801176774e-05, - "loss": 127.5146, - "step": 29630 - }, - { - "epoch": 0.23949773349816983, - "grad_norm": 1191.853515625, - "learning_rate": 4.7190563480477095e-05, - "loss": 106.8556, - "step": 29640 - }, - { - "epoch": 0.23957853570245397, - "grad_norm": 1467.0565185546875, - "learning_rate": 4.718734721872062e-05, - "loss": 108.4196, - "step": 29650 - }, - { - "epoch": 0.2396593379067381, - "grad_norm": 1265.976318359375, - "learning_rate": 4.718412922674913e-05, - "loss": 124.4224, - "step": 29660 - }, - { - "epoch": 0.23974014011102224, - "grad_norm": 1163.871826171875, - "learning_rate": 4.718090950481356e-05, - "loss": 153.3159, - "step": 29670 - }, - { - "epoch": 0.23982094231530637, - "grad_norm": 787.7110595703125, - "learning_rate": 4.717768805316501e-05, - "loss": 125.5779, - "step": 29680 - }, - { - "epoch": 0.23990174451959048, - "grad_norm": 1298.0185546875, - "learning_rate": 4.717446487205466e-05, - "loss": 126.0521, - "step": 29690 - }, - { - "epoch": 0.23998254672387462, - "grad_norm": 886.8539428710938, - "learning_rate": 4.71712399617339e-05, - "loss": 151.3869, - "step": 29700 - }, - { - "epoch": 0.24006334892815875, - "grad_norm": 686.7147827148438, - "learning_rate": 4.716801332245419e-05, - "loss": 89.7155, - "step": 29710 - }, - { - "epoch": 0.2401441511324429, - "grad_norm": 687.3280639648438, - "learning_rate": 4.7164784954467166e-05, - "loss": 119.7646, - "step": 29720 - }, - { - "epoch": 0.24022495333672703, - "grad_norm": 843.232421875, - "learning_rate": 4.716155485802457e-05, - "loss": 100.9375, - "step": 29730 - }, - { - "epoch": 0.24030575554101116, - "grad_norm": 1230.372802734375, - "learning_rate": 4.715832303337829e-05, - "loss": 106.6527, - "step": 29740 - }, - { - "epoch": 0.2403865577452953, - "grad_norm": 981.0775146484375, - "learning_rate": 4.715508948078037e-05, - "loss": 141.3308, - "step": 29750 - }, - { - "epoch": 0.24046735994957943, - "grad_norm": 681.467529296875, - "learning_rate": 4.715185420048295e-05, - "loss": 108.3358, - "step": 29760 - }, - { - "epoch": 0.24054816215386357, - "grad_norm": 953.4138793945312, - "learning_rate": 4.714861719273833e-05, - "loss": 121.6345, - "step": 29770 - }, - { - "epoch": 0.24062896435814768, - "grad_norm": 948.9532470703125, - "learning_rate": 4.714537845779894e-05, - "loss": 104.2814, - "step": 29780 - }, - { - "epoch": 0.24070976656243182, - "grad_norm": 622.6708984375, - "learning_rate": 4.7142137995917336e-05, - "loss": 108.6086, - "step": 29790 - }, - { - "epoch": 0.24079056876671595, - "grad_norm": 1390.5504150390625, - "learning_rate": 4.713889580734623e-05, - "loss": 112.2751, - "step": 29800 - }, - { - "epoch": 0.2408713709710001, - "grad_norm": 1449.7113037109375, - "learning_rate": 4.713565189233844e-05, - "loss": 150.2417, - "step": 29810 - }, - { - "epoch": 0.24095217317528422, - "grad_norm": 1058.4156494140625, - "learning_rate": 4.7132406251146935e-05, - "loss": 114.6706, - "step": 29820 - }, - { - "epoch": 0.24103297537956836, - "grad_norm": 691.94189453125, - "learning_rate": 4.712915888402483e-05, - "loss": 129.1936, - "step": 29830 - }, - { - "epoch": 0.2411137775838525, - "grad_norm": 1067.644287109375, - "learning_rate": 4.712590979122534e-05, - "loss": 103.1055, - "step": 29840 - }, - { - "epoch": 0.24119457978813663, - "grad_norm": 1014.9259033203125, - "learning_rate": 4.712265897300186e-05, - "loss": 101.1149, - "step": 29850 - }, - { - "epoch": 0.24127538199242074, - "grad_norm": 1477.2049560546875, - "learning_rate": 4.7119406429607885e-05, - "loss": 145.2585, - "step": 29860 - }, - { - "epoch": 0.24135618419670488, - "grad_norm": 1495.74951171875, - "learning_rate": 4.7116152161297045e-05, - "loss": 123.018, - "step": 29870 - }, - { - "epoch": 0.241436986400989, - "grad_norm": 476.7928161621094, - "learning_rate": 4.711289616832312e-05, - "loss": 94.0225, - "step": 29880 - }, - { - "epoch": 0.24151778860527315, - "grad_norm": 917.6282348632812, - "learning_rate": 4.710963845094003e-05, - "loss": 149.6066, - "step": 29890 - }, - { - "epoch": 0.24159859080955728, - "grad_norm": 3788.38720703125, - "learning_rate": 4.710637900940181e-05, - "loss": 137.476, - "step": 29900 - }, - { - "epoch": 0.24167939301384142, - "grad_norm": 1132.5015869140625, - "learning_rate": 4.710311784396264e-05, - "loss": 97.4734, - "step": 29910 - }, - { - "epoch": 0.24176019521812556, - "grad_norm": 1217.09375, - "learning_rate": 4.709985495487682e-05, - "loss": 155.7918, - "step": 29920 - }, - { - "epoch": 0.2418409974224097, - "grad_norm": 1372.134521484375, - "learning_rate": 4.709659034239883e-05, - "loss": 140.2875, - "step": 29930 - }, - { - "epoch": 0.24192179962669383, - "grad_norm": 982.9510498046875, - "learning_rate": 4.7093324006783214e-05, - "loss": 125.7354, - "step": 29940 - }, - { - "epoch": 0.24200260183097794, - "grad_norm": 1473.0313720703125, - "learning_rate": 4.7090055948284706e-05, - "loss": 122.847, - "step": 29950 - }, - { - "epoch": 0.24208340403526207, - "grad_norm": 1208.8023681640625, - "learning_rate": 4.708678616715815e-05, - "loss": 129.1784, - "step": 29960 - }, - { - "epoch": 0.2421642062395462, - "grad_norm": 1903.4127197265625, - "learning_rate": 4.7083514663658536e-05, - "loss": 153.904, - "step": 29970 - }, - { - "epoch": 0.24224500844383035, - "grad_norm": 852.7686767578125, - "learning_rate": 4.708024143804097e-05, - "loss": 137.6562, - "step": 29980 - }, - { - "epoch": 0.24232581064811448, - "grad_norm": 1105.1591796875, - "learning_rate": 4.707696649056073e-05, - "loss": 145.7228, - "step": 29990 - }, - { - "epoch": 0.24240661285239862, - "grad_norm": 777.6072998046875, - "learning_rate": 4.707368982147318e-05, - "loss": 139.5429, - "step": 30000 - }, - { - "epoch": 0.24248741505668275, - "grad_norm": 877.15380859375, - "learning_rate": 4.707041143103384e-05, - "loss": 124.8257, - "step": 30010 - }, - { - "epoch": 0.2425682172609669, - "grad_norm": 644.2935180664062, - "learning_rate": 4.706713131949839e-05, - "loss": 116.7116, - "step": 30020 - }, - { - "epoch": 0.24264901946525103, - "grad_norm": 724.2484130859375, - "learning_rate": 4.70638494871226e-05, - "loss": 107.8035, - "step": 30030 - }, - { - "epoch": 0.24272982166953513, - "grad_norm": 1152.3394775390625, - "learning_rate": 4.7060565934162394e-05, - "loss": 148.7312, - "step": 30040 - }, - { - "epoch": 0.24281062387381927, - "grad_norm": 1203.0445556640625, - "learning_rate": 4.7057280660873835e-05, - "loss": 130.5981, - "step": 30050 - }, - { - "epoch": 0.2428914260781034, - "grad_norm": 785.3749389648438, - "learning_rate": 4.705399366751312e-05, - "loss": 126.0415, - "step": 30060 - }, - { - "epoch": 0.24297222828238754, - "grad_norm": 873.1101684570312, - "learning_rate": 4.705070495433657e-05, - "loss": 130.5768, - "step": 30070 - }, - { - "epoch": 0.24305303048667168, - "grad_norm": 917.8280639648438, - "learning_rate": 4.7047414521600644e-05, - "loss": 109.1206, - "step": 30080 - }, - { - "epoch": 0.24313383269095581, - "grad_norm": 726.4758911132812, - "learning_rate": 4.704412236956193e-05, - "loss": 113.6781, - "step": 30090 - }, - { - "epoch": 0.24321463489523995, - "grad_norm": 900.5794677734375, - "learning_rate": 4.704082849847718e-05, - "loss": 141.0788, - "step": 30100 - }, - { - "epoch": 0.2432954370995241, - "grad_norm": 790.451416015625, - "learning_rate": 4.703753290860323e-05, - "loss": 89.4316, - "step": 30110 - }, - { - "epoch": 0.2433762393038082, - "grad_norm": 1578.204345703125, - "learning_rate": 4.70342356001971e-05, - "loss": 151.0826, - "step": 30120 - }, - { - "epoch": 0.24345704150809233, - "grad_norm": 942.3500366210938, - "learning_rate": 4.703093657351591e-05, - "loss": 140.4288, - "step": 30130 - }, - { - "epoch": 0.24353784371237647, - "grad_norm": 1478.4281005859375, - "learning_rate": 4.702763582881692e-05, - "loss": 118.3542, - "step": 30140 - }, - { - "epoch": 0.2436186459166606, - "grad_norm": 633.3035888671875, - "learning_rate": 4.702433336635753e-05, - "loss": 102.6222, - "step": 30150 - }, - { - "epoch": 0.24369944812094474, - "grad_norm": 844.0042114257812, - "learning_rate": 4.702102918639528e-05, - "loss": 97.0962, - "step": 30160 - }, - { - "epoch": 0.24378025032522888, - "grad_norm": 601.2619018554688, - "learning_rate": 4.701772328918784e-05, - "loss": 117.5583, - "step": 30170 - }, - { - "epoch": 0.243861052529513, - "grad_norm": 1324.6904296875, - "learning_rate": 4.7014415674993e-05, - "loss": 121.7755, - "step": 30180 - }, - { - "epoch": 0.24394185473379715, - "grad_norm": 1094.88720703125, - "learning_rate": 4.70111063440687e-05, - "loss": 106.0576, - "step": 30190 - }, - { - "epoch": 0.24402265693808128, - "grad_norm": 1200.58203125, - "learning_rate": 4.7007795296673006e-05, - "loss": 97.3833, - "step": 30200 - }, - { - "epoch": 0.2441034591423654, - "grad_norm": 1169.1607666015625, - "learning_rate": 4.700448253306412e-05, - "loss": 127.1184, - "step": 30210 - }, - { - "epoch": 0.24418426134664953, - "grad_norm": 876.1979370117188, - "learning_rate": 4.700116805350039e-05, - "loss": 139.5513, - "step": 30220 - }, - { - "epoch": 0.24426506355093366, - "grad_norm": 1631.723876953125, - "learning_rate": 4.699785185824026e-05, - "loss": 128.253, - "step": 30230 - }, - { - "epoch": 0.2443458657552178, - "grad_norm": 1026.825439453125, - "learning_rate": 4.699453394754236e-05, - "loss": 92.297, - "step": 30240 - }, - { - "epoch": 0.24442666795950194, - "grad_norm": 1209.7313232421875, - "learning_rate": 4.6991214321665414e-05, - "loss": 126.4744, - "step": 30250 - }, - { - "epoch": 0.24450747016378607, - "grad_norm": 1183.0413818359375, - "learning_rate": 4.6987892980868296e-05, - "loss": 106.1105, - "step": 30260 - }, - { - "epoch": 0.2445882723680702, - "grad_norm": 787.1463623046875, - "learning_rate": 4.6984569925410016e-05, - "loss": 144.5227, - "step": 30270 - }, - { - "epoch": 0.24466907457235434, - "grad_norm": 1382.62744140625, - "learning_rate": 4.69812451555497e-05, - "loss": 148.0399, - "step": 30280 - }, - { - "epoch": 0.24474987677663845, - "grad_norm": 1172.5582275390625, - "learning_rate": 4.697791867154663e-05, - "loss": 94.4989, - "step": 30290 - }, - { - "epoch": 0.2448306789809226, - "grad_norm": 1119.9525146484375, - "learning_rate": 4.6974590473660216e-05, - "loss": 112.2652, - "step": 30300 - }, - { - "epoch": 0.24491148118520673, - "grad_norm": 609.7570190429688, - "learning_rate": 4.697126056214999e-05, - "loss": 84.4894, - "step": 30310 - }, - { - "epoch": 0.24499228338949086, - "grad_norm": 802.739990234375, - "learning_rate": 4.696792893727562e-05, - "loss": 162.1047, - "step": 30320 - }, - { - "epoch": 0.245073085593775, - "grad_norm": 515.0081176757812, - "learning_rate": 4.6964595599296926e-05, - "loss": 76.9925, - "step": 30330 - }, - { - "epoch": 0.24515388779805913, - "grad_norm": 625.3333740234375, - "learning_rate": 4.696126054847385e-05, - "loss": 120.5736, - "step": 30340 - }, - { - "epoch": 0.24523469000234327, - "grad_norm": 3491.92626953125, - "learning_rate": 4.6957923785066445e-05, - "loss": 136.2436, - "step": 30350 - }, - { - "epoch": 0.2453154922066274, - "grad_norm": 1086.0543212890625, - "learning_rate": 4.695458530933494e-05, - "loss": 134.1265, - "step": 30360 - }, - { - "epoch": 0.24539629441091154, - "grad_norm": 1090.56787109375, - "learning_rate": 4.6951245121539675e-05, - "loss": 156.7844, - "step": 30370 - }, - { - "epoch": 0.24547709661519565, - "grad_norm": 833.1397705078125, - "learning_rate": 4.694790322194111e-05, - "loss": 103.7849, - "step": 30380 - }, - { - "epoch": 0.2455578988194798, - "grad_norm": 989.0213012695312, - "learning_rate": 4.694455961079987e-05, - "loss": 142.2637, - "step": 30390 - }, - { - "epoch": 0.24563870102376392, - "grad_norm": 1007.1926879882812, - "learning_rate": 4.694121428837668e-05, - "loss": 128.0273, - "step": 30400 - }, - { - "epoch": 0.24571950322804806, - "grad_norm": 992.1776123046875, - "learning_rate": 4.693786725493242e-05, - "loss": 110.8111, - "step": 30410 - }, - { - "epoch": 0.2458003054323322, - "grad_norm": 833.3764038085938, - "learning_rate": 4.693451851072811e-05, - "loss": 144.1252, - "step": 30420 - }, - { - "epoch": 0.24588110763661633, - "grad_norm": 1088.8759765625, - "learning_rate": 4.693116805602489e-05, - "loss": 149.1182, - "step": 30430 - }, - { - "epoch": 0.24596190984090047, - "grad_norm": 1931.6527099609375, - "learning_rate": 4.692781589108402e-05, - "loss": 135.4949, - "step": 30440 - }, - { - "epoch": 0.2460427120451846, - "grad_norm": 1956.088623046875, - "learning_rate": 4.692446201616692e-05, - "loss": 153.8644, - "step": 30450 - }, - { - "epoch": 0.24612351424946874, - "grad_norm": 914.1429443359375, - "learning_rate": 4.6921106431535135e-05, - "loss": 141.7656, - "step": 30460 - }, - { - "epoch": 0.24620431645375285, - "grad_norm": 1526.69970703125, - "learning_rate": 4.691774913745033e-05, - "loss": 140.4324, - "step": 30470 - }, - { - "epoch": 0.24628511865803698, - "grad_norm": 803.3629150390625, - "learning_rate": 4.691439013417433e-05, - "loss": 119.8788, - "step": 30480 - }, - { - "epoch": 0.24636592086232112, - "grad_norm": 918.0090942382812, - "learning_rate": 4.691102942196906e-05, - "loss": 108.0762, - "step": 30490 - }, - { - "epoch": 0.24644672306660526, - "grad_norm": 913.9592895507812, - "learning_rate": 4.690766700109659e-05, - "loss": 103.0521, - "step": 30500 - }, - { - "epoch": 0.2465275252708894, - "grad_norm": 833.38037109375, - "learning_rate": 4.690430287181915e-05, - "loss": 201.3237, - "step": 30510 - }, - { - "epoch": 0.24660832747517353, - "grad_norm": 1400.293212890625, - "learning_rate": 4.690093703439907e-05, - "loss": 150.5914, - "step": 30520 - }, - { - "epoch": 0.24668912967945766, - "grad_norm": 1068.9080810546875, - "learning_rate": 4.689756948909884e-05, - "loss": 88.9544, - "step": 30530 - }, - { - "epoch": 0.2467699318837418, - "grad_norm": 1136.314697265625, - "learning_rate": 4.689420023618104e-05, - "loss": 111.8415, - "step": 30540 - }, - { - "epoch": 0.2468507340880259, - "grad_norm": 847.436279296875, - "learning_rate": 4.6890829275908434e-05, - "loss": 122.4519, - "step": 30550 - }, - { - "epoch": 0.24693153629231004, - "grad_norm": 1348.418212890625, - "learning_rate": 4.688745660854388e-05, - "loss": 168.7058, - "step": 30560 - }, - { - "epoch": 0.24701233849659418, - "grad_norm": 525.7384033203125, - "learning_rate": 4.68840822343504e-05, - "loss": 124.7277, - "step": 30570 - }, - { - "epoch": 0.24709314070087832, - "grad_norm": 824.0347290039062, - "learning_rate": 4.688070615359114e-05, - "loss": 180.7755, - "step": 30580 - }, - { - "epoch": 0.24717394290516245, - "grad_norm": 1139.04443359375, - "learning_rate": 4.6877328366529346e-05, - "loss": 151.3644, - "step": 30590 - }, - { - "epoch": 0.2472547451094466, - "grad_norm": 853.4348754882812, - "learning_rate": 4.687394887342845e-05, - "loss": 127.1066, - "step": 30600 - }, - { - "epoch": 0.24733554731373072, - "grad_norm": 542.5347900390625, - "learning_rate": 4.687056767455198e-05, - "loss": 106.1202, - "step": 30610 - }, - { - "epoch": 0.24741634951801486, - "grad_norm": 761.7576904296875, - "learning_rate": 4.686718477016361e-05, - "loss": 137.2746, - "step": 30620 - }, - { - "epoch": 0.247497151722299, - "grad_norm": 1651.20703125, - "learning_rate": 4.6863800160527147e-05, - "loss": 145.2747, - "step": 30630 - }, - { - "epoch": 0.2475779539265831, - "grad_norm": 7766.7763671875, - "learning_rate": 4.6860413845906534e-05, - "loss": 148.329, - "step": 30640 - }, - { - "epoch": 0.24765875613086724, - "grad_norm": 585.8191528320312, - "learning_rate": 4.685702582656584e-05, - "loss": 94.057, - "step": 30650 - }, - { - "epoch": 0.24773955833515138, - "grad_norm": 1419.4744873046875, - "learning_rate": 4.6853636102769274e-05, - "loss": 139.8103, - "step": 30660 - }, - { - "epoch": 0.2478203605394355, - "grad_norm": 767.4970703125, - "learning_rate": 4.685024467478116e-05, - "loss": 121.8061, - "step": 30670 - }, - { - "epoch": 0.24790116274371965, - "grad_norm": 1048.727294921875, - "learning_rate": 4.684685154286599e-05, - "loss": 144.7214, - "step": 30680 - }, - { - "epoch": 0.24798196494800379, - "grad_norm": 1894.920654296875, - "learning_rate": 4.684345670728834e-05, - "loss": 123.1511, - "step": 30690 - }, - { - "epoch": 0.24806276715228792, - "grad_norm": 898.2999267578125, - "learning_rate": 4.684006016831297e-05, - "loss": 148.4904, - "step": 30700 - }, - { - "epoch": 0.24814356935657206, - "grad_norm": 1397.76318359375, - "learning_rate": 4.6836661926204736e-05, - "loss": 106.6877, - "step": 30710 - }, - { - "epoch": 0.2482243715608562, - "grad_norm": 1688.9957275390625, - "learning_rate": 4.6833261981228646e-05, - "loss": 137.0107, - "step": 30720 - }, - { - "epoch": 0.2483051737651403, - "grad_norm": 847.3025512695312, - "learning_rate": 4.6829860333649836e-05, - "loss": 110.6745, - "step": 30730 - }, - { - "epoch": 0.24838597596942444, - "grad_norm": 825.1484985351562, - "learning_rate": 4.682645698373357e-05, - "loss": 100.7792, - "step": 30740 - }, - { - "epoch": 0.24846677817370857, - "grad_norm": 774.1849975585938, - "learning_rate": 4.682305193174524e-05, - "loss": 124.0935, - "step": 30750 - }, - { - "epoch": 0.2485475803779927, - "grad_norm": 1124.6876220703125, - "learning_rate": 4.68196451779504e-05, - "loss": 152.3383, - "step": 30760 - }, - { - "epoch": 0.24862838258227685, - "grad_norm": 816.4825439453125, - "learning_rate": 4.681623672261469e-05, - "loss": 204.9982, - "step": 30770 - }, - { - "epoch": 0.24870918478656098, - "grad_norm": 1915.1468505859375, - "learning_rate": 4.6812826566003934e-05, - "loss": 134.0663, - "step": 30780 - }, - { - "epoch": 0.24878998699084512, - "grad_norm": 799.4733276367188, - "learning_rate": 4.6809414708384046e-05, - "loss": 107.3664, - "step": 30790 - }, - { - "epoch": 0.24887078919512926, - "grad_norm": 878.7203979492188, - "learning_rate": 4.68060011500211e-05, - "loss": 131.2767, - "step": 30800 - }, - { - "epoch": 0.24895159139941336, - "grad_norm": 1173.3115234375, - "learning_rate": 4.680258589118128e-05, - "loss": 120.9661, - "step": 30810 - }, - { - "epoch": 0.2490323936036975, - "grad_norm": 981.5140380859375, - "learning_rate": 4.6799168932130915e-05, - "loss": 125.159, - "step": 30820 - }, - { - "epoch": 0.24911319580798164, - "grad_norm": 940.4940185546875, - "learning_rate": 4.679575027313649e-05, - "loss": 107.5992, - "step": 30830 - }, - { - "epoch": 0.24919399801226577, - "grad_norm": 1501.449951171875, - "learning_rate": 4.679232991446456e-05, - "loss": 116.5124, - "step": 30840 - }, - { - "epoch": 0.2492748002165499, - "grad_norm": 1678.8966064453125, - "learning_rate": 4.6788907856381895e-05, - "loss": 138.6862, - "step": 30850 - }, - { - "epoch": 0.24935560242083404, - "grad_norm": 585.3444213867188, - "learning_rate": 4.678548409915532e-05, - "loss": 159.1779, - "step": 30860 - }, - { - "epoch": 0.24943640462511818, - "grad_norm": 1041.24658203125, - "learning_rate": 4.678205864305184e-05, - "loss": 143.9888, - "step": 30870 - }, - { - "epoch": 0.24951720682940232, - "grad_norm": 1156.5784912109375, - "learning_rate": 4.677863148833859e-05, - "loss": 125.0972, - "step": 30880 - }, - { - "epoch": 0.24959800903368645, - "grad_norm": 879.4472045898438, - "learning_rate": 4.67752026352828e-05, - "loss": 123.5027, - "step": 30890 - }, - { - "epoch": 0.24967881123797056, - "grad_norm": 2156.40771484375, - "learning_rate": 4.6771772084151885e-05, - "loss": 150.0622, - "step": 30900 - }, - { - "epoch": 0.2497596134422547, - "grad_norm": 1132.5755615234375, - "learning_rate": 4.676833983521335e-05, - "loss": 174.7187, - "step": 30910 - }, - { - "epoch": 0.24984041564653883, - "grad_norm": 1475.2421875, - "learning_rate": 4.676490588873486e-05, - "loss": 132.3048, - "step": 30920 - }, - { - "epoch": 0.24992121785082297, - "grad_norm": 945.8187866210938, - "learning_rate": 4.6761470244984196e-05, - "loss": 97.9541, - "step": 30930 - }, - { - "epoch": 0.2500020200551071, - "grad_norm": 810.1784057617188, - "learning_rate": 4.675803290422927e-05, - "loss": 118.6477, - "step": 30940 - }, - { - "epoch": 0.2500828222593912, - "grad_norm": 1500.1495361328125, - "learning_rate": 4.675459386673815e-05, - "loss": 116.4928, - "step": 30950 - }, - { - "epoch": 0.25016362446367535, - "grad_norm": 1645.0987548828125, - "learning_rate": 4.6751153132779e-05, - "loss": 135.6667, - "step": 30960 - }, - { - "epoch": 0.2502444266679595, - "grad_norm": 928.4085083007812, - "learning_rate": 4.674771070262014e-05, - "loss": 147.4753, - "step": 30970 - }, - { - "epoch": 0.2503252288722436, - "grad_norm": 1190.9405517578125, - "learning_rate": 4.674426657653003e-05, - "loss": 97.2158, - "step": 30980 - }, - { - "epoch": 0.25040603107652776, - "grad_norm": 930.0552978515625, - "learning_rate": 4.6740820754777235e-05, - "loss": 100.6357, - "step": 30990 - }, - { - "epoch": 0.2504868332808119, - "grad_norm": 916.4031372070312, - "learning_rate": 4.6737373237630476e-05, - "loss": 131.274, - "step": 31000 - }, - { - "epoch": 0.25056763548509603, - "grad_norm": 1300.7235107421875, - "learning_rate": 4.6733924025358597e-05, - "loss": 138.4902, - "step": 31010 - }, - { - "epoch": 0.25064843768938017, - "grad_norm": 3069.26904296875, - "learning_rate": 4.6730473118230575e-05, - "loss": 134.2327, - "step": 31020 - }, - { - "epoch": 0.2507292398936643, - "grad_norm": 1497.9208984375, - "learning_rate": 4.672702051651552e-05, - "loss": 134.1972, - "step": 31030 - }, - { - "epoch": 0.25081004209794844, - "grad_norm": 723.8341674804688, - "learning_rate": 4.6723566220482664e-05, - "loss": 123.4816, - "step": 31040 - }, - { - "epoch": 0.2508908443022326, - "grad_norm": 689.9921264648438, - "learning_rate": 4.672011023040138e-05, - "loss": 112.1364, - "step": 31050 - }, - { - "epoch": 0.2509716465065167, - "grad_norm": 1235.7105712890625, - "learning_rate": 4.6716652546541194e-05, - "loss": 113.4487, - "step": 31060 - }, - { - "epoch": 0.25105244871080085, - "grad_norm": 1054.8597412109375, - "learning_rate": 4.6713193169171724e-05, - "loss": 112.5311, - "step": 31070 - }, - { - "epoch": 0.251133250915085, - "grad_norm": 835.3629150390625, - "learning_rate": 4.6709732098562745e-05, - "loss": 132.1355, - "step": 31080 - }, - { - "epoch": 0.2512140531193691, - "grad_norm": 1165.464599609375, - "learning_rate": 4.670626933498415e-05, - "loss": 118.5429, - "step": 31090 - }, - { - "epoch": 0.25129485532365325, - "grad_norm": 1425.6583251953125, - "learning_rate": 4.670280487870598e-05, - "loss": 123.699, - "step": 31100 - }, - { - "epoch": 0.25137565752793734, - "grad_norm": 1160.815673828125, - "learning_rate": 4.669933872999841e-05, - "loss": 95.371, - "step": 31110 - }, - { - "epoch": 0.25145645973222147, - "grad_norm": 1083.7623291015625, - "learning_rate": 4.6695870889131724e-05, - "loss": 90.0622, - "step": 31120 - }, - { - "epoch": 0.2515372619365056, - "grad_norm": 1230.3162841796875, - "learning_rate": 4.669240135637635e-05, - "loss": 112.34, - "step": 31130 - }, - { - "epoch": 0.25161806414078974, - "grad_norm": 672.0086669921875, - "learning_rate": 4.668893013200286e-05, - "loss": 135.9741, - "step": 31140 - }, - { - "epoch": 0.2516988663450739, - "grad_norm": 815.0717163085938, - "learning_rate": 4.6685457216281936e-05, - "loss": 119.9893, - "step": 31150 - }, - { - "epoch": 0.251779668549358, - "grad_norm": 1014.9674072265625, - "learning_rate": 4.6681982609484416e-05, - "loss": 137.3441, - "step": 31160 - }, - { - "epoch": 0.25186047075364215, - "grad_norm": 1384.6041259765625, - "learning_rate": 4.6678506311881245e-05, - "loss": 146.6111, - "step": 31170 - }, - { - "epoch": 0.2519412729579263, - "grad_norm": 470.4507141113281, - "learning_rate": 4.667502832374352e-05, - "loss": 120.4862, - "step": 31180 - }, - { - "epoch": 0.2520220751622104, - "grad_norm": 950.8878173828125, - "learning_rate": 4.6671548645342456e-05, - "loss": 96.7869, - "step": 31190 - }, - { - "epoch": 0.25210287736649456, - "grad_norm": 1040.9105224609375, - "learning_rate": 4.6668067276949414e-05, - "loss": 124.3253, - "step": 31200 - }, - { - "epoch": 0.2521836795707787, - "grad_norm": 1017.7307739257812, - "learning_rate": 4.666458421883586e-05, - "loss": 150.368, - "step": 31210 - }, - { - "epoch": 0.25226448177506283, - "grad_norm": 1042.1240234375, - "learning_rate": 4.666109947127343e-05, - "loss": 126.0339, - "step": 31220 - }, - { - "epoch": 0.25234528397934697, - "grad_norm": 938.1630859375, - "learning_rate": 4.6657613034533866e-05, - "loss": 92.1871, - "step": 31230 - }, - { - "epoch": 0.2524260861836311, - "grad_norm": 668.5460205078125, - "learning_rate": 4.665412490888904e-05, - "loss": 158.1232, - "step": 31240 - }, - { - "epoch": 0.25250688838791524, - "grad_norm": 779.7586669921875, - "learning_rate": 4.665063509461097e-05, - "loss": 133.2458, - "step": 31250 - }, - { - "epoch": 0.2525876905921994, - "grad_norm": 711.7813110351562, - "learning_rate": 4.66471435919718e-05, - "loss": 104.5599, - "step": 31260 - }, - { - "epoch": 0.2526684927964835, - "grad_norm": 532.3453369140625, - "learning_rate": 4.66436504012438e-05, - "loss": 149.0667, - "step": 31270 - }, - { - "epoch": 0.25274929500076765, - "grad_norm": 1488.810791015625, - "learning_rate": 4.6640155522699374e-05, - "loss": 176.4526, - "step": 31280 - }, - { - "epoch": 0.25283009720505173, - "grad_norm": 961.314208984375, - "learning_rate": 4.663665895661107e-05, - "loss": 145.6121, - "step": 31290 - }, - { - "epoch": 0.25291089940933587, - "grad_norm": 1149.9271240234375, - "learning_rate": 4.6633160703251554e-05, - "loss": 139.3647, - "step": 31300 - }, - { - "epoch": 0.25299170161362, - "grad_norm": 1060.4437255859375, - "learning_rate": 4.662966076289362e-05, - "loss": 181.2407, - "step": 31310 - }, - { - "epoch": 0.25307250381790414, - "grad_norm": 936.9962768554688, - "learning_rate": 4.6626159135810205e-05, - "loss": 115.8184, - "step": 31320 - }, - { - "epoch": 0.2531533060221883, - "grad_norm": 934.7564697265625, - "learning_rate": 4.662265582227438e-05, - "loss": 146.0902, - "step": 31330 - }, - { - "epoch": 0.2532341082264724, - "grad_norm": 1293.936279296875, - "learning_rate": 4.661915082255932e-05, - "loss": 154.5603, - "step": 31340 - }, - { - "epoch": 0.25331491043075655, - "grad_norm": 820.6248168945312, - "learning_rate": 4.6615644136938375e-05, - "loss": 114.5268, - "step": 31350 - }, - { - "epoch": 0.2533957126350407, - "grad_norm": 1214.7662353515625, - "learning_rate": 4.6612135765685e-05, - "loss": 86.2635, - "step": 31360 - }, - { - "epoch": 0.2534765148393248, - "grad_norm": 1282.572509765625, - "learning_rate": 4.660862570907277e-05, - "loss": 135.8084, - "step": 31370 - }, - { - "epoch": 0.25355731704360895, - "grad_norm": 813.9265747070312, - "learning_rate": 4.660511396737541e-05, - "loss": 125.4082, - "step": 31380 - }, - { - "epoch": 0.2536381192478931, - "grad_norm": 717.7954711914062, - "learning_rate": 4.6601600540866794e-05, - "loss": 191.1712, - "step": 31390 - }, - { - "epoch": 0.2537189214521772, - "grad_norm": 1254.6070556640625, - "learning_rate": 4.659808542982088e-05, - "loss": 139.2613, - "step": 31400 - }, - { - "epoch": 0.25379972365646136, - "grad_norm": 418.7879333496094, - "learning_rate": 4.659456863451181e-05, - "loss": 115.6388, - "step": 31410 - }, - { - "epoch": 0.2538805258607455, - "grad_norm": 1027.3558349609375, - "learning_rate": 4.65910501552138e-05, - "loss": 127.2673, - "step": 31420 - }, - { - "epoch": 0.25396132806502963, - "grad_norm": 1325.302734375, - "learning_rate": 4.658752999220125e-05, - "loss": 141.2304, - "step": 31430 - }, - { - "epoch": 0.25404213026931377, - "grad_norm": 1152.749267578125, - "learning_rate": 4.6584008145748656e-05, - "loss": 112.7718, - "step": 31440 - }, - { - "epoch": 0.2541229324735979, - "grad_norm": 954.8677368164062, - "learning_rate": 4.658048461613068e-05, - "loss": 127.5997, - "step": 31450 - }, - { - "epoch": 0.254203734677882, - "grad_norm": 1916.9107666015625, - "learning_rate": 4.657695940362207e-05, - "loss": 171.5313, - "step": 31460 - }, - { - "epoch": 0.2542845368821661, - "grad_norm": 1336.6593017578125, - "learning_rate": 4.6573432508497735e-05, - "loss": 121.1602, - "step": 31470 - }, - { - "epoch": 0.25436533908645026, - "grad_norm": 691.5707397460938, - "learning_rate": 4.6569903931032735e-05, - "loss": 128.2831, - "step": 31480 - }, - { - "epoch": 0.2544461412907344, - "grad_norm": 578.7666015625, - "learning_rate": 4.6566373671502196e-05, - "loss": 110.4139, - "step": 31490 - }, - { - "epoch": 0.25452694349501853, - "grad_norm": 2594.50439453125, - "learning_rate": 4.656284173018144e-05, - "loss": 139.214, - "step": 31500 - }, - { - "epoch": 0.25460774569930267, - "grad_norm": 754.3787231445312, - "learning_rate": 4.655930810734589e-05, - "loss": 98.6249, - "step": 31510 - }, - { - "epoch": 0.2546885479035868, - "grad_norm": 753.6112670898438, - "learning_rate": 4.65557728032711e-05, - "loss": 116.2527, - "step": 31520 - }, - { - "epoch": 0.25476935010787094, - "grad_norm": 825.7274169921875, - "learning_rate": 4.6552235818232764e-05, - "loss": 107.6051, - "step": 31530 - }, - { - "epoch": 0.2548501523121551, - "grad_norm": 682.1669921875, - "learning_rate": 4.6548697152506705e-05, - "loss": 118.6188, - "step": 31540 - }, - { - "epoch": 0.2549309545164392, - "grad_norm": 873.027587890625, - "learning_rate": 4.654515680636888e-05, - "loss": 94.5319, - "step": 31550 - }, - { - "epoch": 0.25501175672072335, - "grad_norm": 709.2208862304688, - "learning_rate": 4.654161478009536e-05, - "loss": 115.816, - "step": 31560 - }, - { - "epoch": 0.2550925589250075, - "grad_norm": 1426.3155517578125, - "learning_rate": 4.653807107396237e-05, - "loss": 126.7004, - "step": 31570 - }, - { - "epoch": 0.2551733611292916, - "grad_norm": 713.8536376953125, - "learning_rate": 4.653452568824625e-05, - "loss": 115.0852, - "step": 31580 - }, - { - "epoch": 0.25525416333357576, - "grad_norm": 725.6244506835938, - "learning_rate": 4.653097862322348e-05, - "loss": 109.1049, - "step": 31590 - }, - { - "epoch": 0.2553349655378599, - "grad_norm": 1287.509765625, - "learning_rate": 4.652742987917066e-05, - "loss": 148.3504, - "step": 31600 - }, - { - "epoch": 0.25541576774214403, - "grad_norm": 668.9722900390625, - "learning_rate": 4.652387945636454e-05, - "loss": 132.7015, - "step": 31610 - }, - { - "epoch": 0.25549656994642816, - "grad_norm": 963.2771606445312, - "learning_rate": 4.652032735508198e-05, - "loss": 117.7157, - "step": 31620 - }, - { - "epoch": 0.25557737215071225, - "grad_norm": 2376.852783203125, - "learning_rate": 4.651677357559998e-05, - "loss": 111.6428, - "step": 31630 - }, - { - "epoch": 0.2556581743549964, - "grad_norm": 1291.9073486328125, - "learning_rate": 4.651321811819568e-05, - "loss": 153.0415, - "step": 31640 - }, - { - "epoch": 0.2557389765592805, - "grad_norm": 1309.7423095703125, - "learning_rate": 4.6509660983146334e-05, - "loss": 133.7439, - "step": 31650 - }, - { - "epoch": 0.25581977876356465, - "grad_norm": 1093.7462158203125, - "learning_rate": 4.650610217072934e-05, - "loss": 134.1148, - "step": 31660 - }, - { - "epoch": 0.2559005809678488, - "grad_norm": 1397.806396484375, - "learning_rate": 4.650254168122222e-05, - "loss": 111.1662, - "step": 31670 - }, - { - "epoch": 0.2559813831721329, - "grad_norm": 927.7184448242188, - "learning_rate": 4.649897951490262e-05, - "loss": 109.6437, - "step": 31680 - }, - { - "epoch": 0.25606218537641706, - "grad_norm": 741.7882080078125, - "learning_rate": 4.649541567204834e-05, - "loss": 117.6267, - "step": 31690 - }, - { - "epoch": 0.2561429875807012, - "grad_norm": 3240.059814453125, - "learning_rate": 4.649185015293728e-05, - "loss": 144.5376, - "step": 31700 - }, - { - "epoch": 0.25622378978498533, - "grad_norm": 2207.06201171875, - "learning_rate": 4.6488282957847494e-05, - "loss": 143.006, - "step": 31710 - }, - { - "epoch": 0.25630459198926947, - "grad_norm": 1139.0548095703125, - "learning_rate": 4.648471408705717e-05, - "loss": 123.4253, - "step": 31720 - }, - { - "epoch": 0.2563853941935536, - "grad_norm": 1316.4866943359375, - "learning_rate": 4.648114354084459e-05, - "loss": 176.6302, - "step": 31730 - }, - { - "epoch": 0.25646619639783774, - "grad_norm": 819.2802124023438, - "learning_rate": 4.647757131948822e-05, - "loss": 151.3781, - "step": 31740 - }, - { - "epoch": 0.2565469986021219, - "grad_norm": 652.6885986328125, - "learning_rate": 4.6473997423266614e-05, - "loss": 128.133, - "step": 31750 - }, - { - "epoch": 0.256627800806406, - "grad_norm": 988.3428344726562, - "learning_rate": 4.647042185245847e-05, - "loss": 124.0339, - "step": 31760 - }, - { - "epoch": 0.25670860301069015, - "grad_norm": 750.12890625, - "learning_rate": 4.646684460734263e-05, - "loss": 121.2098, - "step": 31770 - }, - { - "epoch": 0.2567894052149743, - "grad_norm": 769.1755981445312, - "learning_rate": 4.6463265688198044e-05, - "loss": 128.9864, - "step": 31780 - }, - { - "epoch": 0.2568702074192584, - "grad_norm": 1202.3675537109375, - "learning_rate": 4.645968509530381e-05, - "loss": 125.9083, - "step": 31790 - }, - { - "epoch": 0.2569510096235425, - "grad_norm": 1499.5919189453125, - "learning_rate": 4.645610282893915e-05, - "loss": 116.1968, - "step": 31800 - }, - { - "epoch": 0.25703181182782664, - "grad_norm": 1036.8206787109375, - "learning_rate": 4.6452518889383414e-05, - "loss": 146.0599, - "step": 31810 - }, - { - "epoch": 0.2571126140321108, - "grad_norm": 849.16552734375, - "learning_rate": 4.6448933276916076e-05, - "loss": 116.8707, - "step": 31820 - }, - { - "epoch": 0.2571934162363949, - "grad_norm": 926.8781127929688, - "learning_rate": 4.644534599181677e-05, - "loss": 99.8281, - "step": 31830 - }, - { - "epoch": 0.25727421844067905, - "grad_norm": 574.5735473632812, - "learning_rate": 4.644175703436522e-05, - "loss": 117.8184, - "step": 31840 - }, - { - "epoch": 0.2573550206449632, - "grad_norm": 1198.481201171875, - "learning_rate": 4.643816640484131e-05, - "loss": 114.8588, - "step": 31850 - }, - { - "epoch": 0.2574358228492473, - "grad_norm": 1389.0462646484375, - "learning_rate": 4.6434574103525044e-05, - "loss": 163.5483, - "step": 31860 - }, - { - "epoch": 0.25751662505353146, - "grad_norm": 1393.81640625, - "learning_rate": 4.6430980130696555e-05, - "loss": 135.483, - "step": 31870 - }, - { - "epoch": 0.2575974272578156, - "grad_norm": 1305.5269775390625, - "learning_rate": 4.6427384486636113e-05, - "loss": 114.8484, - "step": 31880 - }, - { - "epoch": 0.25767822946209973, - "grad_norm": 1784.3841552734375, - "learning_rate": 4.6423787171624114e-05, - "loss": 148.0979, - "step": 31890 - }, - { - "epoch": 0.25775903166638386, - "grad_norm": 989.03369140625, - "learning_rate": 4.642018818594107e-05, - "loss": 168.4256, - "step": 31900 - }, - { - "epoch": 0.257839833870668, - "grad_norm": 822.5046997070312, - "learning_rate": 4.6416587529867664e-05, - "loss": 97.9412, - "step": 31910 - }, - { - "epoch": 0.25792063607495214, - "grad_norm": 859.1937255859375, - "learning_rate": 4.6412985203684654e-05, - "loss": 121.4524, - "step": 31920 - }, - { - "epoch": 0.2580014382792363, - "grad_norm": 1053.469970703125, - "learning_rate": 4.6409381207672974e-05, - "loss": 114.0253, - "step": 31930 - }, - { - "epoch": 0.2580822404835204, - "grad_norm": 845.4104614257812, - "learning_rate": 4.640577554211366e-05, - "loss": 126.5556, - "step": 31940 - }, - { - "epoch": 0.25816304268780454, - "grad_norm": 843.736572265625, - "learning_rate": 4.64021682072879e-05, - "loss": 86.2402, - "step": 31950 - }, - { - "epoch": 0.2582438448920887, - "grad_norm": 1914.66552734375, - "learning_rate": 4.639855920347701e-05, - "loss": 145.8625, - "step": 31960 - }, - { - "epoch": 0.25832464709637276, - "grad_norm": 1299.5343017578125, - "learning_rate": 4.6394948530962396e-05, - "loss": 104.9562, - "step": 31970 - }, - { - "epoch": 0.2584054493006569, - "grad_norm": 2067.303955078125, - "learning_rate": 4.6391336190025644e-05, - "loss": 122.0683, - "step": 31980 - }, - { - "epoch": 0.25848625150494103, - "grad_norm": 935.3926391601562, - "learning_rate": 4.638772218094847e-05, - "loss": 108.5239, - "step": 31990 - }, - { - "epoch": 0.25856705370922517, - "grad_norm": 842.889892578125, - "learning_rate": 4.638410650401267e-05, - "loss": 96.5829, - "step": 32000 - }, - { - "epoch": 0.2586478559135093, - "grad_norm": 792.8538818359375, - "learning_rate": 4.638048915950022e-05, - "loss": 124.016, - "step": 32010 - }, - { - "epoch": 0.25872865811779344, - "grad_norm": 1047.8841552734375, - "learning_rate": 4.6376870147693196e-05, - "loss": 101.2743, - "step": 32020 - }, - { - "epoch": 0.2588094603220776, - "grad_norm": 1486.6014404296875, - "learning_rate": 4.6373249468873833e-05, - "loss": 127.4495, - "step": 32030 - }, - { - "epoch": 0.2588902625263617, - "grad_norm": 862.8234252929688, - "learning_rate": 4.6369627123324465e-05, - "loss": 129.138, - "step": 32040 - }, - { - "epoch": 0.25897106473064585, - "grad_norm": 1185.45703125, - "learning_rate": 4.636600311132758e-05, - "loss": 142.1432, - "step": 32050 - }, - { - "epoch": 0.25905186693493, - "grad_norm": 636.2596435546875, - "learning_rate": 4.636237743316578e-05, - "loss": 152.3235, - "step": 32060 - }, - { - "epoch": 0.2591326691392141, - "grad_norm": 1174.3681640625, - "learning_rate": 4.6358750089121795e-05, - "loss": 143.3491, - "step": 32070 - }, - { - "epoch": 0.25921347134349826, - "grad_norm": 1264.450439453125, - "learning_rate": 4.635512107947851e-05, - "loss": 105.3538, - "step": 32080 - }, - { - "epoch": 0.2592942735477824, - "grad_norm": 1059.5438232421875, - "learning_rate": 4.635149040451891e-05, - "loss": 127.4254, - "step": 32090 - }, - { - "epoch": 0.25937507575206653, - "grad_norm": 833.9962158203125, - "learning_rate": 4.6347858064526125e-05, - "loss": 136.2431, - "step": 32100 - }, - { - "epoch": 0.25945587795635067, - "grad_norm": 660.1419067382812, - "learning_rate": 4.634422405978342e-05, - "loss": 96.6548, - "step": 32110 - }, - { - "epoch": 0.2595366801606348, - "grad_norm": 1916.310546875, - "learning_rate": 4.634058839057417e-05, - "loss": 138.4265, - "step": 32120 - }, - { - "epoch": 0.25961748236491894, - "grad_norm": 1145.991455078125, - "learning_rate": 4.63369510571819e-05, - "loss": 157.2071, - "step": 32130 - }, - { - "epoch": 0.2596982845692031, - "grad_norm": 1537.623291015625, - "learning_rate": 4.6333312059890256e-05, - "loss": 176.9448, - "step": 32140 - }, - { - "epoch": 0.25977908677348716, - "grad_norm": 976.8302001953125, - "learning_rate": 4.632967139898301e-05, - "loss": 108.3438, - "step": 32150 - }, - { - "epoch": 0.2598598889777713, - "grad_norm": 1129.4639892578125, - "learning_rate": 4.6326029074744074e-05, - "loss": 136.6159, - "step": 32160 - }, - { - "epoch": 0.25994069118205543, - "grad_norm": 555.60009765625, - "learning_rate": 4.632238508745748e-05, - "loss": 146.1014, - "step": 32170 - }, - { - "epoch": 0.26002149338633956, - "grad_norm": 776.1646728515625, - "learning_rate": 4.63187394374074e-05, - "loss": 131.6885, - "step": 32180 - }, - { - "epoch": 0.2601022955906237, - "grad_norm": 1262.349609375, - "learning_rate": 4.631509212487811e-05, - "loss": 120.8813, - "step": 32190 - }, - { - "epoch": 0.26018309779490784, - "grad_norm": 1063.7286376953125, - "learning_rate": 4.631144315015407e-05, - "loss": 117.0632, - "step": 32200 - }, - { - "epoch": 0.26026389999919197, - "grad_norm": 895.3576049804688, - "learning_rate": 4.63077925135198e-05, - "loss": 106.281, - "step": 32210 - }, - { - "epoch": 0.2603447022034761, - "grad_norm": 1085.85302734375, - "learning_rate": 4.630414021525999e-05, - "loss": 118.2093, - "step": 32220 - }, - { - "epoch": 0.26042550440776024, - "grad_norm": 1039.57470703125, - "learning_rate": 4.6300486255659484e-05, - "loss": 106.82, - "step": 32230 - }, - { - "epoch": 0.2605063066120444, - "grad_norm": 1038.2347412109375, - "learning_rate": 4.629683063500319e-05, - "loss": 101.7498, - "step": 32240 - }, - { - "epoch": 0.2605871088163285, - "grad_norm": 1041.995361328125, - "learning_rate": 4.629317335357619e-05, - "loss": 130.4589, - "step": 32250 - }, - { - "epoch": 0.26066791102061265, - "grad_norm": 1721.8536376953125, - "learning_rate": 4.62895144116637e-05, - "loss": 138.5204, - "step": 32260 - }, - { - "epoch": 0.2607487132248968, - "grad_norm": 1276.2000732421875, - "learning_rate": 4.6285853809551036e-05, - "loss": 136.2981, - "step": 32270 - }, - { - "epoch": 0.2608295154291809, - "grad_norm": 1091.5250244140625, - "learning_rate": 4.628219154752367e-05, - "loss": 192.9632, - "step": 32280 - }, - { - "epoch": 0.26091031763346506, - "grad_norm": 874.2247314453125, - "learning_rate": 4.627852762586718e-05, - "loss": 121.6275, - "step": 32290 - }, - { - "epoch": 0.2609911198377492, - "grad_norm": 1179.25732421875, - "learning_rate": 4.6274862044867304e-05, - "loss": 137.7748, - "step": 32300 - }, - { - "epoch": 0.26107192204203333, - "grad_norm": 1310.9068603515625, - "learning_rate": 4.627119480480987e-05, - "loss": 164.4533, - "step": 32310 - }, - { - "epoch": 0.2611527242463174, - "grad_norm": 728.2359619140625, - "learning_rate": 4.626752590598088e-05, - "loss": 144.0471, - "step": 32320 - }, - { - "epoch": 0.26123352645060155, - "grad_norm": 3329.939208984375, - "learning_rate": 4.626385534866642e-05, - "loss": 115.7883, - "step": 32330 - }, - { - "epoch": 0.2613143286548857, - "grad_norm": 1444.554443359375, - "learning_rate": 4.626018313315275e-05, - "loss": 106.2003, - "step": 32340 - }, - { - "epoch": 0.2613951308591698, - "grad_norm": 602.4769897460938, - "learning_rate": 4.625650925972622e-05, - "loss": 153.679, - "step": 32350 - }, - { - "epoch": 0.26147593306345396, - "grad_norm": 965.1641235351562, - "learning_rate": 4.625283372867333e-05, - "loss": 102.821, - "step": 32360 - }, - { - "epoch": 0.2615567352677381, - "grad_norm": 1360.6485595703125, - "learning_rate": 4.62491565402807e-05, - "loss": 113.4514, - "step": 32370 - }, - { - "epoch": 0.26163753747202223, - "grad_norm": 1211.0313720703125, - "learning_rate": 4.6245477694835106e-05, - "loss": 111.3167, - "step": 32380 - }, - { - "epoch": 0.26171833967630637, - "grad_norm": 576.995361328125, - "learning_rate": 4.624179719262342e-05, - "loss": 125.0896, - "step": 32390 - }, - { - "epoch": 0.2617991418805905, - "grad_norm": 764.3323364257812, - "learning_rate": 4.6238115033932636e-05, - "loss": 105.9767, - "step": 32400 - }, - { - "epoch": 0.26187994408487464, - "grad_norm": 1187.8406982421875, - "learning_rate": 4.623443121904992e-05, - "loss": 129.9512, - "step": 32410 - }, - { - "epoch": 0.2619607462891588, - "grad_norm": 899.810546875, - "learning_rate": 4.623074574826254e-05, - "loss": 133.4673, - "step": 32420 - }, - { - "epoch": 0.2620415484934429, - "grad_norm": 845.756103515625, - "learning_rate": 4.622705862185789e-05, - "loss": 115.9907, - "step": 32430 - }, - { - "epoch": 0.26212235069772705, - "grad_norm": 847.8451538085938, - "learning_rate": 4.622336984012351e-05, - "loss": 130.2188, - "step": 32440 - }, - { - "epoch": 0.2622031529020112, - "grad_norm": 1480.852294921875, - "learning_rate": 4.621967940334705e-05, - "loss": 109.0172, - "step": 32450 - }, - { - "epoch": 0.2622839551062953, - "grad_norm": 1301.56787109375, - "learning_rate": 4.621598731181629e-05, - "loss": 103.3571, - "step": 32460 - }, - { - "epoch": 0.26236475731057946, - "grad_norm": 871.2351684570312, - "learning_rate": 4.6212293565819166e-05, - "loss": 121.6138, - "step": 32470 - }, - { - "epoch": 0.2624455595148636, - "grad_norm": 10766.201171875, - "learning_rate": 4.6208598165643715e-05, - "loss": 216.6335, - "step": 32480 - }, - { - "epoch": 0.26252636171914767, - "grad_norm": 800.0596313476562, - "learning_rate": 4.62049011115781e-05, - "loss": 138.1856, - "step": 32490 - }, - { - "epoch": 0.2626071639234318, - "grad_norm": 1323.046142578125, - "learning_rate": 4.620120240391065e-05, - "loss": 126.7793, - "step": 32500 - }, - { - "epoch": 0.26268796612771594, - "grad_norm": 755.5288696289062, - "learning_rate": 4.619750204292978e-05, - "loss": 116.7864, - "step": 32510 - }, - { - "epoch": 0.2627687683320001, - "grad_norm": 1724.346923828125, - "learning_rate": 4.619380002892406e-05, - "loss": 108.0637, - "step": 32520 - }, - { - "epoch": 0.2628495705362842, - "grad_norm": 1047.6300048828125, - "learning_rate": 4.6190096362182167e-05, - "loss": 130.5901, - "step": 32530 - }, - { - "epoch": 0.26293037274056835, - "grad_norm": 545.4581298828125, - "learning_rate": 4.618639104299294e-05, - "loss": 130.9705, - "step": 32540 - }, - { - "epoch": 0.2630111749448525, - "grad_norm": 948.7664184570312, - "learning_rate": 4.61826840716453e-05, - "loss": 104.0358, - "step": 32550 - }, - { - "epoch": 0.2630919771491366, - "grad_norm": 677.6072998046875, - "learning_rate": 4.617897544842836e-05, - "loss": 88.4454, - "step": 32560 - }, - { - "epoch": 0.26317277935342076, - "grad_norm": 668.8291625976562, - "learning_rate": 4.61752651736313e-05, - "loss": 103.1123, - "step": 32570 - }, - { - "epoch": 0.2632535815577049, - "grad_norm": 591.3098754882812, - "learning_rate": 4.617155324754346e-05, - "loss": 94.5506, - "step": 32580 - }, - { - "epoch": 0.26333438376198903, - "grad_norm": 878.4230346679688, - "learning_rate": 4.6167839670454315e-05, - "loss": 116.5864, - "step": 32590 - }, - { - "epoch": 0.26341518596627317, - "grad_norm": 1147.527587890625, - "learning_rate": 4.616412444265345e-05, - "loss": 136.4555, - "step": 32600 - }, - { - "epoch": 0.2634959881705573, - "grad_norm": 1084.6822509765625, - "learning_rate": 4.6160407564430574e-05, - "loss": 130.2947, - "step": 32610 - }, - { - "epoch": 0.26357679037484144, - "grad_norm": 2093.77880859375, - "learning_rate": 4.6156689036075555e-05, - "loss": 132.7588, - "step": 32620 - }, - { - "epoch": 0.2636575925791256, - "grad_norm": 1265.29150390625, - "learning_rate": 4.6152968857878366e-05, - "loss": 111.8967, - "step": 32630 - }, - { - "epoch": 0.2637383947834097, - "grad_norm": 542.5083618164062, - "learning_rate": 4.614924703012911e-05, - "loss": 102.1058, - "step": 32640 - }, - { - "epoch": 0.26381919698769385, - "grad_norm": 917.505615234375, - "learning_rate": 4.614552355311802e-05, - "loss": 87.7497, - "step": 32650 - }, - { - "epoch": 0.26389999919197793, - "grad_norm": 1161.6602783203125, - "learning_rate": 4.614179842713547e-05, - "loss": 115.7085, - "step": 32660 - }, - { - "epoch": 0.26398080139626207, - "grad_norm": 764.512451171875, - "learning_rate": 4.613807165247195e-05, - "loss": 107.8064, - "step": 32670 - }, - { - "epoch": 0.2640616036005462, - "grad_norm": 1938.1666259765625, - "learning_rate": 4.6134343229418075e-05, - "loss": 133.5946, - "step": 32680 - }, - { - "epoch": 0.26414240580483034, - "grad_norm": 1128.4102783203125, - "learning_rate": 4.613061315826461e-05, - "loss": 138.7345, - "step": 32690 - }, - { - "epoch": 0.2642232080091145, - "grad_norm": 1203.184814453125, - "learning_rate": 4.612688143930242e-05, - "loss": 118.502, - "step": 32700 - }, - { - "epoch": 0.2643040102133986, - "grad_norm": 1131.944091796875, - "learning_rate": 4.612314807282251e-05, - "loss": 114.0242, - "step": 32710 - }, - { - "epoch": 0.26438481241768275, - "grad_norm": 1733.390869140625, - "learning_rate": 4.611941305911602e-05, - "loss": 143.6088, - "step": 32720 - }, - { - "epoch": 0.2644656146219669, - "grad_norm": 1442.8763427734375, - "learning_rate": 4.611567639847422e-05, - "loss": 108.6247, - "step": 32730 - }, - { - "epoch": 0.264546416826251, - "grad_norm": 1246.44970703125, - "learning_rate": 4.61119380911885e-05, - "loss": 125.3503, - "step": 32740 - }, - { - "epoch": 0.26462721903053515, - "grad_norm": 988.0870971679688, - "learning_rate": 4.610819813755038e-05, - "loss": 109.1501, - "step": 32750 - }, - { - "epoch": 0.2647080212348193, - "grad_norm": 914.0441284179688, - "learning_rate": 4.610445653785151e-05, - "loss": 107.4585, - "step": 32760 - }, - { - "epoch": 0.2647888234391034, - "grad_norm": 827.869384765625, - "learning_rate": 4.610071329238366e-05, - "loss": 91.3245, - "step": 32770 - }, - { - "epoch": 0.26486962564338756, - "grad_norm": 1207.339599609375, - "learning_rate": 4.6096968401438745e-05, - "loss": 109.7002, - "step": 32780 - }, - { - "epoch": 0.2649504278476717, - "grad_norm": 1092.970947265625, - "learning_rate": 4.6093221865308786e-05, - "loss": 159.7583, - "step": 32790 - }, - { - "epoch": 0.26503123005195584, - "grad_norm": 1348.759521484375, - "learning_rate": 4.6089473684285974e-05, - "loss": 137.5785, - "step": 32800 - }, - { - "epoch": 0.26511203225623997, - "grad_norm": 710.8445434570312, - "learning_rate": 4.608572385866257e-05, - "loss": 96.0448, - "step": 32810 - }, - { - "epoch": 0.2651928344605241, - "grad_norm": 772.0751342773438, - "learning_rate": 4.608197238873101e-05, - "loss": 129.3, - "step": 32820 - }, - { - "epoch": 0.26527363666480824, - "grad_norm": 1264.3970947265625, - "learning_rate": 4.607821927478383e-05, - "loss": 142.2885, - "step": 32830 - }, - { - "epoch": 0.2653544388690923, - "grad_norm": 833.395263671875, - "learning_rate": 4.607446451711372e-05, - "loss": 124.798, - "step": 32840 - }, - { - "epoch": 0.26543524107337646, - "grad_norm": 1927.530517578125, - "learning_rate": 4.6070708116013476e-05, - "loss": 120.635, - "step": 32850 - }, - { - "epoch": 0.2655160432776606, - "grad_norm": 798.7236328125, - "learning_rate": 4.6066950071776015e-05, - "loss": 100.3111, - "step": 32860 - }, - { - "epoch": 0.26559684548194473, - "grad_norm": 755.7781372070312, - "learning_rate": 4.606319038469443e-05, - "loss": 149.0854, - "step": 32870 - }, - { - "epoch": 0.26567764768622887, - "grad_norm": 1669.6309814453125, - "learning_rate": 4.605942905506188e-05, - "loss": 122.094, - "step": 32880 - }, - { - "epoch": 0.265758449890513, - "grad_norm": 1145.0926513671875, - "learning_rate": 4.605566608317169e-05, - "loss": 160.8814, - "step": 32890 - }, - { - "epoch": 0.26583925209479714, - "grad_norm": 603.8025512695312, - "learning_rate": 4.605190146931731e-05, - "loss": 137.3919, - "step": 32900 - }, - { - "epoch": 0.2659200542990813, - "grad_norm": 710.5828857421875, - "learning_rate": 4.604813521379231e-05, - "loss": 134.6538, - "step": 32910 - }, - { - "epoch": 0.2660008565033654, - "grad_norm": 1110.471923828125, - "learning_rate": 4.6044367316890386e-05, - "loss": 109.5416, - "step": 32920 - }, - { - "epoch": 0.26608165870764955, - "grad_norm": 1101.7508544921875, - "learning_rate": 4.604059777890537e-05, - "loss": 107.0829, - "step": 32930 - }, - { - "epoch": 0.2661624609119337, - "grad_norm": 1244.531982421875, - "learning_rate": 4.6036826600131216e-05, - "loss": 98.2252, - "step": 32940 - }, - { - "epoch": 0.2662432631162178, - "grad_norm": 1753.546630859375, - "learning_rate": 4.603305378086201e-05, - "loss": 147.216, - "step": 32950 - }, - { - "epoch": 0.26632406532050196, - "grad_norm": 1442.4443359375, - "learning_rate": 4.602927932139197e-05, - "loss": 137.8071, - "step": 32960 - }, - { - "epoch": 0.2664048675247861, - "grad_norm": 917.7630615234375, - "learning_rate": 4.602550322201542e-05, - "loss": 127.9167, - "step": 32970 - }, - { - "epoch": 0.26648566972907023, - "grad_norm": 1136.6220703125, - "learning_rate": 4.602172548302684e-05, - "loss": 81.1369, - "step": 32980 - }, - { - "epoch": 0.26656647193335437, - "grad_norm": 839.653564453125, - "learning_rate": 4.6017946104720836e-05, - "loss": 129.4245, - "step": 32990 - }, - { - "epoch": 0.2666472741376385, - "grad_norm": 742.4212646484375, - "learning_rate": 4.601416508739211e-05, - "loss": 120.5945, - "step": 33000 - }, - { - "epoch": 0.2667280763419226, - "grad_norm": 846.051513671875, - "learning_rate": 4.601038243133552e-05, - "loss": 140.4996, - "step": 33010 - }, - { - "epoch": 0.2668088785462067, - "grad_norm": 1683.828857421875, - "learning_rate": 4.6006598136846056e-05, - "loss": 119.8989, - "step": 33020 - }, - { - "epoch": 0.26688968075049085, - "grad_norm": 1139.3094482421875, - "learning_rate": 4.6002812204218816e-05, - "loss": 105.4472, - "step": 33030 - }, - { - "epoch": 0.266970482954775, - "grad_norm": 1079.070556640625, - "learning_rate": 4.599902463374903e-05, - "loss": 107.6979, - "step": 33040 - }, - { - "epoch": 0.2670512851590591, - "grad_norm": 763.587890625, - "learning_rate": 4.599523542573207e-05, - "loss": 117.8105, - "step": 33050 - }, - { - "epoch": 0.26713208736334326, - "grad_norm": 567.7410278320312, - "learning_rate": 4.599144458046343e-05, - "loss": 105.0084, - "step": 33060 - }, - { - "epoch": 0.2672128895676274, - "grad_norm": 2938.99755859375, - "learning_rate": 4.5987652098238714e-05, - "loss": 122.589, - "step": 33070 - }, - { - "epoch": 0.26729369177191153, - "grad_norm": 910.2901611328125, - "learning_rate": 4.598385797935368e-05, - "loss": 95.9568, - "step": 33080 - }, - { - "epoch": 0.26737449397619567, - "grad_norm": 1389.7559814453125, - "learning_rate": 4.598006222410419e-05, - "loss": 96.0158, - "step": 33090 - }, - { - "epoch": 0.2674552961804798, - "grad_norm": 1100.4217529296875, - "learning_rate": 4.597626483278625e-05, - "loss": 116.382, - "step": 33100 - }, - { - "epoch": 0.26753609838476394, - "grad_norm": 1173.6622314453125, - "learning_rate": 4.5972465805695996e-05, - "loss": 126.8154, - "step": 33110 - }, - { - "epoch": 0.2676169005890481, - "grad_norm": 786.0316772460938, - "learning_rate": 4.596866514312967e-05, - "loss": 129.5567, - "step": 33120 - }, - { - "epoch": 0.2676977027933322, - "grad_norm": 858.4003295898438, - "learning_rate": 4.596486284538367e-05, - "loss": 114.7303, - "step": 33130 - }, - { - "epoch": 0.26777850499761635, - "grad_norm": 1453.0225830078125, - "learning_rate": 4.596105891275449e-05, - "loss": 135.1747, - "step": 33140 - }, - { - "epoch": 0.2678593072019005, - "grad_norm": 1426.4730224609375, - "learning_rate": 4.595725334553879e-05, - "loss": 168.6764, - "step": 33150 - }, - { - "epoch": 0.2679401094061846, - "grad_norm": 1348.0870361328125, - "learning_rate": 4.5953446144033316e-05, - "loss": 114.2353, - "step": 33160 - }, - { - "epoch": 0.26802091161046876, - "grad_norm": 815.2488403320312, - "learning_rate": 4.594963730853497e-05, - "loss": 131.8211, - "step": 33170 - }, - { - "epoch": 0.26810171381475284, - "grad_norm": 691.8651123046875, - "learning_rate": 4.594582683934078e-05, - "loss": 142.8994, - "step": 33180 - }, - { - "epoch": 0.268182516019037, - "grad_norm": 1122.7806396484375, - "learning_rate": 4.5942014736747875e-05, - "loss": 99.2127, - "step": 33190 - }, - { - "epoch": 0.2682633182233211, - "grad_norm": 4042.274169921875, - "learning_rate": 4.593820100105355e-05, - "loss": 139.5586, - "step": 33200 - }, - { - "epoch": 0.26834412042760525, - "grad_norm": 1002.4708251953125, - "learning_rate": 4.59343856325552e-05, - "loss": 112.2433, - "step": 33210 - }, - { - "epoch": 0.2684249226318894, - "grad_norm": 840.0751953125, - "learning_rate": 4.593056863155034e-05, - "loss": 160.196, - "step": 33220 - }, - { - "epoch": 0.2685057248361735, - "grad_norm": 1244.7557373046875, - "learning_rate": 4.592674999833666e-05, - "loss": 116.2589, - "step": 33230 - }, - { - "epoch": 0.26858652704045766, - "grad_norm": 826.8807983398438, - "learning_rate": 4.5922929733211926e-05, - "loss": 87.047, - "step": 33240 - }, - { - "epoch": 0.2686673292447418, - "grad_norm": 1107.5628662109375, - "learning_rate": 4.591910783647404e-05, - "loss": 156.9146, - "step": 33250 - }, - { - "epoch": 0.26874813144902593, - "grad_norm": 1074.4185791015625, - "learning_rate": 4.591528430842107e-05, - "loss": 109.7286, - "step": 33260 - }, - { - "epoch": 0.26882893365331006, - "grad_norm": 1093.247314453125, - "learning_rate": 4.591145914935116e-05, - "loss": 127.377, - "step": 33270 - }, - { - "epoch": 0.2689097358575942, - "grad_norm": 1278.361328125, - "learning_rate": 4.59076323595626e-05, - "loss": 136.0104, - "step": 33280 - }, - { - "epoch": 0.26899053806187834, - "grad_norm": 961.7113037109375, - "learning_rate": 4.590380393935383e-05, - "loss": 119.5472, - "step": 33290 - }, - { - "epoch": 0.2690713402661625, - "grad_norm": 2000.9180908203125, - "learning_rate": 4.589997388902338e-05, - "loss": 157.2297, - "step": 33300 - }, - { - "epoch": 0.2691521424704466, - "grad_norm": 634.181884765625, - "learning_rate": 4.5896142208869954e-05, - "loss": 113.5986, - "step": 33310 - }, - { - "epoch": 0.26923294467473075, - "grad_norm": 1461.14404296875, - "learning_rate": 4.589230889919232e-05, - "loss": 115.1206, - "step": 33320 - }, - { - "epoch": 0.2693137468790149, - "grad_norm": 1009.3434448242188, - "learning_rate": 4.588847396028942e-05, - "loss": 143.96, - "step": 33330 - }, - { - "epoch": 0.269394549083299, - "grad_norm": 734.7911987304688, - "learning_rate": 4.5884637392460314e-05, - "loss": 101.5015, - "step": 33340 - }, - { - "epoch": 0.2694753512875831, - "grad_norm": 887.0006103515625, - "learning_rate": 4.588079919600419e-05, - "loss": 126.053, - "step": 33350 - }, - { - "epoch": 0.26955615349186723, - "grad_norm": 580.8544921875, - "learning_rate": 4.5876959371220344e-05, - "loss": 97.2666, - "step": 33360 - }, - { - "epoch": 0.26963695569615137, - "grad_norm": 1209.7232666015625, - "learning_rate": 4.587311791840822e-05, - "loss": 150.8719, - "step": 33370 - }, - { - "epoch": 0.2697177579004355, - "grad_norm": 3253.60205078125, - "learning_rate": 4.5869274837867394e-05, - "loss": 129.5684, - "step": 33380 - }, - { - "epoch": 0.26979856010471964, - "grad_norm": 833.0447998046875, - "learning_rate": 4.5865430129897536e-05, - "loss": 89.548, - "step": 33390 - }, - { - "epoch": 0.2698793623090038, - "grad_norm": 511.82110595703125, - "learning_rate": 4.586158379479848e-05, - "loss": 99.7092, - "step": 33400 - }, - { - "epoch": 0.2699601645132879, - "grad_norm": 902.0722045898438, - "learning_rate": 4.5857735832870166e-05, - "loss": 125.9244, - "step": 33410 - }, - { - "epoch": 0.27004096671757205, - "grad_norm": 1096.8389892578125, - "learning_rate": 4.585388624441267e-05, - "loss": 126.6443, - "step": 33420 - }, - { - "epoch": 0.2701217689218562, - "grad_norm": 1337.8590087890625, - "learning_rate": 4.585003502972618e-05, - "loss": 113.9963, - "step": 33430 - }, - { - "epoch": 0.2702025711261403, - "grad_norm": 1261.0655517578125, - "learning_rate": 4.5846182189111035e-05, - "loss": 137.801, - "step": 33440 - }, - { - "epoch": 0.27028337333042446, - "grad_norm": 456.492431640625, - "learning_rate": 4.584232772286768e-05, - "loss": 83.9635, - "step": 33450 - }, - { - "epoch": 0.2703641755347086, - "grad_norm": 1019.708251953125, - "learning_rate": 4.58384716312967e-05, - "loss": 142.1781, - "step": 33460 - }, - { - "epoch": 0.27044497773899273, - "grad_norm": 966.0032958984375, - "learning_rate": 4.583461391469879e-05, - "loss": 114.3821, - "step": 33470 - }, - { - "epoch": 0.27052577994327687, - "grad_norm": 1801.5472412109375, - "learning_rate": 4.583075457337479e-05, - "loss": 155.7893, - "step": 33480 - }, - { - "epoch": 0.270606582147561, - "grad_norm": 1297.2867431640625, - "learning_rate": 4.5826893607625665e-05, - "loss": 120.5911, - "step": 33490 - }, - { - "epoch": 0.27068738435184514, - "grad_norm": 1396.174072265625, - "learning_rate": 4.5823031017752485e-05, - "loss": 122.4062, - "step": 33500 - }, - { - "epoch": 0.2707681865561293, - "grad_norm": 997.7889404296875, - "learning_rate": 4.581916680405648e-05, - "loss": 110.237, - "step": 33510 - }, - { - "epoch": 0.2708489887604134, - "grad_norm": 1115.41357421875, - "learning_rate": 4.581530096683898e-05, - "loss": 114.6065, - "step": 33520 - }, - { - "epoch": 0.2709297909646975, - "grad_norm": 1366.340087890625, - "learning_rate": 4.5811433506401456e-05, - "loss": 87.3567, - "step": 33530 - }, - { - "epoch": 0.27101059316898163, - "grad_norm": 773.7557983398438, - "learning_rate": 4.580756442304549e-05, - "loss": 81.9729, - "step": 33540 - }, - { - "epoch": 0.27109139537326576, - "grad_norm": 1567.5789794921875, - "learning_rate": 4.5803693717072815e-05, - "loss": 143.0868, - "step": 33550 - }, - { - "epoch": 0.2711721975775499, - "grad_norm": 1048.724853515625, - "learning_rate": 4.579982138878527e-05, - "loss": 111.2322, - "step": 33560 - }, - { - "epoch": 0.27125299978183404, - "grad_norm": 1020.1063842773438, - "learning_rate": 4.579594743848482e-05, - "loss": 153.6946, - "step": 33570 - }, - { - "epoch": 0.2713338019861182, - "grad_norm": 667.2181396484375, - "learning_rate": 4.579207186647357e-05, - "loss": 139.1391, - "step": 33580 - }, - { - "epoch": 0.2714146041904023, - "grad_norm": 3652.7509765625, - "learning_rate": 4.5788194673053756e-05, - "loss": 162.2207, - "step": 33590 - }, - { - "epoch": 0.27149540639468644, - "grad_norm": 787.8720092773438, - "learning_rate": 4.5784315858527715e-05, - "loss": 108.865, - "step": 33600 - }, - { - "epoch": 0.2715762085989706, - "grad_norm": 984.5777587890625, - "learning_rate": 4.578043542319793e-05, - "loss": 118.9497, - "step": 33610 - }, - { - "epoch": 0.2716570108032547, - "grad_norm": 800.0744018554688, - "learning_rate": 4.5776553367367e-05, - "loss": 111.6959, - "step": 33620 - }, - { - "epoch": 0.27173781300753885, - "grad_norm": 634.4630737304688, - "learning_rate": 4.5772669691337665e-05, - "loss": 105.7182, - "step": 33630 - }, - { - "epoch": 0.271818615211823, - "grad_norm": 1288.2630615234375, - "learning_rate": 4.576878439541278e-05, - "loss": 129.2969, - "step": 33640 - }, - { - "epoch": 0.2718994174161071, - "grad_norm": 660.7456665039062, - "learning_rate": 4.5764897479895317e-05, - "loss": 89.5331, - "step": 33650 - }, - { - "epoch": 0.27198021962039126, - "grad_norm": 1103.98779296875, - "learning_rate": 4.57610089450884e-05, - "loss": 126.5262, - "step": 33660 - }, - { - "epoch": 0.2720610218246754, - "grad_norm": 1130.5789794921875, - "learning_rate": 4.5757118791295264e-05, - "loss": 107.0988, - "step": 33670 - }, - { - "epoch": 0.27214182402895953, - "grad_norm": 685.6459350585938, - "learning_rate": 4.575322701881926e-05, - "loss": 109.0433, - "step": 33680 - }, - { - "epoch": 0.27222262623324367, - "grad_norm": 837.7173461914062, - "learning_rate": 4.5749333627963884e-05, - "loss": 116.8606, - "step": 33690 - }, - { - "epoch": 0.27230342843752775, - "grad_norm": 724.3385620117188, - "learning_rate": 4.574543861903274e-05, - "loss": 151.6955, - "step": 33700 - }, - { - "epoch": 0.2723842306418119, - "grad_norm": 773.584228515625, - "learning_rate": 4.574154199232959e-05, - "loss": 168.0668, - "step": 33710 - }, - { - "epoch": 0.272465032846096, - "grad_norm": 1029.8067626953125, - "learning_rate": 4.5737643748158295e-05, - "loss": 116.3829, - "step": 33720 - }, - { - "epoch": 0.27254583505038016, - "grad_norm": 1097.515380859375, - "learning_rate": 4.573374388682283e-05, - "loss": 107.7196, - "step": 33730 - }, - { - "epoch": 0.2726266372546643, - "grad_norm": 1344.555908203125, - "learning_rate": 4.5729842408627334e-05, - "loss": 86.979, - "step": 33740 - }, - { - "epoch": 0.27270743945894843, - "grad_norm": 598.4584350585938, - "learning_rate": 4.572593931387604e-05, - "loss": 108.2467, - "step": 33750 - }, - { - "epoch": 0.27278824166323257, - "grad_norm": 843.5016479492188, - "learning_rate": 4.572203460287333e-05, - "loss": 98.9899, - "step": 33760 - }, - { - "epoch": 0.2728690438675167, - "grad_norm": 599.7642822265625, - "learning_rate": 4.57181282759237e-05, - "loss": 115.4346, - "step": 33770 - }, - { - "epoch": 0.27294984607180084, - "grad_norm": 1034.5059814453125, - "learning_rate": 4.5714220333331756e-05, - "loss": 102.5996, - "step": 33780 - }, - { - "epoch": 0.273030648276085, - "grad_norm": 997.4892578125, - "learning_rate": 4.5710310775402274e-05, - "loss": 124.1014, - "step": 33790 - }, - { - "epoch": 0.2731114504803691, - "grad_norm": 1714.6414794921875, - "learning_rate": 4.5706399602440106e-05, - "loss": 124.7312, - "step": 33800 - }, - { - "epoch": 0.27319225268465325, - "grad_norm": 1089.0321044921875, - "learning_rate": 4.5702486814750265e-05, - "loss": 104.5177, - "step": 33810 - }, - { - "epoch": 0.2732730548889374, - "grad_norm": 837.1768188476562, - "learning_rate": 4.569857241263788e-05, - "loss": 135.7573, - "step": 33820 - }, - { - "epoch": 0.2733538570932215, - "grad_norm": 1877.053466796875, - "learning_rate": 4.5694656396408195e-05, - "loss": 158.8874, - "step": 33830 - }, - { - "epoch": 0.27343465929750566, - "grad_norm": 873.006103515625, - "learning_rate": 4.56907387663666e-05, - "loss": 134.0453, - "step": 33840 - }, - { - "epoch": 0.2735154615017898, - "grad_norm": 842.5532836914062, - "learning_rate": 4.5686819522818594e-05, - "loss": 127.7638, - "step": 33850 - }, - { - "epoch": 0.27359626370607393, - "grad_norm": 1131.977294921875, - "learning_rate": 4.568289866606981e-05, - "loss": 132.2854, - "step": 33860 - }, - { - "epoch": 0.273677065910358, - "grad_norm": 827.6063842773438, - "learning_rate": 4.567897619642601e-05, - "loss": 94.0177, - "step": 33870 - }, - { - "epoch": 0.27375786811464214, - "grad_norm": 1047.3580322265625, - "learning_rate": 4.567505211419305e-05, - "loss": 129.9691, - "step": 33880 - }, - { - "epoch": 0.2738386703189263, - "grad_norm": 684.3375244140625, - "learning_rate": 4.567112641967697e-05, - "loss": 112.7695, - "step": 33890 - }, - { - "epoch": 0.2739194725232104, - "grad_norm": 1657.9844970703125, - "learning_rate": 4.566719911318389e-05, - "loss": 134.2111, - "step": 33900 - }, - { - "epoch": 0.27400027472749455, - "grad_norm": 799.876220703125, - "learning_rate": 4.566327019502007e-05, - "loss": 81.3236, - "step": 33910 - }, - { - "epoch": 0.2740810769317787, - "grad_norm": 932.51220703125, - "learning_rate": 4.565933966549189e-05, - "loss": 105.4732, - "step": 33920 - }, - { - "epoch": 0.2741618791360628, - "grad_norm": 1137.973876953125, - "learning_rate": 4.5655407524905866e-05, - "loss": 95.0365, - "step": 33930 - }, - { - "epoch": 0.27424268134034696, - "grad_norm": 1658.28857421875, - "learning_rate": 4.565147377356864e-05, - "loss": 151.1227, - "step": 33940 - }, - { - "epoch": 0.2743234835446311, - "grad_norm": 683.3330688476562, - "learning_rate": 4.564753841178697e-05, - "loss": 124.8997, - "step": 33950 - }, - { - "epoch": 0.27440428574891523, - "grad_norm": 1065.53466796875, - "learning_rate": 4.5643601439867734e-05, - "loss": 167.2397, - "step": 33960 - }, - { - "epoch": 0.27448508795319937, - "grad_norm": 787.6380615234375, - "learning_rate": 4.563966285811796e-05, - "loss": 118.0972, - "step": 33970 - }, - { - "epoch": 0.2745658901574835, - "grad_norm": 740.9427490234375, - "learning_rate": 4.5635722666844775e-05, - "loss": 108.665, - "step": 33980 - }, - { - "epoch": 0.27464669236176764, - "grad_norm": 1288.418701171875, - "learning_rate": 4.5631780866355454e-05, - "loss": 149.1723, - "step": 33990 - }, - { - "epoch": 0.2747274945660518, - "grad_norm": 592.5372314453125, - "learning_rate": 4.562783745695738e-05, - "loss": 136.369, - "step": 34000 - }, - { - "epoch": 0.2748082967703359, - "grad_norm": 729.9514770507812, - "learning_rate": 4.5623892438958074e-05, - "loss": 86.666, - "step": 34010 - }, - { - "epoch": 0.27488909897462005, - "grad_norm": 1142.8770751953125, - "learning_rate": 4.561994581266516e-05, - "loss": 122.3828, - "step": 34020 - }, - { - "epoch": 0.2749699011789042, - "grad_norm": 665.6051025390625, - "learning_rate": 4.561599757838643e-05, - "loss": 117.9901, - "step": 34030 - }, - { - "epoch": 0.27505070338318827, - "grad_norm": 952.3036499023438, - "learning_rate": 4.561204773642974e-05, - "loss": 104.4478, - "step": 34040 - }, - { - "epoch": 0.2751315055874724, - "grad_norm": 1538.466796875, - "learning_rate": 4.560809628710315e-05, - "loss": 106.5705, - "step": 34050 - }, - { - "epoch": 0.27521230779175654, - "grad_norm": 553.0347290039062, - "learning_rate": 4.560414323071477e-05, - "loss": 91.047, - "step": 34060 - }, - { - "epoch": 0.2752931099960407, - "grad_norm": 1179.90380859375, - "learning_rate": 4.5600188567572876e-05, - "loss": 118.7583, - "step": 34070 - }, - { - "epoch": 0.2753739122003248, - "grad_norm": 1249.804443359375, - "learning_rate": 4.559623229798587e-05, - "loss": 124.7738, - "step": 34080 - }, - { - "epoch": 0.27545471440460895, - "grad_norm": 844.3828125, - "learning_rate": 4.559227442226226e-05, - "loss": 136.5087, - "step": 34090 - }, - { - "epoch": 0.2755355166088931, - "grad_norm": 768.51904296875, - "learning_rate": 4.558831494071069e-05, - "loss": 118.4079, - "step": 34100 - }, - { - "epoch": 0.2756163188131772, - "grad_norm": 784.980712890625, - "learning_rate": 4.558435385363993e-05, - "loss": 106.8219, - "step": 34110 - }, - { - "epoch": 0.27569712101746136, - "grad_norm": 727.8858642578125, - "learning_rate": 4.558039116135887e-05, - "loss": 112.7733, - "step": 34120 - }, - { - "epoch": 0.2757779232217455, - "grad_norm": 5066.3701171875, - "learning_rate": 4.557642686417654e-05, - "loss": 130.2065, - "step": 34130 - }, - { - "epoch": 0.2758587254260296, - "grad_norm": 575.8513793945312, - "learning_rate": 4.5572460962402075e-05, - "loss": 112.4158, - "step": 34140 - }, - { - "epoch": 0.27593952763031376, - "grad_norm": 1118.62841796875, - "learning_rate": 4.556849345634475e-05, - "loss": 129.3342, - "step": 34150 - }, - { - "epoch": 0.2760203298345979, - "grad_norm": 1496.4749755859375, - "learning_rate": 4.556452434631395e-05, - "loss": 144.4755, - "step": 34160 - }, - { - "epoch": 0.27610113203888204, - "grad_norm": 1114.9486083984375, - "learning_rate": 4.5560553632619205e-05, - "loss": 155.5758, - "step": 34170 - }, - { - "epoch": 0.27618193424316617, - "grad_norm": 899.761474609375, - "learning_rate": 4.555658131557015e-05, - "loss": 132.9466, - "step": 34180 - }, - { - "epoch": 0.2762627364474503, - "grad_norm": 725.985107421875, - "learning_rate": 4.555260739547657e-05, - "loss": 89.8608, - "step": 34190 - }, - { - "epoch": 0.27634353865173444, - "grad_norm": 748.9408569335938, - "learning_rate": 4.5548631872648326e-05, - "loss": 125.186, - "step": 34200 - }, - { - "epoch": 0.2764243408560185, - "grad_norm": 1559.5338134765625, - "learning_rate": 4.554465474739548e-05, - "loss": 113.6407, - "step": 34210 - }, - { - "epoch": 0.27650514306030266, - "grad_norm": 1236.1806640625, - "learning_rate": 4.5540676020028145e-05, - "loss": 81.5662, - "step": 34220 - }, - { - "epoch": 0.2765859452645868, - "grad_norm": 1086.869873046875, - "learning_rate": 4.5536695690856606e-05, - "loss": 108.9924, - "step": 34230 - }, - { - "epoch": 0.27666674746887093, - "grad_norm": 752.7017211914062, - "learning_rate": 4.553271376019125e-05, - "loss": 108.1873, - "step": 34240 - }, - { - "epoch": 0.27674754967315507, - "grad_norm": 943.9710693359375, - "learning_rate": 4.5528730228342605e-05, - "loss": 152.1963, - "step": 34250 - }, - { - "epoch": 0.2768283518774392, - "grad_norm": 1782.575439453125, - "learning_rate": 4.55247450956213e-05, - "loss": 106.1654, - "step": 34260 - }, - { - "epoch": 0.27690915408172334, - "grad_norm": 1254.2276611328125, - "learning_rate": 4.552075836233812e-05, - "loss": 160.432, - "step": 34270 - }, - { - "epoch": 0.2769899562860075, - "grad_norm": 1002.9295043945312, - "learning_rate": 4.5516770028803954e-05, - "loss": 122.4372, - "step": 34280 - }, - { - "epoch": 0.2770707584902916, - "grad_norm": 2361.109375, - "learning_rate": 4.551278009532981e-05, - "loss": 153.3122, - "step": 34290 - }, - { - "epoch": 0.27715156069457575, - "grad_norm": 1001.915771484375, - "learning_rate": 4.550878856222685e-05, - "loss": 117.4217, - "step": 34300 - }, - { - "epoch": 0.2772323628988599, - "grad_norm": 0.0, - "learning_rate": 4.550479542980632e-05, - "loss": 86.0711, - "step": 34310 - }, - { - "epoch": 0.277313165103144, - "grad_norm": 738.02685546875, - "learning_rate": 4.5500800698379624e-05, - "loss": 121.3124, - "step": 34320 - }, - { - "epoch": 0.27739396730742816, - "grad_norm": 786.0671997070312, - "learning_rate": 4.5496804368258286e-05, - "loss": 109.7231, - "step": 34330 - }, - { - "epoch": 0.2774747695117123, - "grad_norm": 1370.8685302734375, - "learning_rate": 4.5492806439753935e-05, - "loss": 94.5379, - "step": 34340 - }, - { - "epoch": 0.27755557171599643, - "grad_norm": 936.9679565429688, - "learning_rate": 4.548880691317835e-05, - "loss": 135.4465, - "step": 34350 - }, - { - "epoch": 0.27763637392028057, - "grad_norm": 1191.68896484375, - "learning_rate": 4.548480578884341e-05, - "loss": 113.1939, - "step": 34360 - }, - { - "epoch": 0.2777171761245647, - "grad_norm": 704.287841796875, - "learning_rate": 4.548080306706114e-05, - "loss": 110.1259, - "step": 34370 - }, - { - "epoch": 0.27779797832884884, - "grad_norm": 1024.306884765625, - "learning_rate": 4.547679874814368e-05, - "loss": 112.6879, - "step": 34380 - }, - { - "epoch": 0.2778787805331329, - "grad_norm": 732.1144409179688, - "learning_rate": 4.547279283240329e-05, - "loss": 117.785, - "step": 34390 - }, - { - "epoch": 0.27795958273741705, - "grad_norm": 1186.0765380859375, - "learning_rate": 4.5468785320152365e-05, - "loss": 120.7223, - "step": 34400 - }, - { - "epoch": 0.2780403849417012, - "grad_norm": 977.932861328125, - "learning_rate": 4.546477621170342e-05, - "loss": 115.2618, - "step": 34410 - }, - { - "epoch": 0.2781211871459853, - "grad_norm": 948.9174194335938, - "learning_rate": 4.5460765507369084e-05, - "loss": 117.0415, - "step": 34420 - }, - { - "epoch": 0.27820198935026946, - "grad_norm": 1732.18408203125, - "learning_rate": 4.545675320746212e-05, - "loss": 126.1609, - "step": 34430 - }, - { - "epoch": 0.2782827915545536, - "grad_norm": 1710.67431640625, - "learning_rate": 4.5452739312295436e-05, - "loss": 104.7393, - "step": 34440 - }, - { - "epoch": 0.27836359375883774, - "grad_norm": 1153.044677734375, - "learning_rate": 4.544872382218202e-05, - "loss": 93.7051, - "step": 34450 - }, - { - "epoch": 0.27844439596312187, - "grad_norm": 1124.745849609375, - "learning_rate": 4.5444706737435014e-05, - "loss": 130.6644, - "step": 34460 - }, - { - "epoch": 0.278525198167406, - "grad_norm": 1080.2679443359375, - "learning_rate": 4.5440688058367686e-05, - "loss": 129.3162, - "step": 34470 - }, - { - "epoch": 0.27860600037169014, - "grad_norm": 946.5127563476562, - "learning_rate": 4.543666778529342e-05, - "loss": 110.7551, - "step": 34480 - }, - { - "epoch": 0.2786868025759743, - "grad_norm": 916.5494384765625, - "learning_rate": 4.543264591852572e-05, - "loss": 96.2843, - "step": 34490 - }, - { - "epoch": 0.2787676047802584, - "grad_norm": 1342.7659912109375, - "learning_rate": 4.542862245837821e-05, - "loss": 94.5427, - "step": 34500 - }, - { - "epoch": 0.27884840698454255, - "grad_norm": 654.0097045898438, - "learning_rate": 4.542459740516467e-05, - "loss": 126.7555, - "step": 34510 - }, - { - "epoch": 0.2789292091888267, - "grad_norm": 1470.6068115234375, - "learning_rate": 4.542057075919897e-05, - "loss": 153.6898, - "step": 34520 - }, - { - "epoch": 0.2790100113931108, - "grad_norm": 396.3619079589844, - "learning_rate": 4.541654252079513e-05, - "loss": 106.5824, - "step": 34530 - }, - { - "epoch": 0.27909081359739496, - "grad_norm": 1335.83935546875, - "learning_rate": 4.5412512690267246e-05, - "loss": 122.7157, - "step": 34540 - }, - { - "epoch": 0.2791716158016791, - "grad_norm": 698.5989379882812, - "learning_rate": 4.5408481267929605e-05, - "loss": 107.813, - "step": 34550 - }, - { - "epoch": 0.2792524180059632, - "grad_norm": 687.357666015625, - "learning_rate": 4.540444825409657e-05, - "loss": 182.3593, - "step": 34560 - }, - { - "epoch": 0.2793332202102473, - "grad_norm": 766.945556640625, - "learning_rate": 4.540041364908265e-05, - "loss": 109.5158, - "step": 34570 - }, - { - "epoch": 0.27941402241453145, - "grad_norm": 2126.50244140625, - "learning_rate": 4.5396377453202466e-05, - "loss": 171.0725, - "step": 34580 - }, - { - "epoch": 0.2794948246188156, - "grad_norm": 1262.4678955078125, - "learning_rate": 4.539233966677078e-05, - "loss": 171.009, - "step": 34590 - }, - { - "epoch": 0.2795756268230997, - "grad_norm": 903.4573974609375, - "learning_rate": 4.5388300290102456e-05, - "loss": 148.0131, - "step": 34600 - }, - { - "epoch": 0.27965642902738386, - "grad_norm": 1543.662353515625, - "learning_rate": 4.5384259323512504e-05, - "loss": 152.2532, - "step": 34610 - }, - { - "epoch": 0.279737231231668, - "grad_norm": 2007.708984375, - "learning_rate": 4.538021676731603e-05, - "loss": 117.6894, - "step": 34620 - }, - { - "epoch": 0.27981803343595213, - "grad_norm": 986.9883422851562, - "learning_rate": 4.537617262182829e-05, - "loss": 128.1222, - "step": 34630 - }, - { - "epoch": 0.27989883564023627, - "grad_norm": 852.8289184570312, - "learning_rate": 4.5372126887364655e-05, - "loss": 151.4527, - "step": 34640 - }, - { - "epoch": 0.2799796378445204, - "grad_norm": 947.4114990234375, - "learning_rate": 4.536807956424063e-05, - "loss": 115.6713, - "step": 34650 - }, - { - "epoch": 0.28006044004880454, - "grad_norm": 625.111328125, - "learning_rate": 4.536403065277182e-05, - "loss": 126.8854, - "step": 34660 - }, - { - "epoch": 0.2801412422530887, - "grad_norm": 2799.299560546875, - "learning_rate": 4.5359980153273964e-05, - "loss": 106.6591, - "step": 34670 - }, - { - "epoch": 0.2802220444573728, - "grad_norm": 1096.3692626953125, - "learning_rate": 4.535592806606294e-05, - "loss": 135.9334, - "step": 34680 - }, - { - "epoch": 0.28030284666165695, - "grad_norm": 804.1647338867188, - "learning_rate": 4.535187439145473e-05, - "loss": 98.5152, - "step": 34690 - }, - { - "epoch": 0.2803836488659411, - "grad_norm": 1013.7943115234375, - "learning_rate": 4.534781912976546e-05, - "loss": 132.1083, - "step": 34700 - }, - { - "epoch": 0.2804644510702252, - "grad_norm": 954.5202026367188, - "learning_rate": 4.5343762281311345e-05, - "loss": 110.425, - "step": 34710 - }, - { - "epoch": 0.28054525327450935, - "grad_norm": 1350.681884765625, - "learning_rate": 4.533970384640877e-05, - "loss": 120.3986, - "step": 34720 - }, - { - "epoch": 0.28062605547879343, - "grad_norm": 1411.2952880859375, - "learning_rate": 4.533564382537421e-05, - "loss": 106.4892, - "step": 34730 - }, - { - "epoch": 0.28070685768307757, - "grad_norm": 1573.3634033203125, - "learning_rate": 4.533158221852427e-05, - "loss": 118.5769, - "step": 34740 - }, - { - "epoch": 0.2807876598873617, - "grad_norm": 850.43017578125, - "learning_rate": 4.532751902617569e-05, - "loss": 143.0521, - "step": 34750 - }, - { - "epoch": 0.28086846209164584, - "grad_norm": 1051.746826171875, - "learning_rate": 4.5323454248645324e-05, - "loss": 131.302, - "step": 34760 - }, - { - "epoch": 0.28094926429593, - "grad_norm": 1030.9208984375, - "learning_rate": 4.5319387886250156e-05, - "loss": 124.2802, - "step": 34770 - }, - { - "epoch": 0.2810300665002141, - "grad_norm": 425.7080993652344, - "learning_rate": 4.531531993930727e-05, - "loss": 126.1376, - "step": 34780 - }, - { - "epoch": 0.28111086870449825, - "grad_norm": 920.619384765625, - "learning_rate": 4.531125040813392e-05, - "loss": 111.8277, - "step": 34790 - }, - { - "epoch": 0.2811916709087824, - "grad_norm": 2937.628662109375, - "learning_rate": 4.530717929304743e-05, - "loss": 127.0246, - "step": 34800 - }, - { - "epoch": 0.2812724731130665, - "grad_norm": 702.6654052734375, - "learning_rate": 4.5303106594365296e-05, - "loss": 110.4365, - "step": 34810 - }, - { - "epoch": 0.28135327531735066, - "grad_norm": 1031.491943359375, - "learning_rate": 4.529903231240511e-05, - "loss": 122.6931, - "step": 34820 - }, - { - "epoch": 0.2814340775216348, - "grad_norm": 1236.973876953125, - "learning_rate": 4.5294956447484584e-05, - "loss": 131.1124, - "step": 34830 - }, - { - "epoch": 0.28151487972591893, - "grad_norm": 642.0905151367188, - "learning_rate": 4.529087899992156e-05, - "loss": 86.0983, - "step": 34840 - }, - { - "epoch": 0.28159568193020307, - "grad_norm": 1305.4334716796875, - "learning_rate": 4.528679997003403e-05, - "loss": 136.0719, - "step": 34850 - }, - { - "epoch": 0.2816764841344872, - "grad_norm": 1219.1221923828125, - "learning_rate": 4.5282719358140056e-05, - "loss": 137.1284, - "step": 34860 - }, - { - "epoch": 0.28175728633877134, - "grad_norm": 510.88397216796875, - "learning_rate": 4.5278637164557866e-05, - "loss": 137.5496, - "step": 34870 - }, - { - "epoch": 0.2818380885430555, - "grad_norm": 1230.408935546875, - "learning_rate": 4.52745533896058e-05, - "loss": 141.1205, - "step": 34880 - }, - { - "epoch": 0.2819188907473396, - "grad_norm": 1380.2786865234375, - "learning_rate": 4.527046803360232e-05, - "loss": 104.9832, - "step": 34890 - }, - { - "epoch": 0.2819996929516237, - "grad_norm": 1039.0751953125, - "learning_rate": 4.5266381096866e-05, - "loss": 108.6015, - "step": 34900 - }, - { - "epoch": 0.28208049515590783, - "grad_norm": 632.5457153320312, - "learning_rate": 4.5262292579715556e-05, - "loss": 132.2677, - "step": 34910 - }, - { - "epoch": 0.28216129736019196, - "grad_norm": 1173.7763671875, - "learning_rate": 4.525820248246982e-05, - "loss": 150.6363, - "step": 34920 - }, - { - "epoch": 0.2822420995644761, - "grad_norm": 1210.0218505859375, - "learning_rate": 4.525411080544775e-05, - "loss": 100.9185, - "step": 34930 - }, - { - "epoch": 0.28232290176876024, - "grad_norm": 503.28314208984375, - "learning_rate": 4.5250017548968404e-05, - "loss": 145.572, - "step": 34940 - }, - { - "epoch": 0.2824037039730444, - "grad_norm": 1026.7508544921875, - "learning_rate": 4.5245922713350996e-05, - "loss": 131.1489, - "step": 34950 - }, - { - "epoch": 0.2824845061773285, - "grad_norm": 691.9127197265625, - "learning_rate": 4.524182629891486e-05, - "loss": 105.2064, - "step": 34960 - }, - { - "epoch": 0.28256530838161265, - "grad_norm": 1015.0642700195312, - "learning_rate": 4.523772830597942e-05, - "loss": 106.6895, - "step": 34970 - }, - { - "epoch": 0.2826461105858968, - "grad_norm": 1064.6636962890625, - "learning_rate": 4.523362873486427e-05, - "loss": 110.9529, - "step": 34980 - }, - { - "epoch": 0.2827269127901809, - "grad_norm": 1894.8170166015625, - "learning_rate": 4.522952758588909e-05, - "loss": 149.2299, - "step": 34990 - }, - { - "epoch": 0.28280771499446505, - "grad_norm": 872.7687377929688, - "learning_rate": 4.522542485937369e-05, - "loss": 90.9981, - "step": 35000 - }, - { - "epoch": 0.2828885171987492, - "grad_norm": 1041.8123779296875, - "learning_rate": 4.5221320555638016e-05, - "loss": 115.0786, - "step": 35010 - }, - { - "epoch": 0.2829693194030333, - "grad_norm": 1080.722900390625, - "learning_rate": 4.521721467500213e-05, - "loss": 120.177, - "step": 35020 - }, - { - "epoch": 0.28305012160731746, - "grad_norm": 933.2387084960938, - "learning_rate": 4.521310721778622e-05, - "loss": 131.6707, - "step": 35030 - }, - { - "epoch": 0.2831309238116016, - "grad_norm": 1417.2791748046875, - "learning_rate": 4.5208998184310596e-05, - "loss": 155.1386, - "step": 35040 - }, - { - "epoch": 0.28321172601588573, - "grad_norm": 989.0880737304688, - "learning_rate": 4.5204887574895684e-05, - "loss": 134.7903, - "step": 35050 - }, - { - "epoch": 0.28329252822016987, - "grad_norm": 674.8705444335938, - "learning_rate": 4.5200775389862026e-05, - "loss": 106.9986, - "step": 35060 - }, - { - "epoch": 0.283373330424454, - "grad_norm": 1011.1456298828125, - "learning_rate": 4.519666162953032e-05, - "loss": 125.1009, - "step": 35070 - }, - { - "epoch": 0.2834541326287381, - "grad_norm": 799.091064453125, - "learning_rate": 4.519254629422136e-05, - "loss": 141.658, - "step": 35080 - }, - { - "epoch": 0.2835349348330222, - "grad_norm": 1478.8214111328125, - "learning_rate": 4.518842938425605e-05, - "loss": 110.739, - "step": 35090 - }, - { - "epoch": 0.28361573703730636, - "grad_norm": 506.6539611816406, - "learning_rate": 4.5184310899955465e-05, - "loss": 81.631, - "step": 35100 - }, - { - "epoch": 0.2836965392415905, - "grad_norm": 780.650390625, - "learning_rate": 4.5180190841640747e-05, - "loss": 95.6491, - "step": 35110 - }, - { - "epoch": 0.28377734144587463, - "grad_norm": 714.573974609375, - "learning_rate": 4.51760692096332e-05, - "loss": 130.2963, - "step": 35120 - }, - { - "epoch": 0.28385814365015877, - "grad_norm": 962.4329223632812, - "learning_rate": 4.517194600425423e-05, - "loss": 118.3506, - "step": 35130 - }, - { - "epoch": 0.2839389458544429, - "grad_norm": 462.5810241699219, - "learning_rate": 4.516782122582538e-05, - "loss": 89.7047, - "step": 35140 - }, - { - "epoch": 0.28401974805872704, - "grad_norm": 3363.796875, - "learning_rate": 4.516369487466832e-05, - "loss": 134.3732, - "step": 35150 - }, - { - "epoch": 0.2841005502630112, - "grad_norm": 1219.4478759765625, - "learning_rate": 4.5159566951104796e-05, - "loss": 133.6795, - "step": 35160 - }, - { - "epoch": 0.2841813524672953, - "grad_norm": 1269.0958251953125, - "learning_rate": 4.5155437455456744e-05, - "loss": 114.4325, - "step": 35170 - }, - { - "epoch": 0.28426215467157945, - "grad_norm": 1432.11474609375, - "learning_rate": 4.5151306388046175e-05, - "loss": 116.6853, - "step": 35180 - }, - { - "epoch": 0.2843429568758636, - "grad_norm": 1100.9908447265625, - "learning_rate": 4.5147173749195255e-05, - "loss": 134.7602, - "step": 35190 - }, - { - "epoch": 0.2844237590801477, - "grad_norm": 1108.481689453125, - "learning_rate": 4.5143039539226234e-05, - "loss": 123.7225, - "step": 35200 - }, - { - "epoch": 0.28450456128443186, - "grad_norm": 1388.0648193359375, - "learning_rate": 4.5138903758461515e-05, - "loss": 83.2804, - "step": 35210 - }, - { - "epoch": 0.284585363488716, - "grad_norm": 1008.038330078125, - "learning_rate": 4.513476640722362e-05, - "loss": 107.4747, - "step": 35220 - }, - { - "epoch": 0.28466616569300013, - "grad_norm": 2211.773681640625, - "learning_rate": 4.5130627485835186e-05, - "loss": 124.5671, - "step": 35230 - }, - { - "epoch": 0.28474696789728426, - "grad_norm": 776.4564208984375, - "learning_rate": 4.512648699461897e-05, - "loss": 99.4648, - "step": 35240 - }, - { - "epoch": 0.28482777010156834, - "grad_norm": 754.8226928710938, - "learning_rate": 4.512234493389785e-05, - "loss": 127.4045, - "step": 35250 - }, - { - "epoch": 0.2849085723058525, - "grad_norm": 959.7662963867188, - "learning_rate": 4.511820130399485e-05, - "loss": 128.489, - "step": 35260 - }, - { - "epoch": 0.2849893745101366, - "grad_norm": 1053.52978515625, - "learning_rate": 4.511405610523309e-05, - "loss": 151.0009, - "step": 35270 - }, - { - "epoch": 0.28507017671442075, - "grad_norm": 1389.510498046875, - "learning_rate": 4.510990933793583e-05, - "loss": 151.2305, - "step": 35280 - }, - { - "epoch": 0.2851509789187049, - "grad_norm": 833.6561889648438, - "learning_rate": 4.5105761002426415e-05, - "loss": 94.537, - "step": 35290 - }, - { - "epoch": 0.285231781122989, - "grad_norm": 714.683349609375, - "learning_rate": 4.510161109902837e-05, - "loss": 106.2177, - "step": 35300 - }, - { - "epoch": 0.28531258332727316, - "grad_norm": 1010.4981079101562, - "learning_rate": 4.50974596280653e-05, - "loss": 143.8065, - "step": 35310 - }, - { - "epoch": 0.2853933855315573, - "grad_norm": 807.5494995117188, - "learning_rate": 4.509330658986095e-05, - "loss": 115.15, - "step": 35320 - }, - { - "epoch": 0.28547418773584143, - "grad_norm": 794.9727172851562, - "learning_rate": 4.508915198473919e-05, - "loss": 128.3856, - "step": 35330 - }, - { - "epoch": 0.28555498994012557, - "grad_norm": 1342.813232421875, - "learning_rate": 4.508499581302398e-05, - "loss": 112.3642, - "step": 35340 - }, - { - "epoch": 0.2856357921444097, - "grad_norm": 714.3486328125, - "learning_rate": 4.508083807503945e-05, - "loss": 108.8095, - "step": 35350 - }, - { - "epoch": 0.28571659434869384, - "grad_norm": 481.2684020996094, - "learning_rate": 4.507667877110982e-05, - "loss": 116.7584, - "step": 35360 - }, - { - "epoch": 0.285797396552978, - "grad_norm": 740.3648681640625, - "learning_rate": 4.507251790155944e-05, - "loss": 104.9906, - "step": 35370 - }, - { - "epoch": 0.2858781987572621, - "grad_norm": 7337.34765625, - "learning_rate": 4.506835546671278e-05, - "loss": 160.4453, - "step": 35380 - }, - { - "epoch": 0.28595900096154625, - "grad_norm": 714.0308837890625, - "learning_rate": 4.506419146689446e-05, - "loss": 177.722, - "step": 35390 - }, - { - "epoch": 0.2860398031658304, - "grad_norm": 903.252685546875, - "learning_rate": 4.5060025902429174e-05, - "loss": 140.464, - "step": 35400 - }, - { - "epoch": 0.2861206053701145, - "grad_norm": 1707.4490966796875, - "learning_rate": 4.505585877364175e-05, - "loss": 143.0262, - "step": 35410 - }, - { - "epoch": 0.2862014075743986, - "grad_norm": 1198.5589599609375, - "learning_rate": 4.5051690080857176e-05, - "loss": 100.5387, - "step": 35420 - }, - { - "epoch": 0.28628220977868274, - "grad_norm": 881.50048828125, - "learning_rate": 4.504751982440052e-05, - "loss": 120.7444, - "step": 35430 - }, - { - "epoch": 0.2863630119829669, - "grad_norm": 560.0980834960938, - "learning_rate": 4.504334800459699e-05, - "loss": 100.8024, - "step": 35440 - }, - { - "epoch": 0.286443814187251, - "grad_norm": 939.2238159179688, - "learning_rate": 4.503917462177192e-05, - "loss": 129.7464, - "step": 35450 - }, - { - "epoch": 0.28652461639153515, - "grad_norm": 921.0413208007812, - "learning_rate": 4.5034999676250745e-05, - "loss": 102.4883, - "step": 35460 - }, - { - "epoch": 0.2866054185958193, - "grad_norm": 897.2872924804688, - "learning_rate": 4.5030823168359046e-05, - "loss": 130.2189, - "step": 35470 - }, - { - "epoch": 0.2866862208001034, - "grad_norm": 771.0413208007812, - "learning_rate": 4.5026645098422515e-05, - "loss": 165.105, - "step": 35480 - }, - { - "epoch": 0.28676702300438756, - "grad_norm": 1121.4375, - "learning_rate": 4.5022465466766974e-05, - "loss": 137.145, - "step": 35490 - }, - { - "epoch": 0.2868478252086717, - "grad_norm": 1141.2972412109375, - "learning_rate": 4.5018284273718336e-05, - "loss": 110.5927, - "step": 35500 - }, - { - "epoch": 0.28692862741295583, - "grad_norm": 952.0958251953125, - "learning_rate": 4.501410151960268e-05, - "loss": 117.8001, - "step": 35510 - }, - { - "epoch": 0.28700942961723996, - "grad_norm": 734.2439575195312, - "learning_rate": 4.5009917204746184e-05, - "loss": 105.4353, - "step": 35520 - }, - { - "epoch": 0.2870902318215241, - "grad_norm": 1291.39208984375, - "learning_rate": 4.500573132947514e-05, - "loss": 103.4242, - "step": 35530 - }, - { - "epoch": 0.28717103402580824, - "grad_norm": 1230.2783203125, - "learning_rate": 4.5001543894115975e-05, - "loss": 87.0285, - "step": 35540 - }, - { - "epoch": 0.28725183623009237, - "grad_norm": 1459.7908935546875, - "learning_rate": 4.499735489899524e-05, - "loss": 126.5571, - "step": 35550 - }, - { - "epoch": 0.2873326384343765, - "grad_norm": 802.614501953125, - "learning_rate": 4.499316434443959e-05, - "loss": 81.4039, - "step": 35560 - }, - { - "epoch": 0.28741344063866064, - "grad_norm": 1443.157470703125, - "learning_rate": 4.498897223077582e-05, - "loss": 148.1397, - "step": 35570 - }, - { - "epoch": 0.2874942428429448, - "grad_norm": 771.4644165039062, - "learning_rate": 4.4984778558330844e-05, - "loss": 167.606, - "step": 35580 - }, - { - "epoch": 0.28757504504722886, - "grad_norm": 941.3527221679688, - "learning_rate": 4.498058332743168e-05, - "loss": 112.0973, - "step": 35590 - }, - { - "epoch": 0.287655847251513, - "grad_norm": 1018.1768798828125, - "learning_rate": 4.4976386538405495e-05, - "loss": 147.5497, - "step": 35600 - }, - { - "epoch": 0.28773664945579713, - "grad_norm": 1396.66064453125, - "learning_rate": 4.497218819157956e-05, - "loss": 102.9582, - "step": 35610 - }, - { - "epoch": 0.28781745166008127, - "grad_norm": 678.246337890625, - "learning_rate": 4.496798828728126e-05, - "loss": 109.4126, - "step": 35620 - }, - { - "epoch": 0.2878982538643654, - "grad_norm": 634.16796875, - "learning_rate": 4.496378682583813e-05, - "loss": 149.2051, - "step": 35630 - }, - { - "epoch": 0.28797905606864954, - "grad_norm": 1301.6407470703125, - "learning_rate": 4.495958380757779e-05, - "loss": 151.9117, - "step": 35640 - }, - { - "epoch": 0.2880598582729337, - "grad_norm": 775.5172729492188, - "learning_rate": 4.495537923282801e-05, - "loss": 109.6005, - "step": 35650 - }, - { - "epoch": 0.2881406604772178, - "grad_norm": 779.864990234375, - "learning_rate": 4.4951173101916675e-05, - "loss": 99.7205, - "step": 35660 - }, - { - "epoch": 0.28822146268150195, - "grad_norm": 586.0054931640625, - "learning_rate": 4.4946965415171775e-05, - "loss": 110.9884, - "step": 35670 - }, - { - "epoch": 0.2883022648857861, - "grad_norm": 846.4296875, - "learning_rate": 4.494275617292144e-05, - "loss": 113.662, - "step": 35680 - }, - { - "epoch": 0.2883830670900702, - "grad_norm": 915.6343383789062, - "learning_rate": 4.4938545375493934e-05, - "loss": 97.9946, - "step": 35690 - }, - { - "epoch": 0.28846386929435436, - "grad_norm": 867.697265625, - "learning_rate": 4.493433302321759e-05, - "loss": 127.8901, - "step": 35700 - }, - { - "epoch": 0.2885446714986385, - "grad_norm": 1020.3027954101562, - "learning_rate": 4.493011911642092e-05, - "loss": 130.4993, - "step": 35710 - }, - { - "epoch": 0.28862547370292263, - "grad_norm": 926.0828247070312, - "learning_rate": 4.492590365543253e-05, - "loss": 108.4467, - "step": 35720 - }, - { - "epoch": 0.28870627590720677, - "grad_norm": 749.0755615234375, - "learning_rate": 4.492168664058114e-05, - "loss": 117.8848, - "step": 35730 - }, - { - "epoch": 0.2887870781114909, - "grad_norm": 902.9224243164062, - "learning_rate": 4.491746807219561e-05, - "loss": 107.3755, - "step": 35740 - }, - { - "epoch": 0.28886788031577504, - "grad_norm": 1235.7816162109375, - "learning_rate": 4.491324795060491e-05, - "loss": 102.4859, - "step": 35750 - }, - { - "epoch": 0.2889486825200592, - "grad_norm": 1196.7703857421875, - "learning_rate": 4.490902627613813e-05, - "loss": 156.5874, - "step": 35760 - }, - { - "epoch": 0.28902948472434326, - "grad_norm": 508.5019836425781, - "learning_rate": 4.49048030491245e-05, - "loss": 111.9037, - "step": 35770 - }, - { - "epoch": 0.2891102869286274, - "grad_norm": 1058.8076171875, - "learning_rate": 4.4900578269893335e-05, - "loss": 126.7406, - "step": 35780 - }, - { - "epoch": 0.2891910891329115, - "grad_norm": 2101.999267578125, - "learning_rate": 4.489635193877411e-05, - "loss": 121.6241, - "step": 35790 - }, - { - "epoch": 0.28927189133719566, - "grad_norm": 1217.8797607421875, - "learning_rate": 4.4892124056096386e-05, - "loss": 133.8553, - "step": 35800 - }, - { - "epoch": 0.2893526935414798, - "grad_norm": 774.1889038085938, - "learning_rate": 4.488789462218987e-05, - "loss": 90.9729, - "step": 35810 - }, - { - "epoch": 0.28943349574576394, - "grad_norm": 2153.329833984375, - "learning_rate": 4.4883663637384396e-05, - "loss": 149.872, - "step": 35820 - }, - { - "epoch": 0.28951429795004807, - "grad_norm": 890.8701782226562, - "learning_rate": 4.4879431102009886e-05, - "loss": 140.9723, - "step": 35830 - }, - { - "epoch": 0.2895951001543322, - "grad_norm": 998.8427124023438, - "learning_rate": 4.487519701639641e-05, - "loss": 117.5766, - "step": 35840 - }, - { - "epoch": 0.28967590235861634, - "grad_norm": 566.9569091796875, - "learning_rate": 4.487096138087414e-05, - "loss": 135.5254, - "step": 35850 - }, - { - "epoch": 0.2897567045629005, - "grad_norm": 1539.50927734375, - "learning_rate": 4.486672419577339e-05, - "loss": 152.108, - "step": 35860 - }, - { - "epoch": 0.2898375067671846, - "grad_norm": 927.10302734375, - "learning_rate": 4.4862485461424585e-05, - "loss": 137.8924, - "step": 35870 - }, - { - "epoch": 0.28991830897146875, - "grad_norm": 448.3216552734375, - "learning_rate": 4.4858245178158276e-05, - "loss": 95.7572, - "step": 35880 - }, - { - "epoch": 0.2899991111757529, - "grad_norm": 1155.0504150390625, - "learning_rate": 4.485400334630511e-05, - "loss": 114.0033, - "step": 35890 - }, - { - "epoch": 0.290079913380037, - "grad_norm": 876.059326171875, - "learning_rate": 4.484975996619589e-05, - "loss": 109.5067, - "step": 35900 - }, - { - "epoch": 0.29016071558432116, - "grad_norm": 1154.8907470703125, - "learning_rate": 4.4845515038161515e-05, - "loss": 107.2369, - "step": 35910 - }, - { - "epoch": 0.2902415177886053, - "grad_norm": 1266.2904052734375, - "learning_rate": 4.484126856253301e-05, - "loss": 105.3486, - "step": 35920 - }, - { - "epoch": 0.29032231999288943, - "grad_norm": 944.9496459960938, - "learning_rate": 4.483702053964154e-05, - "loss": 121.0696, - "step": 35930 - }, - { - "epoch": 0.2904031221971735, - "grad_norm": 652.3861694335938, - "learning_rate": 4.483277096981836e-05, - "loss": 138.7729, - "step": 35940 - }, - { - "epoch": 0.29048392440145765, - "grad_norm": 988.6136474609375, - "learning_rate": 4.482851985339487e-05, - "loss": 100.7484, - "step": 35950 - }, - { - "epoch": 0.2905647266057418, - "grad_norm": 713.495849609375, - "learning_rate": 4.482426719070258e-05, - "loss": 112.5299, - "step": 35960 - }, - { - "epoch": 0.2906455288100259, - "grad_norm": 984.3701171875, - "learning_rate": 4.48200129820731e-05, - "loss": 105.965, - "step": 35970 - }, - { - "epoch": 0.29072633101431006, - "grad_norm": 889.433837890625, - "learning_rate": 4.481575722783821e-05, - "loss": 125.5693, - "step": 35980 - }, - { - "epoch": 0.2908071332185942, - "grad_norm": 595.2171630859375, - "learning_rate": 4.481149992832977e-05, - "loss": 116.5514, - "step": 35990 - }, - { - "epoch": 0.29088793542287833, - "grad_norm": 1127.482666015625, - "learning_rate": 4.480724108387977e-05, - "loss": 158.5123, - "step": 36000 - }, - { - "epoch": 0.29096873762716247, - "grad_norm": 522.4324340820312, - "learning_rate": 4.480298069482033e-05, - "loss": 118.1042, - "step": 36010 - }, - { - "epoch": 0.2910495398314466, - "grad_norm": 914.2168579101562, - "learning_rate": 4.479871876148368e-05, - "loss": 122.7223, - "step": 36020 - }, - { - "epoch": 0.29113034203573074, - "grad_norm": 1882.6761474609375, - "learning_rate": 4.479445528420218e-05, - "loss": 171.1375, - "step": 36030 - }, - { - "epoch": 0.2912111442400149, - "grad_norm": 629.4611206054688, - "learning_rate": 4.4790190263308306e-05, - "loss": 106.6818, - "step": 36040 - }, - { - "epoch": 0.291291946444299, - "grad_norm": 1279.12841796875, - "learning_rate": 4.478592369913465e-05, - "loss": 119.8195, - "step": 36050 - }, - { - "epoch": 0.29137274864858315, - "grad_norm": 1169.1927490234375, - "learning_rate": 4.4781655592013914e-05, - "loss": 101.2631, - "step": 36060 - }, - { - "epoch": 0.2914535508528673, - "grad_norm": 807.729736328125, - "learning_rate": 4.477738594227895e-05, - "loss": 108.0282, - "step": 36070 - }, - { - "epoch": 0.2915343530571514, - "grad_norm": 1172.6905517578125, - "learning_rate": 4.477311475026271e-05, - "loss": 138.7188, - "step": 36080 - }, - { - "epoch": 0.29161515526143555, - "grad_norm": 944.0347290039062, - "learning_rate": 4.4768842016298275e-05, - "loss": 123.9575, - "step": 36090 - }, - { - "epoch": 0.2916959574657197, - "grad_norm": 1842.523681640625, - "learning_rate": 4.4764567740718825e-05, - "loss": 118.9388, - "step": 36100 - }, - { - "epoch": 0.29177675967000377, - "grad_norm": 489.44049072265625, - "learning_rate": 4.476029192385769e-05, - "loss": 111.6372, - "step": 36110 - }, - { - "epoch": 0.2918575618742879, - "grad_norm": 965.0722045898438, - "learning_rate": 4.475601456604831e-05, - "loss": 120.8675, - "step": 36120 - }, - { - "epoch": 0.29193836407857204, - "grad_norm": 1206.4427490234375, - "learning_rate": 4.4751735667624237e-05, - "loss": 154.3426, - "step": 36130 - }, - { - "epoch": 0.2920191662828562, - "grad_norm": 829.5217895507812, - "learning_rate": 4.4747455228919146e-05, - "loss": 128.9677, - "step": 36140 - }, - { - "epoch": 0.2920999684871403, - "grad_norm": 674.0625610351562, - "learning_rate": 4.474317325026684e-05, - "loss": 114.1324, - "step": 36150 - }, - { - "epoch": 0.29218077069142445, - "grad_norm": 1030.25634765625, - "learning_rate": 4.4738889732001234e-05, - "loss": 139.0328, - "step": 36160 - }, - { - "epoch": 0.2922615728957086, - "grad_norm": 770.5888061523438, - "learning_rate": 4.473460467445637e-05, - "loss": 135.6064, - "step": 36170 - }, - { - "epoch": 0.2923423750999927, - "grad_norm": 1034.4508056640625, - "learning_rate": 4.473031807796639e-05, - "loss": 144.3514, - "step": 36180 - }, - { - "epoch": 0.29242317730427686, - "grad_norm": 731.2726440429688, - "learning_rate": 4.472602994286559e-05, - "loss": 128.6037, - "step": 36190 - }, - { - "epoch": 0.292503979508561, - "grad_norm": 1365.0003662109375, - "learning_rate": 4.4721740269488355e-05, - "loss": 105.3024, - "step": 36200 - }, - { - "epoch": 0.29258478171284513, - "grad_norm": 787.8053588867188, - "learning_rate": 4.4717449058169216e-05, - "loss": 124.5325, - "step": 36210 - }, - { - "epoch": 0.29266558391712927, - "grad_norm": 1392.0694580078125, - "learning_rate": 4.471315630924279e-05, - "loss": 135.1548, - "step": 36220 - }, - { - "epoch": 0.2927463861214134, - "grad_norm": 993.1351318359375, - "learning_rate": 4.4708862023043854e-05, - "loss": 118.3388, - "step": 36230 - }, - { - "epoch": 0.29282718832569754, - "grad_norm": 940.2886352539062, - "learning_rate": 4.470456619990727e-05, - "loss": 119.7695, - "step": 36240 - }, - { - "epoch": 0.2929079905299817, - "grad_norm": 675.0407104492188, - "learning_rate": 4.4700268840168045e-05, - "loss": 114.3469, - "step": 36250 - }, - { - "epoch": 0.2929887927342658, - "grad_norm": 698.9947509765625, - "learning_rate": 4.46959699441613e-05, - "loss": 96.8346, - "step": 36260 - }, - { - "epoch": 0.29306959493854995, - "grad_norm": 835.1098022460938, - "learning_rate": 4.469166951222227e-05, - "loss": 88.2586, - "step": 36270 - }, - { - "epoch": 0.29315039714283403, - "grad_norm": 817.1708984375, - "learning_rate": 4.46873675446863e-05, - "loss": 131.4905, - "step": 36280 - }, - { - "epoch": 0.29323119934711817, - "grad_norm": 633.8955688476562, - "learning_rate": 4.468306404188887e-05, - "loss": 119.7594, - "step": 36290 - }, - { - "epoch": 0.2933120015514023, - "grad_norm": 868.7075805664062, - "learning_rate": 4.4678759004165584e-05, - "loss": 128.0557, - "step": 36300 - }, - { - "epoch": 0.29339280375568644, - "grad_norm": 424.8953857421875, - "learning_rate": 4.4674452431852155e-05, - "loss": 76.9472, - "step": 36310 - }, - { - "epoch": 0.2934736059599706, - "grad_norm": 1228.647216796875, - "learning_rate": 4.4670144325284414e-05, - "loss": 138.3897, - "step": 36320 - }, - { - "epoch": 0.2935544081642547, - "grad_norm": 1021.4649658203125, - "learning_rate": 4.4665834684798316e-05, - "loss": 122.3442, - "step": 36330 - }, - { - "epoch": 0.29363521036853885, - "grad_norm": 886.6144409179688, - "learning_rate": 4.466152351072994e-05, - "loss": 113.4936, - "step": 36340 - }, - { - "epoch": 0.293716012572823, - "grad_norm": 2141.16943359375, - "learning_rate": 4.465721080341547e-05, - "loss": 96.7589, - "step": 36350 - }, - { - "epoch": 0.2937968147771071, - "grad_norm": 978.112060546875, - "learning_rate": 4.465289656319124e-05, - "loss": 235.0146, - "step": 36360 - }, - { - "epoch": 0.29387761698139125, - "grad_norm": 975.784423828125, - "learning_rate": 4.464858079039367e-05, - "loss": 114.5082, - "step": 36370 - }, - { - "epoch": 0.2939584191856754, - "grad_norm": 1028.307373046875, - "learning_rate": 4.464426348535931e-05, - "loss": 175.0176, - "step": 36380 - }, - { - "epoch": 0.2940392213899595, - "grad_norm": 899.1151733398438, - "learning_rate": 4.463994464842484e-05, - "loss": 124.9253, - "step": 36390 - }, - { - "epoch": 0.29412002359424366, - "grad_norm": 1693.2586669921875, - "learning_rate": 4.4635624279927044e-05, - "loss": 125.1011, - "step": 36400 - }, - { - "epoch": 0.2942008257985278, - "grad_norm": 1099.2884521484375, - "learning_rate": 4.463130238020285e-05, - "loss": 104.9331, - "step": 36410 - }, - { - "epoch": 0.29428162800281193, - "grad_norm": 744.2664184570312, - "learning_rate": 4.462697894958926e-05, - "loss": 123.865, - "step": 36420 - }, - { - "epoch": 0.29436243020709607, - "grad_norm": 1611.406005859375, - "learning_rate": 4.4622653988423455e-05, - "loss": 122.7415, - "step": 36430 - }, - { - "epoch": 0.2944432324113802, - "grad_norm": 949.6927490234375, - "learning_rate": 4.461832749704268e-05, - "loss": 138.3154, - "step": 36440 - }, - { - "epoch": 0.29452403461566434, - "grad_norm": 657.3694458007812, - "learning_rate": 4.4613999475784336e-05, - "loss": 103.2282, - "step": 36450 - }, - { - "epoch": 0.2946048368199484, - "grad_norm": 428.4473876953125, - "learning_rate": 4.460966992498593e-05, - "loss": 147.5168, - "step": 36460 - }, - { - "epoch": 0.29468563902423256, - "grad_norm": 1618.7548828125, - "learning_rate": 4.460533884498509e-05, - "loss": 95.6227, - "step": 36470 - }, - { - "epoch": 0.2947664412285167, - "grad_norm": 1572.71484375, - "learning_rate": 4.460100623611955e-05, - "loss": 143.9246, - "step": 36480 - }, - { - "epoch": 0.29484724343280083, - "grad_norm": 4086.5341796875, - "learning_rate": 4.4596672098727195e-05, - "loss": 148.2812, - "step": 36490 - }, - { - "epoch": 0.29492804563708497, - "grad_norm": 619.3634033203125, - "learning_rate": 4.4592336433146e-05, - "loss": 103.9121, - "step": 36500 - }, - { - "epoch": 0.2950088478413691, - "grad_norm": 919.9912109375, - "learning_rate": 4.458799923971406e-05, - "loss": 132.5406, - "step": 36510 - }, - { - "epoch": 0.29508965004565324, - "grad_norm": 915.8984375, - "learning_rate": 4.458366051876962e-05, - "loss": 106.1989, - "step": 36520 - }, - { - "epoch": 0.2951704522499374, - "grad_norm": 687.1302490234375, - "learning_rate": 4.457932027065102e-05, - "loss": 116.8813, - "step": 36530 - }, - { - "epoch": 0.2952512544542215, - "grad_norm": 1091.016845703125, - "learning_rate": 4.45749784956967e-05, - "loss": 158.8696, - "step": 36540 - }, - { - "epoch": 0.29533205665850565, - "grad_norm": 725.3900146484375, - "learning_rate": 4.457063519424525e-05, - "loss": 103.9494, - "step": 36550 - }, - { - "epoch": 0.2954128588627898, - "grad_norm": 1524.7835693359375, - "learning_rate": 4.456629036663537e-05, - "loss": 115.3339, - "step": 36560 - }, - { - "epoch": 0.2954936610670739, - "grad_norm": 1941.026123046875, - "learning_rate": 4.4561944013205885e-05, - "loss": 133.687, - "step": 36570 - }, - { - "epoch": 0.29557446327135806, - "grad_norm": 1179.541015625, - "learning_rate": 4.455759613429573e-05, - "loss": 110.7166, - "step": 36580 - }, - { - "epoch": 0.2956552654756422, - "grad_norm": 661.1014404296875, - "learning_rate": 4.455324673024396e-05, - "loss": 164.9227, - "step": 36590 - }, - { - "epoch": 0.29573606767992633, - "grad_norm": 1713.9471435546875, - "learning_rate": 4.454889580138975e-05, - "loss": 127.5746, - "step": 36600 - }, - { - "epoch": 0.29581686988421046, - "grad_norm": 1104.717529296875, - "learning_rate": 4.45445433480724e-05, - "loss": 123.0741, - "step": 36610 - }, - { - "epoch": 0.2958976720884946, - "grad_norm": 1367.0338134765625, - "learning_rate": 4.4540189370631315e-05, - "loss": 172.2203, - "step": 36620 - }, - { - "epoch": 0.2959784742927787, - "grad_norm": 1255.3448486328125, - "learning_rate": 4.4535833869406027e-05, - "loss": 126.8193, - "step": 36630 - }, - { - "epoch": 0.2960592764970628, - "grad_norm": 1059.6141357421875, - "learning_rate": 4.45314768447362e-05, - "loss": 121.4211, - "step": 36640 - }, - { - "epoch": 0.29614007870134695, - "grad_norm": 1522.713134765625, - "learning_rate": 4.452711829696158e-05, - "loss": 122.3892, - "step": 36650 - }, - { - "epoch": 0.2962208809056311, - "grad_norm": 885.0914916992188, - "learning_rate": 4.4522758226422076e-05, - "loss": 94.9889, - "step": 36660 - }, - { - "epoch": 0.2963016831099152, - "grad_norm": 953.8609008789062, - "learning_rate": 4.4518396633457696e-05, - "loss": 136.5476, - "step": 36670 - }, - { - "epoch": 0.29638248531419936, - "grad_norm": 1196.8687744140625, - "learning_rate": 4.451403351840855e-05, - "loss": 101.3226, - "step": 36680 - }, - { - "epoch": 0.2964632875184835, - "grad_norm": 1308.44091796875, - "learning_rate": 4.4509668881614894e-05, - "loss": 150.1893, - "step": 36690 - }, - { - "epoch": 0.29654408972276763, - "grad_norm": 578.9537353515625, - "learning_rate": 4.450530272341709e-05, - "loss": 114.1401, - "step": 36700 - }, - { - "epoch": 0.29662489192705177, - "grad_norm": 1027.5540771484375, - "learning_rate": 4.4500935044155626e-05, - "loss": 112.1286, - "step": 36710 - }, - { - "epoch": 0.2967056941313359, - "grad_norm": 849.3358154296875, - "learning_rate": 4.449656584417108e-05, - "loss": 107.2726, - "step": 36720 - }, - { - "epoch": 0.29678649633562004, - "grad_norm": 884.8580932617188, - "learning_rate": 4.44921951238042e-05, - "loss": 125.4854, - "step": 36730 - }, - { - "epoch": 0.2968672985399042, - "grad_norm": 778.6787109375, - "learning_rate": 4.4487822883395805e-05, - "loss": 115.5268, - "step": 36740 - }, - { - "epoch": 0.2969481007441883, - "grad_norm": 923.2658081054688, - "learning_rate": 4.448344912328686e-05, - "loss": 96.8813, - "step": 36750 - }, - { - "epoch": 0.29702890294847245, - "grad_norm": 634.1044921875, - "learning_rate": 4.447907384381843e-05, - "loss": 100.6929, - "step": 36760 - }, - { - "epoch": 0.2971097051527566, - "grad_norm": 657.9680786132812, - "learning_rate": 4.447469704533172e-05, - "loss": 128.1657, - "step": 36770 - }, - { - "epoch": 0.2971905073570407, - "grad_norm": 1035.9586181640625, - "learning_rate": 4.447031872816804e-05, - "loss": 132.5084, - "step": 36780 - }, - { - "epoch": 0.29727130956132486, - "grad_norm": 852.6742553710938, - "learning_rate": 4.4465938892668814e-05, - "loss": 170.7047, - "step": 36790 - }, - { - "epoch": 0.29735211176560894, - "grad_norm": 1070.7940673828125, - "learning_rate": 4.4461557539175594e-05, - "loss": 133.4162, - "step": 36800 - }, - { - "epoch": 0.2974329139698931, - "grad_norm": 1185.7442626953125, - "learning_rate": 4.445717466803004e-05, - "loss": 124.6443, - "step": 36810 - }, - { - "epoch": 0.2975137161741772, - "grad_norm": 602.00146484375, - "learning_rate": 4.445279027957395e-05, - "loss": 99.0589, - "step": 36820 - }, - { - "epoch": 0.29759451837846135, - "grad_norm": 831.6179809570312, - "learning_rate": 4.444840437414922e-05, - "loss": 131.2572, - "step": 36830 - }, - { - "epoch": 0.2976753205827455, - "grad_norm": 733.0899658203125, - "learning_rate": 4.444401695209788e-05, - "loss": 102.7688, - "step": 36840 - }, - { - "epoch": 0.2977561227870296, - "grad_norm": 464.7803039550781, - "learning_rate": 4.443962801376205e-05, - "loss": 112.3894, - "step": 36850 - }, - { - "epoch": 0.29783692499131376, - "grad_norm": 1011.0020141601562, - "learning_rate": 4.443523755948401e-05, - "loss": 124.0087, - "step": 36860 - }, - { - "epoch": 0.2979177271955979, - "grad_norm": 1786.9320068359375, - "learning_rate": 4.443084558960613e-05, - "loss": 147.5566, - "step": 36870 - }, - { - "epoch": 0.29799852939988203, - "grad_norm": 774.4219360351562, - "learning_rate": 4.4426452104470903e-05, - "loss": 103.0817, - "step": 36880 - }, - { - "epoch": 0.29807933160416616, - "grad_norm": 1132.37841796875, - "learning_rate": 4.4422057104420946e-05, - "loss": 116.8056, - "step": 36890 - }, - { - "epoch": 0.2981601338084503, - "grad_norm": 433.2102966308594, - "learning_rate": 4.441766058979898e-05, - "loss": 116.6097, - "step": 36900 - }, - { - "epoch": 0.29824093601273444, - "grad_norm": 563.262451171875, - "learning_rate": 4.441326256094787e-05, - "loss": 105.7932, - "step": 36910 - }, - { - "epoch": 0.2983217382170186, - "grad_norm": 1102.1732177734375, - "learning_rate": 4.4408863018210564e-05, - "loss": 95.8325, - "step": 36920 - }, - { - "epoch": 0.2984025404213027, - "grad_norm": 2111.252685546875, - "learning_rate": 4.440446196193016e-05, - "loss": 177.4551, - "step": 36930 - }, - { - "epoch": 0.29848334262558684, - "grad_norm": 799.4558715820312, - "learning_rate": 4.440005939244986e-05, - "loss": 97.75, - "step": 36940 - }, - { - "epoch": 0.298564144829871, - "grad_norm": 1028.9544677734375, - "learning_rate": 4.439565531011299e-05, - "loss": 111.2369, - "step": 36950 - }, - { - "epoch": 0.2986449470341551, - "grad_norm": 1321.359619140625, - "learning_rate": 4.439124971526297e-05, - "loss": 122.0615, - "step": 36960 - }, - { - "epoch": 0.2987257492384392, - "grad_norm": 1753.9129638671875, - "learning_rate": 4.438684260824339e-05, - "loss": 142.3784, - "step": 36970 - }, - { - "epoch": 0.29880655144272333, - "grad_norm": 1688.9437255859375, - "learning_rate": 4.4382433989397895e-05, - "loss": 96.2927, - "step": 36980 - }, - { - "epoch": 0.29888735364700747, - "grad_norm": 681.3382568359375, - "learning_rate": 4.437802385907029e-05, - "loss": 127.2185, - "step": 36990 - }, - { - "epoch": 0.2989681558512916, - "grad_norm": 603.5244140625, - "learning_rate": 4.4373612217604496e-05, - "loss": 93.3739, - "step": 37000 - }, - { - "epoch": 0.29904895805557574, - "grad_norm": 560.8984375, - "learning_rate": 4.4369199065344525e-05, - "loss": 110.261, - "step": 37010 - }, - { - "epoch": 0.2991297602598599, - "grad_norm": 1296.68603515625, - "learning_rate": 4.436478440263453e-05, - "loss": 110.1629, - "step": 37020 - }, - { - "epoch": 0.299210562464144, - "grad_norm": 1621.6697998046875, - "learning_rate": 4.436036822981877e-05, - "loss": 325.5025, - "step": 37030 - }, - { - "epoch": 0.29929136466842815, - "grad_norm": 1201.3565673828125, - "learning_rate": 4.4355950547241645e-05, - "loss": 115.1013, - "step": 37040 - }, - { - "epoch": 0.2993721668727123, - "grad_norm": 1426.513671875, - "learning_rate": 4.435153135524763e-05, - "loss": 145.3934, - "step": 37050 - }, - { - "epoch": 0.2994529690769964, - "grad_norm": 1013.943115234375, - "learning_rate": 4.434711065418137e-05, - "loss": 156.9819, - "step": 37060 - }, - { - "epoch": 0.29953377128128056, - "grad_norm": 1477.3516845703125, - "learning_rate": 4.434268844438758e-05, - "loss": 135.9137, - "step": 37070 - }, - { - "epoch": 0.2996145734855647, - "grad_norm": 1007.1724243164062, - "learning_rate": 4.433826472621112e-05, - "loss": 112.4269, - "step": 37080 - }, - { - "epoch": 0.29969537568984883, - "grad_norm": 729.6262817382812, - "learning_rate": 4.4333839499996954e-05, - "loss": 130.67, - "step": 37090 - }, - { - "epoch": 0.29977617789413297, - "grad_norm": 697.9806518554688, - "learning_rate": 4.432941276609018e-05, - "loss": 76.7371, - "step": 37100 - }, - { - "epoch": 0.2998569800984171, - "grad_norm": 7807.8896484375, - "learning_rate": 4.4324984524836e-05, - "loss": 97.4996, - "step": 37110 - }, - { - "epoch": 0.29993778230270124, - "grad_norm": 642.447265625, - "learning_rate": 4.4320554776579747e-05, - "loss": 114.7995, - "step": 37120 - }, - { - "epoch": 0.3000185845069854, - "grad_norm": 880.8277587890625, - "learning_rate": 4.431612352166684e-05, - "loss": 107.3988, - "step": 37130 - }, - { - "epoch": 0.30009938671126946, - "grad_norm": 1430.34375, - "learning_rate": 4.431169076044286e-05, - "loss": 115.5768, - "step": 37140 - }, - { - "epoch": 0.3001801889155536, - "grad_norm": 1099.5316162109375, - "learning_rate": 4.4307256493253457e-05, - "loss": 111.0541, - "step": 37150 - }, - { - "epoch": 0.30026099111983773, - "grad_norm": 1050.425537109375, - "learning_rate": 4.4302820720444456e-05, - "loss": 145.8769, - "step": 37160 - }, - { - "epoch": 0.30034179332412186, - "grad_norm": 1163.5174560546875, - "learning_rate": 4.429838344236174e-05, - "loss": 180.7474, - "step": 37170 - }, - { - "epoch": 0.300422595528406, - "grad_norm": 1810.3154296875, - "learning_rate": 4.429394465935136e-05, - "loss": 124.3441, - "step": 37180 - }, - { - "epoch": 0.30050339773269014, - "grad_norm": 924.7874755859375, - "learning_rate": 4.428950437175944e-05, - "loss": 131.745, - "step": 37190 - }, - { - "epoch": 0.3005841999369743, - "grad_norm": 1222.805419921875, - "learning_rate": 4.428506257993226e-05, - "loss": 123.6144, - "step": 37200 - }, - { - "epoch": 0.3006650021412584, - "grad_norm": 1185.9072265625, - "learning_rate": 4.428061928421618e-05, - "loss": 114.7299, - "step": 37210 - }, - { - "epoch": 0.30074580434554254, - "grad_norm": 1134.1878662109375, - "learning_rate": 4.427617448495772e-05, - "loss": 123.8651, - "step": 37220 - }, - { - "epoch": 0.3008266065498267, - "grad_norm": 1444.3021240234375, - "learning_rate": 4.427172818250349e-05, - "loss": 91.2975, - "step": 37230 - }, - { - "epoch": 0.3009074087541108, - "grad_norm": 1075.3841552734375, - "learning_rate": 4.4267280377200205e-05, - "loss": 115.6354, - "step": 37240 - }, - { - "epoch": 0.30098821095839495, - "grad_norm": 996.8224487304688, - "learning_rate": 4.426283106939474e-05, - "loss": 117.0218, - "step": 37250 - }, - { - "epoch": 0.3010690131626791, - "grad_norm": 1111.2142333984375, - "learning_rate": 4.425838025943403e-05, - "loss": 130.1219, - "step": 37260 - }, - { - "epoch": 0.3011498153669632, - "grad_norm": 861.32373046875, - "learning_rate": 4.4253927947665185e-05, - "loss": 140.277, - "step": 37270 - }, - { - "epoch": 0.30123061757124736, - "grad_norm": 1503.1151123046875, - "learning_rate": 4.424947413443539e-05, - "loss": 124.2421, - "step": 37280 - }, - { - "epoch": 0.3013114197755315, - "grad_norm": 998.5097045898438, - "learning_rate": 4.424501882009198e-05, - "loss": 102.2651, - "step": 37290 - }, - { - "epoch": 0.30139222197981563, - "grad_norm": 1350.6142578125, - "learning_rate": 4.4240562004982364e-05, - "loss": 104.0625, - "step": 37300 - }, - { - "epoch": 0.30147302418409977, - "grad_norm": 485.34716796875, - "learning_rate": 4.423610368945411e-05, - "loss": 122.5666, - "step": 37310 - }, - { - "epoch": 0.30155382638838385, - "grad_norm": 1117.5447998046875, - "learning_rate": 4.423164387385489e-05, - "loss": 108.5208, - "step": 37320 - }, - { - "epoch": 0.301634628592668, - "grad_norm": 902.8930053710938, - "learning_rate": 4.422718255853248e-05, - "loss": 108.7043, - "step": 37330 - }, - { - "epoch": 0.3017154307969521, - "grad_norm": 966.7778930664062, - "learning_rate": 4.422271974383479e-05, - "loss": 140.0457, - "step": 37340 - }, - { - "epoch": 0.30179623300123626, - "grad_norm": 771.8065185546875, - "learning_rate": 4.421825543010983e-05, - "loss": 111.9546, - "step": 37350 - }, - { - "epoch": 0.3018770352055204, - "grad_norm": 1222.7432861328125, - "learning_rate": 4.4213789617705746e-05, - "loss": 94.9703, - "step": 37360 - }, - { - "epoch": 0.30195783740980453, - "grad_norm": 984.2037353515625, - "learning_rate": 4.420932230697079e-05, - "loss": 109.0498, - "step": 37370 - }, - { - "epoch": 0.30203863961408867, - "grad_norm": 866.2888793945312, - "learning_rate": 4.420485349825332e-05, - "loss": 149.2853, - "step": 37380 - }, - { - "epoch": 0.3021194418183728, - "grad_norm": 689.14453125, - "learning_rate": 4.420038319190184e-05, - "loss": 132.5732, - "step": 37390 - }, - { - "epoch": 0.30220024402265694, - "grad_norm": 1139.3125, - "learning_rate": 4.4195911388264946e-05, - "loss": 99.106, - "step": 37400 - }, - { - "epoch": 0.3022810462269411, - "grad_norm": 653.4215087890625, - "learning_rate": 4.419143808769135e-05, - "loss": 82.1814, - "step": 37410 - }, - { - "epoch": 0.3023618484312252, - "grad_norm": 1491.076904296875, - "learning_rate": 4.41869632905299e-05, - "loss": 148.1032, - "step": 37420 - }, - { - "epoch": 0.30244265063550935, - "grad_norm": 934.8798217773438, - "learning_rate": 4.418248699712955e-05, - "loss": 133.3504, - "step": 37430 - }, - { - "epoch": 0.3025234528397935, - "grad_norm": 1311.5662841796875, - "learning_rate": 4.417800920783937e-05, - "loss": 89.8974, - "step": 37440 - }, - { - "epoch": 0.3026042550440776, - "grad_norm": 941.7271118164062, - "learning_rate": 4.417352992300854e-05, - "loss": 108.6178, - "step": 37450 - }, - { - "epoch": 0.30268505724836176, - "grad_norm": 665.7123413085938, - "learning_rate": 4.4169049142986376e-05, - "loss": 103.0933, - "step": 37460 - }, - { - "epoch": 0.3027658594526459, - "grad_norm": 1711.3316650390625, - "learning_rate": 4.4164566868122286e-05, - "loss": 140.2492, - "step": 37470 - }, - { - "epoch": 0.30284666165693, - "grad_norm": 1100.235107421875, - "learning_rate": 4.4160083098765815e-05, - "loss": 118.1711, - "step": 37480 - }, - { - "epoch": 0.3029274638612141, - "grad_norm": 875.5321655273438, - "learning_rate": 4.4155597835266616e-05, - "loss": 88.2336, - "step": 37490 - }, - { - "epoch": 0.30300826606549824, - "grad_norm": 696.4012451171875, - "learning_rate": 4.415111107797445e-05, - "loss": 111.0814, - "step": 37500 - }, - { - "epoch": 0.3030890682697824, - "grad_norm": 1203.421142578125, - "learning_rate": 4.414662282723922e-05, - "loss": 151.073, - "step": 37510 - }, - { - "epoch": 0.3031698704740665, - "grad_norm": 1201.158935546875, - "learning_rate": 4.414213308341092e-05, - "loss": 98.1354, - "step": 37520 - }, - { - "epoch": 0.30325067267835065, - "grad_norm": 1171.56005859375, - "learning_rate": 4.413764184683966e-05, - "loss": 116.6695, - "step": 37530 - }, - { - "epoch": 0.3033314748826348, - "grad_norm": 765.4888916015625, - "learning_rate": 4.413314911787569e-05, - "loss": 151.9455, - "step": 37540 - }, - { - "epoch": 0.3034122770869189, - "grad_norm": 1011.399658203125, - "learning_rate": 4.412865489686936e-05, - "loss": 85.7622, - "step": 37550 - }, - { - "epoch": 0.30349307929120306, - "grad_norm": 1261.4908447265625, - "learning_rate": 4.4124159184171134e-05, - "loss": 161.5575, - "step": 37560 - }, - { - "epoch": 0.3035738814954872, - "grad_norm": 1734.952880859375, - "learning_rate": 4.41196619801316e-05, - "loss": 109.7504, - "step": 37570 - }, - { - "epoch": 0.30365468369977133, - "grad_norm": 1072.2435302734375, - "learning_rate": 4.411516328510145e-05, - "loss": 103.7176, - "step": 37580 - }, - { - "epoch": 0.30373548590405547, - "grad_norm": 656.2511596679688, - "learning_rate": 4.4110663099431514e-05, - "loss": 115.9184, - "step": 37590 - }, - { - "epoch": 0.3038162881083396, - "grad_norm": 672.2481079101562, - "learning_rate": 4.410616142347273e-05, - "loss": 133.4341, - "step": 37600 - }, - { - "epoch": 0.30389709031262374, - "grad_norm": 1313.50146484375, - "learning_rate": 4.410165825757613e-05, - "loss": 125.5788, - "step": 37610 - }, - { - "epoch": 0.3039778925169079, - "grad_norm": 1051.2347412109375, - "learning_rate": 4.409715360209289e-05, - "loss": 114.4167, - "step": 37620 - }, - { - "epoch": 0.304058694721192, - "grad_norm": 677.7921752929688, - "learning_rate": 4.40926474573743e-05, - "loss": 91.3588, - "step": 37630 - }, - { - "epoch": 0.30413949692547615, - "grad_norm": 773.4998168945312, - "learning_rate": 4.4088139823771744e-05, - "loss": 98.4398, - "step": 37640 - }, - { - "epoch": 0.3042202991297603, - "grad_norm": 615.3533935546875, - "learning_rate": 4.408363070163675e-05, - "loss": 85.0433, - "step": 37650 - }, - { - "epoch": 0.30430110133404437, - "grad_norm": 841.9133911132812, - "learning_rate": 4.407912009132093e-05, - "loss": 105.7684, - "step": 37660 - }, - { - "epoch": 0.3043819035383285, - "grad_norm": 703.7492065429688, - "learning_rate": 4.407460799317604e-05, - "loss": 106.3364, - "step": 37670 - }, - { - "epoch": 0.30446270574261264, - "grad_norm": 1444.4390869140625, - "learning_rate": 4.407009440755396e-05, - "loss": 121.9615, - "step": 37680 - }, - { - "epoch": 0.3045435079468968, - "grad_norm": 1432.2054443359375, - "learning_rate": 4.406557933480664e-05, - "loss": 136.9116, - "step": 37690 - }, - { - "epoch": 0.3046243101511809, - "grad_norm": 1049.96630859375, - "learning_rate": 4.40610627752862e-05, - "loss": 133.9505, - "step": 37700 - }, - { - "epoch": 0.30470511235546505, - "grad_norm": 1372.518310546875, - "learning_rate": 4.405654472934483e-05, - "loss": 99.9223, - "step": 37710 - }, - { - "epoch": 0.3047859145597492, - "grad_norm": 1110.1002197265625, - "learning_rate": 4.4052025197334864e-05, - "loss": 115.3444, - "step": 37720 - }, - { - "epoch": 0.3048667167640333, - "grad_norm": 1247.1700439453125, - "learning_rate": 4.4047504179608755e-05, - "loss": 143.812, - "step": 37730 - }, - { - "epoch": 0.30494751896831745, - "grad_norm": 1031.3797607421875, - "learning_rate": 4.404298167651905e-05, - "loss": 74.8218, - "step": 37740 - }, - { - "epoch": 0.3050283211726016, - "grad_norm": 1569.984130859375, - "learning_rate": 4.403845768841842e-05, - "loss": 129.8514, - "step": 37750 - }, - { - "epoch": 0.3051091233768857, - "grad_norm": 872.8795166015625, - "learning_rate": 4.403393221565966e-05, - "loss": 142.0767, - "step": 37760 - }, - { - "epoch": 0.30518992558116986, - "grad_norm": 1905.04931640625, - "learning_rate": 4.402940525859568e-05, - "loss": 102.6401, - "step": 37770 - }, - { - "epoch": 0.305270727785454, - "grad_norm": 1085.094482421875, - "learning_rate": 4.40248768175795e-05, - "loss": 139.8112, - "step": 37780 - }, - { - "epoch": 0.30535152998973814, - "grad_norm": 611.7904663085938, - "learning_rate": 4.4020346892964246e-05, - "loss": 84.7788, - "step": 37790 - }, - { - "epoch": 0.30543233219402227, - "grad_norm": 931.4846801757812, - "learning_rate": 4.401581548510318e-05, - "loss": 89.6141, - "step": 37800 - }, - { - "epoch": 0.3055131343983064, - "grad_norm": 856.0546264648438, - "learning_rate": 4.401128259434968e-05, - "loss": 114.5535, - "step": 37810 - }, - { - "epoch": 0.30559393660259054, - "grad_norm": 1360.8074951171875, - "learning_rate": 4.4006748221057206e-05, - "loss": 117.5268, - "step": 37820 - }, - { - "epoch": 0.3056747388068746, - "grad_norm": 858.98876953125, - "learning_rate": 4.400221236557938e-05, - "loss": 134.4408, - "step": 37830 - }, - { - "epoch": 0.30575554101115876, - "grad_norm": 828.2800903320312, - "learning_rate": 4.3997675028269906e-05, - "loss": 122.7517, - "step": 37840 - }, - { - "epoch": 0.3058363432154429, - "grad_norm": 806.5180053710938, - "learning_rate": 4.399313620948262e-05, - "loss": 99.9936, - "step": 37850 - }, - { - "epoch": 0.30591714541972703, - "grad_norm": 874.6348266601562, - "learning_rate": 4.3988595909571464e-05, - "loss": 135.6958, - "step": 37860 - }, - { - "epoch": 0.30599794762401117, - "grad_norm": 966.5182495117188, - "learning_rate": 4.398405412889051e-05, - "loss": 94.1271, - "step": 37870 - }, - { - "epoch": 0.3060787498282953, - "grad_norm": 684.477294921875, - "learning_rate": 4.3979510867793917e-05, - "loss": 121.0109, - "step": 37880 - }, - { - "epoch": 0.30615955203257944, - "grad_norm": 776.9493408203125, - "learning_rate": 4.397496612663599e-05, - "loss": 122.9949, - "step": 37890 - }, - { - "epoch": 0.3062403542368636, - "grad_norm": 2081.328369140625, - "learning_rate": 4.3970419905771145e-05, - "loss": 173.5545, - "step": 37900 - }, - { - "epoch": 0.3063211564411477, - "grad_norm": 1486.5269775390625, - "learning_rate": 4.3965872205553885e-05, - "loss": 145.7915, - "step": 37910 - }, - { - "epoch": 0.30640195864543185, - "grad_norm": 948.6929931640625, - "learning_rate": 4.396132302633886e-05, - "loss": 116.5081, - "step": 37920 - }, - { - "epoch": 0.306482760849716, - "grad_norm": 1533.728271484375, - "learning_rate": 4.3956772368480836e-05, - "loss": 252.9777, - "step": 37930 - }, - { - "epoch": 0.3065635630540001, - "grad_norm": 1009.5624389648438, - "learning_rate": 4.395222023233466e-05, - "loss": 86.6304, - "step": 37940 - }, - { - "epoch": 0.30664436525828426, - "grad_norm": 817.4370727539062, - "learning_rate": 4.394766661825533e-05, - "loss": 114.7126, - "step": 37950 - }, - { - "epoch": 0.3067251674625684, - "grad_norm": 1509.2158203125, - "learning_rate": 4.394311152659796e-05, - "loss": 89.7742, - "step": 37960 - }, - { - "epoch": 0.30680596966685253, - "grad_norm": 1312.8382568359375, - "learning_rate": 4.393855495771774e-05, - "loss": 137.7279, - "step": 37970 - }, - { - "epoch": 0.30688677187113667, - "grad_norm": 2152.255615234375, - "learning_rate": 4.393399691197e-05, - "loss": 146.6019, - "step": 37980 - }, - { - "epoch": 0.3069675740754208, - "grad_norm": 1140.3125, - "learning_rate": 4.392943738971021e-05, - "loss": 105.6107, - "step": 37990 - }, - { - "epoch": 0.30704837627970494, - "grad_norm": 812.2454833984375, - "learning_rate": 4.3924876391293915e-05, - "loss": 129.91, - "step": 38000 - }, - { - "epoch": 0.307129178483989, - "grad_norm": 947.0767211914062, - "learning_rate": 4.3920313917076794e-05, - "loss": 95.957, - "step": 38010 - }, - { - "epoch": 0.30720998068827315, - "grad_norm": 954.76953125, - "learning_rate": 4.391574996741463e-05, - "loss": 114.6525, - "step": 38020 - }, - { - "epoch": 0.3072907828925573, - "grad_norm": 1447.2447509765625, - "learning_rate": 4.3911184542663344e-05, - "loss": 139.2432, - "step": 38030 - }, - { - "epoch": 0.3073715850968414, - "grad_norm": 803.22021484375, - "learning_rate": 4.390661764317895e-05, - "loss": 114.9043, - "step": 38040 - }, - { - "epoch": 0.30745238730112556, - "grad_norm": 1779.864501953125, - "learning_rate": 4.390204926931758e-05, - "loss": 95.5969, - "step": 38050 - }, - { - "epoch": 0.3075331895054097, - "grad_norm": 986.6395263671875, - "learning_rate": 4.38974794214355e-05, - "loss": 118.0821, - "step": 38060 - }, - { - "epoch": 0.30761399170969383, - "grad_norm": 963.3289184570312, - "learning_rate": 4.3892908099889054e-05, - "loss": 126.8173, - "step": 38070 - }, - { - "epoch": 0.30769479391397797, - "grad_norm": 856.3060302734375, - "learning_rate": 4.388833530503473e-05, - "loss": 150.2272, - "step": 38080 - }, - { - "epoch": 0.3077755961182621, - "grad_norm": 596.4969482421875, - "learning_rate": 4.3883761037229146e-05, - "loss": 111.8843, - "step": 38090 - }, - { - "epoch": 0.30785639832254624, - "grad_norm": 1166.034423828125, - "learning_rate": 4.387918529682898e-05, - "loss": 102.7273, - "step": 38100 - }, - { - "epoch": 0.3079372005268304, - "grad_norm": 661.765625, - "learning_rate": 4.387460808419108e-05, - "loss": 137.9503, - "step": 38110 - }, - { - "epoch": 0.3080180027311145, - "grad_norm": 1324.128173828125, - "learning_rate": 4.387002939967237e-05, - "loss": 130.0307, - "step": 38120 - }, - { - "epoch": 0.30809880493539865, - "grad_norm": 636.979736328125, - "learning_rate": 4.386544924362993e-05, - "loss": 118.3966, - "step": 38130 - }, - { - "epoch": 0.3081796071396828, - "grad_norm": 693.372314453125, - "learning_rate": 4.386086761642091e-05, - "loss": 120.2471, - "step": 38140 - }, - { - "epoch": 0.3082604093439669, - "grad_norm": 793.03564453125, - "learning_rate": 4.3856284518402594e-05, - "loss": 149.7395, - "step": 38150 - }, - { - "epoch": 0.30834121154825106, - "grad_norm": 1197.666748046875, - "learning_rate": 4.3851699949932396e-05, - "loss": 128.0856, - "step": 38160 - }, - { - "epoch": 0.3084220137525352, - "grad_norm": 1568.858154296875, - "learning_rate": 4.384711391136781e-05, - "loss": 106.0753, - "step": 38170 - }, - { - "epoch": 0.3085028159568193, - "grad_norm": 1025.5081787109375, - "learning_rate": 4.3842526403066486e-05, - "loss": 88.1382, - "step": 38180 - }, - { - "epoch": 0.3085836181611034, - "grad_norm": 894.8125610351562, - "learning_rate": 4.383793742538616e-05, - "loss": 155.592, - "step": 38190 - }, - { - "epoch": 0.30866442036538755, - "grad_norm": 921.5929565429688, - "learning_rate": 4.3833346978684675e-05, - "loss": 127.5323, - "step": 38200 - }, - { - "epoch": 0.3087452225696717, - "grad_norm": 1084.8704833984375, - "learning_rate": 4.3828755063320016e-05, - "loss": 113.9268, - "step": 38210 - }, - { - "epoch": 0.3088260247739558, - "grad_norm": 966.5870361328125, - "learning_rate": 4.382416167965028e-05, - "loss": 101.7444, - "step": 38220 - }, - { - "epoch": 0.30890682697823996, - "grad_norm": 1390.020263671875, - "learning_rate": 4.381956682803365e-05, - "loss": 112.2445, - "step": 38230 - }, - { - "epoch": 0.3089876291825241, - "grad_norm": 1229.51123046875, - "learning_rate": 4.381497050882845e-05, - "loss": 111.7491, - "step": 38240 - }, - { - "epoch": 0.30906843138680823, - "grad_norm": 777.4800415039062, - "learning_rate": 4.381037272239311e-05, - "loss": 94.3155, - "step": 38250 - }, - { - "epoch": 0.30914923359109237, - "grad_norm": 678.0169067382812, - "learning_rate": 4.380577346908618e-05, - "loss": 119.7754, - "step": 38260 - }, - { - "epoch": 0.3092300357953765, - "grad_norm": 1025.1263427734375, - "learning_rate": 4.380117274926631e-05, - "loss": 128.2441, - "step": 38270 - }, - { - "epoch": 0.30931083799966064, - "grad_norm": 1335.092041015625, - "learning_rate": 4.379657056329228e-05, - "loss": 167.0427, - "step": 38280 - }, - { - "epoch": 0.3093916402039448, - "grad_norm": 1194.683349609375, - "learning_rate": 4.379196691152298e-05, - "loss": 123.6019, - "step": 38290 - }, - { - "epoch": 0.3094724424082289, - "grad_norm": 1275.6986083984375, - "learning_rate": 4.3787361794317405e-05, - "loss": 122.2862, - "step": 38300 - }, - { - "epoch": 0.30955324461251305, - "grad_norm": 979.6260375976562, - "learning_rate": 4.3782755212034675e-05, - "loss": 151.8163, - "step": 38310 - }, - { - "epoch": 0.3096340468167972, - "grad_norm": 844.6943359375, - "learning_rate": 4.3778147165034025e-05, - "loss": 148.6413, - "step": 38320 - }, - { - "epoch": 0.3097148490210813, - "grad_norm": 861.6826782226562, - "learning_rate": 4.377353765367479e-05, - "loss": 141.0275, - "step": 38330 - }, - { - "epoch": 0.30979565122536545, - "grad_norm": 824.5519409179688, - "learning_rate": 4.376892667831644e-05, - "loss": 85.7971, - "step": 38340 - }, - { - "epoch": 0.30987645342964953, - "grad_norm": 1542.6492919921875, - "learning_rate": 4.376431423931853e-05, - "loss": 134.0757, - "step": 38350 - }, - { - "epoch": 0.30995725563393367, - "grad_norm": 1860.0059814453125, - "learning_rate": 4.375970033704077e-05, - "loss": 130.4323, - "step": 38360 - }, - { - "epoch": 0.3100380578382178, - "grad_norm": 1580.017578125, - "learning_rate": 4.3755084971842954e-05, - "loss": 94.0326, - "step": 38370 - }, - { - "epoch": 0.31011886004250194, - "grad_norm": 1404.3966064453125, - "learning_rate": 4.375046814408499e-05, - "loss": 116.8863, - "step": 38380 - }, - { - "epoch": 0.3101996622467861, - "grad_norm": 1407.6103515625, - "learning_rate": 4.374584985412692e-05, - "loss": 111.7966, - "step": 38390 - }, - { - "epoch": 0.3102804644510702, - "grad_norm": 1489.8251953125, - "learning_rate": 4.374123010232888e-05, - "loss": 110.4587, - "step": 38400 - }, - { - "epoch": 0.31036126665535435, - "grad_norm": 1284.820556640625, - "learning_rate": 4.373660888905113e-05, - "loss": 108.2251, - "step": 38410 - }, - { - "epoch": 0.3104420688596385, - "grad_norm": 1439.919677734375, - "learning_rate": 4.373198621465404e-05, - "loss": 129.1054, - "step": 38420 - }, - { - "epoch": 0.3105228710639226, - "grad_norm": 863.4996337890625, - "learning_rate": 4.372736207949809e-05, - "loss": 129.7889, - "step": 38430 - }, - { - "epoch": 0.31060367326820676, - "grad_norm": 947.4799194335938, - "learning_rate": 4.372273648394389e-05, - "loss": 112.2936, - "step": 38440 - }, - { - "epoch": 0.3106844754724909, - "grad_norm": 801.3700561523438, - "learning_rate": 4.371810942835215e-05, - "loss": 87.9463, - "step": 38450 - }, - { - "epoch": 0.31076527767677503, - "grad_norm": 865.8770141601562, - "learning_rate": 4.37134809130837e-05, - "loss": 113.4838, - "step": 38460 - }, - { - "epoch": 0.31084607988105917, - "grad_norm": 745.05810546875, - "learning_rate": 4.370885093849948e-05, - "loss": 80.5933, - "step": 38470 - }, - { - "epoch": 0.3109268820853433, - "grad_norm": 952.232421875, - "learning_rate": 4.370421950496054e-05, - "loss": 138.4925, - "step": 38480 - }, - { - "epoch": 0.31100768428962744, - "grad_norm": 844.3306274414062, - "learning_rate": 4.369958661282805e-05, - "loss": 108.0141, - "step": 38490 - }, - { - "epoch": 0.3110884864939116, - "grad_norm": 1474.55419921875, - "learning_rate": 4.36949522624633e-05, - "loss": 130.476, - "step": 38500 - }, - { - "epoch": 0.3111692886981957, - "grad_norm": 1357.7593994140625, - "learning_rate": 4.3690316454227674e-05, - "loss": 130.1099, - "step": 38510 - }, - { - "epoch": 0.3112500909024798, - "grad_norm": 575.6583251953125, - "learning_rate": 4.368567918848269e-05, - "loss": 122.2708, - "step": 38520 - }, - { - "epoch": 0.31133089310676393, - "grad_norm": 1357.952392578125, - "learning_rate": 4.3681040465589976e-05, - "loss": 128.5893, - "step": 38530 - }, - { - "epoch": 0.31141169531104806, - "grad_norm": 1329.2703857421875, - "learning_rate": 4.3676400285911256e-05, - "loss": 113.2182, - "step": 38540 - }, - { - "epoch": 0.3114924975153322, - "grad_norm": 1224.45703125, - "learning_rate": 4.367175864980839e-05, - "loss": 122.2324, - "step": 38550 - }, - { - "epoch": 0.31157329971961634, - "grad_norm": 1179.831787109375, - "learning_rate": 4.3667115557643336e-05, - "loss": 117.856, - "step": 38560 - }, - { - "epoch": 0.3116541019239005, - "grad_norm": 1122.662841796875, - "learning_rate": 4.366247100977818e-05, - "loss": 107.4483, - "step": 38570 - }, - { - "epoch": 0.3117349041281846, - "grad_norm": 1522.09130859375, - "learning_rate": 4.3657825006575106e-05, - "loss": 118.0557, - "step": 38580 - }, - { - "epoch": 0.31181570633246875, - "grad_norm": 830.7593383789062, - "learning_rate": 4.3653177548396426e-05, - "loss": 119.6881, - "step": 38590 - }, - { - "epoch": 0.3118965085367529, - "grad_norm": 717.469482421875, - "learning_rate": 4.3648528635604556e-05, - "loss": 109.8253, - "step": 38600 - }, - { - "epoch": 0.311977310741037, - "grad_norm": 1047.29541015625, - "learning_rate": 4.364387826856202e-05, - "loss": 113.5046, - "step": 38610 - }, - { - "epoch": 0.31205811294532115, - "grad_norm": 720.7019653320312, - "learning_rate": 4.363922644763147e-05, - "loss": 129.2219, - "step": 38620 - }, - { - "epoch": 0.3121389151496053, - "grad_norm": 1761.85986328125, - "learning_rate": 4.363457317317567e-05, - "loss": 147.9658, - "step": 38630 - }, - { - "epoch": 0.3122197173538894, - "grad_norm": 978.6909790039062, - "learning_rate": 4.362991844555749e-05, - "loss": 79.9376, - "step": 38640 - }, - { - "epoch": 0.31230051955817356, - "grad_norm": 705.4371948242188, - "learning_rate": 4.3625262265139906e-05, - "loss": 126.836, - "step": 38650 - }, - { - "epoch": 0.3123813217624577, - "grad_norm": 754.1978149414062, - "learning_rate": 4.3620604632286024e-05, - "loss": 124.1801, - "step": 38660 - }, - { - "epoch": 0.31246212396674183, - "grad_norm": 1324.1651611328125, - "learning_rate": 4.361594554735905e-05, - "loss": 127.1789, - "step": 38670 - }, - { - "epoch": 0.31254292617102597, - "grad_norm": 1993.076416015625, - "learning_rate": 4.361128501072231e-05, - "loss": 141.09, - "step": 38680 - }, - { - "epoch": 0.3126237283753101, - "grad_norm": 913.458740234375, - "learning_rate": 4.360662302273925e-05, - "loss": 113.3142, - "step": 38690 - }, - { - "epoch": 0.3127045305795942, - "grad_norm": 587.2592163085938, - "learning_rate": 4.3601959583773415e-05, - "loss": 143.8163, - "step": 38700 - }, - { - "epoch": 0.3127853327838783, - "grad_norm": 1072.756591796875, - "learning_rate": 4.3597294694188475e-05, - "loss": 140.5, - "step": 38710 - }, - { - "epoch": 0.31286613498816246, - "grad_norm": 574.6174926757812, - "learning_rate": 4.35926283543482e-05, - "loss": 98.7149, - "step": 38720 - }, - { - "epoch": 0.3129469371924466, - "grad_norm": 837.947021484375, - "learning_rate": 4.358796056461648e-05, - "loss": 159.1835, - "step": 38730 - }, - { - "epoch": 0.31302773939673073, - "grad_norm": 674.5875854492188, - "learning_rate": 4.358329132535733e-05, - "loss": 163.4595, - "step": 38740 - }, - { - "epoch": 0.31310854160101487, - "grad_norm": 3087.755615234375, - "learning_rate": 4.357862063693486e-05, - "loss": 129.5215, - "step": 38750 - }, - { - "epoch": 0.313189343805299, - "grad_norm": 1132.33056640625, - "learning_rate": 4.35739484997133e-05, - "loss": 168.4956, - "step": 38760 - }, - { - "epoch": 0.31327014600958314, - "grad_norm": 1033.60498046875, - "learning_rate": 4.356927491405699e-05, - "loss": 101.4553, - "step": 38770 - }, - { - "epoch": 0.3133509482138673, - "grad_norm": 902.907470703125, - "learning_rate": 4.356459988033039e-05, - "loss": 101.9961, - "step": 38780 - }, - { - "epoch": 0.3134317504181514, - "grad_norm": 1353.718994140625, - "learning_rate": 4.355992339889806e-05, - "loss": 114.7233, - "step": 38790 - }, - { - "epoch": 0.31351255262243555, - "grad_norm": 3347.521240234375, - "learning_rate": 4.355524547012471e-05, - "loss": 131.8447, - "step": 38800 - }, - { - "epoch": 0.3135933548267197, - "grad_norm": 1190.19189453125, - "learning_rate": 4.3550566094375086e-05, - "loss": 93.5287, - "step": 38810 - }, - { - "epoch": 0.3136741570310038, - "grad_norm": 775.0213012695312, - "learning_rate": 4.354588527201414e-05, - "loss": 142.9776, - "step": 38820 - }, - { - "epoch": 0.31375495923528796, - "grad_norm": 1168.238037109375, - "learning_rate": 4.354120300340688e-05, - "loss": 108.127, - "step": 38830 - }, - { - "epoch": 0.3138357614395721, - "grad_norm": 1076.7213134765625, - "learning_rate": 4.353651928891842e-05, - "loss": 112.3075, - "step": 38840 - }, - { - "epoch": 0.31391656364385623, - "grad_norm": 966.3694458007812, - "learning_rate": 4.3531834128914025e-05, - "loss": 94.956, - "step": 38850 - }, - { - "epoch": 0.31399736584814036, - "grad_norm": 1032.4862060546875, - "learning_rate": 4.352714752375906e-05, - "loss": 147.1205, - "step": 38860 - }, - { - "epoch": 0.31407816805242444, - "grad_norm": 792.0556030273438, - "learning_rate": 4.352245947381897e-05, - "loss": 143.4195, - "step": 38870 - }, - { - "epoch": 0.3141589702567086, - "grad_norm": 899.2998046875, - "learning_rate": 4.351776997945936e-05, - "loss": 117.9321, - "step": 38880 - }, - { - "epoch": 0.3142397724609927, - "grad_norm": 1030.437744140625, - "learning_rate": 4.351307904104592e-05, - "loss": 103.507, - "step": 38890 - }, - { - "epoch": 0.31432057466527685, - "grad_norm": 2323.088134765625, - "learning_rate": 4.350838665894446e-05, - "loss": 124.9846, - "step": 38900 - }, - { - "epoch": 0.314401376869561, - "grad_norm": 980.265625, - "learning_rate": 4.3503692833520894e-05, - "loss": 95.4946, - "step": 38910 - }, - { - "epoch": 0.3144821790738451, - "grad_norm": 770.2604370117188, - "learning_rate": 4.3498997565141267e-05, - "loss": 89.1535, - "step": 38920 - }, - { - "epoch": 0.31456298127812926, - "grad_norm": 2414.138427734375, - "learning_rate": 4.3494300854171715e-05, - "loss": 140.2339, - "step": 38930 - }, - { - "epoch": 0.3146437834824134, - "grad_norm": 1284.97705078125, - "learning_rate": 4.348960270097851e-05, - "loss": 106.5645, - "step": 38940 - }, - { - "epoch": 0.31472458568669753, - "grad_norm": 1147.8294677734375, - "learning_rate": 4.348490310592801e-05, - "loss": 135.7514, - "step": 38950 - }, - { - "epoch": 0.31480538789098167, - "grad_norm": 1143.9412841796875, - "learning_rate": 4.348020206938672e-05, - "loss": 128.0387, - "step": 38960 - }, - { - "epoch": 0.3148861900952658, - "grad_norm": 3205.775390625, - "learning_rate": 4.347549959172121e-05, - "loss": 117.5442, - "step": 38970 - }, - { - "epoch": 0.31496699229954994, - "grad_norm": 2403.03515625, - "learning_rate": 4.3470795673298206e-05, - "loss": 120.8946, - "step": 38980 - }, - { - "epoch": 0.3150477945038341, - "grad_norm": 690.9103393554688, - "learning_rate": 4.346609031448452e-05, - "loss": 72.8542, - "step": 38990 - }, - { - "epoch": 0.3151285967081182, - "grad_norm": 1095.7021484375, - "learning_rate": 4.3461383515647106e-05, - "loss": 117.6166, - "step": 39000 - }, - { - "epoch": 0.31520939891240235, - "grad_norm": 1060.40625, - "learning_rate": 4.3456675277152973e-05, - "loss": 89.0072, - "step": 39010 - }, - { - "epoch": 0.3152902011166865, - "grad_norm": 955.5571899414062, - "learning_rate": 4.345196559936932e-05, - "loss": 133.9836, - "step": 39020 - }, - { - "epoch": 0.3153710033209706, - "grad_norm": 701.19140625, - "learning_rate": 4.344725448266338e-05, - "loss": 98.5555, - "step": 39030 - }, - { - "epoch": 0.3154518055252547, - "grad_norm": 861.9324340820312, - "learning_rate": 4.3442541927402566e-05, - "loss": 98.312, - "step": 39040 - }, - { - "epoch": 0.31553260772953884, - "grad_norm": 812.0772094726562, - "learning_rate": 4.343782793395435e-05, - "loss": 108.8839, - "step": 39050 - }, - { - "epoch": 0.315613409933823, - "grad_norm": 1177.6285400390625, - "learning_rate": 4.3433112502686355e-05, - "loss": 112.737, - "step": 39060 - }, - { - "epoch": 0.3156942121381071, - "grad_norm": 775.11181640625, - "learning_rate": 4.342839563396629e-05, - "loss": 111.2105, - "step": 39070 - }, - { - "epoch": 0.31577501434239125, - "grad_norm": 1149.7894287109375, - "learning_rate": 4.3423677328161996e-05, - "loss": 118.0603, - "step": 39080 - }, - { - "epoch": 0.3158558165466754, - "grad_norm": 1137.329833984375, - "learning_rate": 4.341895758564141e-05, - "loss": 121.5907, - "step": 39090 - }, - { - "epoch": 0.3159366187509595, - "grad_norm": 2528.37841796875, - "learning_rate": 4.3414236406772584e-05, - "loss": 156.1274, - "step": 39100 - }, - { - "epoch": 0.31601742095524366, - "grad_norm": 1206.318115234375, - "learning_rate": 4.340951379192369e-05, - "loss": 132.1304, - "step": 39110 - }, - { - "epoch": 0.3160982231595278, - "grad_norm": 932.46435546875, - "learning_rate": 4.3404789741463e-05, - "loss": 104.3671, - "step": 39120 - }, - { - "epoch": 0.3161790253638119, - "grad_norm": 1179.34765625, - "learning_rate": 4.340006425575892e-05, - "loss": 83.7175, - "step": 39130 - }, - { - "epoch": 0.31625982756809606, - "grad_norm": 1198.925537109375, - "learning_rate": 4.3395337335179945e-05, - "loss": 124.2574, - "step": 39140 - }, - { - "epoch": 0.3163406297723802, - "grad_norm": 673.528076171875, - "learning_rate": 4.339060898009469e-05, - "loss": 119.1199, - "step": 39150 - }, - { - "epoch": 0.31642143197666434, - "grad_norm": 650.783447265625, - "learning_rate": 4.338587919087187e-05, - "loss": 88.8582, - "step": 39160 - }, - { - "epoch": 0.31650223418094847, - "grad_norm": 1518.3681640625, - "learning_rate": 4.338114796788035e-05, - "loss": 166.7917, - "step": 39170 - }, - { - "epoch": 0.3165830363852326, - "grad_norm": 639.2760009765625, - "learning_rate": 4.3376415311489056e-05, - "loss": 110.966, - "step": 39180 - }, - { - "epoch": 0.31666383858951674, - "grad_norm": 642.9440307617188, - "learning_rate": 4.337168122206706e-05, - "loss": 145.9575, - "step": 39190 - }, - { - "epoch": 0.3167446407938009, - "grad_norm": 951.6559448242188, - "learning_rate": 4.336694569998354e-05, - "loss": 112.331, - "step": 39200 - }, - { - "epoch": 0.31682544299808496, - "grad_norm": 1648.744873046875, - "learning_rate": 4.336220874560778e-05, - "loss": 117.1864, - "step": 39210 - }, - { - "epoch": 0.3169062452023691, - "grad_norm": 1047.643310546875, - "learning_rate": 4.335747035930916e-05, - "loss": 98.1013, - "step": 39220 - }, - { - "epoch": 0.31698704740665323, - "grad_norm": 1020.3357543945312, - "learning_rate": 4.335273054145722e-05, - "loss": 142.1662, - "step": 39230 - }, - { - "epoch": 0.31706784961093737, - "grad_norm": 1175.232177734375, - "learning_rate": 4.334798929242155e-05, - "loss": 118.3595, - "step": 39240 - }, - { - "epoch": 0.3171486518152215, - "grad_norm": 1336.9315185546875, - "learning_rate": 4.334324661257191e-05, - "loss": 115.2342, - "step": 39250 - }, - { - "epoch": 0.31722945401950564, - "grad_norm": 830.479248046875, - "learning_rate": 4.3338502502278134e-05, - "loss": 122.526, - "step": 39260 - }, - { - "epoch": 0.3173102562237898, - "grad_norm": 1046.214599609375, - "learning_rate": 4.3333756961910166e-05, - "loss": 103.9069, - "step": 39270 - }, - { - "epoch": 0.3173910584280739, - "grad_norm": 1311.0205078125, - "learning_rate": 4.3329009991838084e-05, - "loss": 96.0481, - "step": 39280 - }, - { - "epoch": 0.31747186063235805, - "grad_norm": 1204.68701171875, - "learning_rate": 4.3324261592432056e-05, - "loss": 101.876, - "step": 39290 - }, - { - "epoch": 0.3175526628366422, - "grad_norm": 1037.3402099609375, - "learning_rate": 4.331951176406239e-05, - "loss": 95.0132, - "step": 39300 - }, - { - "epoch": 0.3176334650409263, - "grad_norm": 890.4486694335938, - "learning_rate": 4.331476050709948e-05, - "loss": 101.8592, - "step": 39310 - }, - { - "epoch": 0.31771426724521046, - "grad_norm": 634.5355224609375, - "learning_rate": 4.3310007821913836e-05, - "loss": 113.4796, - "step": 39320 - }, - { - "epoch": 0.3177950694494946, - "grad_norm": 470.7014465332031, - "learning_rate": 4.330525370887607e-05, - "loss": 103.1893, - "step": 39330 - }, - { - "epoch": 0.31787587165377873, - "grad_norm": 870.4022827148438, - "learning_rate": 4.330049816835694e-05, - "loss": 140.5201, - "step": 39340 - }, - { - "epoch": 0.31795667385806287, - "grad_norm": 1266.8585205078125, - "learning_rate": 4.329574120072728e-05, - "loss": 153.9299, - "step": 39350 - }, - { - "epoch": 0.318037476062347, - "grad_norm": 757.488525390625, - "learning_rate": 4.3290982806358046e-05, - "loss": 111.1849, - "step": 39360 - }, - { - "epoch": 0.31811827826663114, - "grad_norm": 786.891357421875, - "learning_rate": 4.328622298562033e-05, - "loss": 136.8115, - "step": 39370 - }, - { - "epoch": 0.3181990804709153, - "grad_norm": 1138.264892578125, - "learning_rate": 4.3281461738885274e-05, - "loss": 126.7419, - "step": 39380 - }, - { - "epoch": 0.31827988267519935, - "grad_norm": 1188.788818359375, - "learning_rate": 4.327669906652421e-05, - "loss": 123.707, - "step": 39390 - }, - { - "epoch": 0.3183606848794835, - "grad_norm": 1289.3082275390625, - "learning_rate": 4.3271934968908514e-05, - "loss": 122.8561, - "step": 39400 - }, - { - "epoch": 0.3184414870837676, - "grad_norm": 789.050048828125, - "learning_rate": 4.32671694464097e-05, - "loss": 98.2394, - "step": 39410 - }, - { - "epoch": 0.31852228928805176, - "grad_norm": 1768.001708984375, - "learning_rate": 4.3262402499399404e-05, - "loss": 119.3589, - "step": 39420 - }, - { - "epoch": 0.3186030914923359, - "grad_norm": 720.7200317382812, - "learning_rate": 4.325763412824937e-05, - "loss": 126.0495, - "step": 39430 - }, - { - "epoch": 0.31868389369662004, - "grad_norm": 923.09912109375, - "learning_rate": 4.325286433333142e-05, - "loss": 132.648, - "step": 39440 - }, - { - "epoch": 0.31876469590090417, - "grad_norm": 755.3150634765625, - "learning_rate": 4.324809311501754e-05, - "loss": 113.4982, - "step": 39450 - }, - { - "epoch": 0.3188454981051883, - "grad_norm": 742.4007568359375, - "learning_rate": 4.3243320473679785e-05, - "loss": 115.1539, - "step": 39460 - }, - { - "epoch": 0.31892630030947244, - "grad_norm": 838.2288208007812, - "learning_rate": 4.323854640969033e-05, - "loss": 108.5719, - "step": 39470 - }, - { - "epoch": 0.3190071025137566, - "grad_norm": 822.6356811523438, - "learning_rate": 4.323377092342148e-05, - "loss": 109.1691, - "step": 39480 - }, - { - "epoch": 0.3190879047180407, - "grad_norm": 1178.4033203125, - "learning_rate": 4.322899401524563e-05, - "loss": 135.6591, - "step": 39490 - }, - { - "epoch": 0.31916870692232485, - "grad_norm": 652.5155639648438, - "learning_rate": 4.3224215685535294e-05, - "loss": 117.6863, - "step": 39500 - }, - { - "epoch": 0.319249509126609, - "grad_norm": 682.0370483398438, - "learning_rate": 4.321943593466309e-05, - "loss": 103.8676, - "step": 39510 - }, - { - "epoch": 0.3193303113308931, - "grad_norm": 1420.8062744140625, - "learning_rate": 4.321465476300177e-05, - "loss": 136.662, - "step": 39520 - }, - { - "epoch": 0.31941111353517726, - "grad_norm": 698.1507568359375, - "learning_rate": 4.320987217092416e-05, - "loss": 105.2421, - "step": 39530 - }, - { - "epoch": 0.3194919157394614, - "grad_norm": 1522.26904296875, - "learning_rate": 4.3205088158803226e-05, - "loss": 116.2464, - "step": 39540 - }, - { - "epoch": 0.31957271794374553, - "grad_norm": 1105.83544921875, - "learning_rate": 4.320030272701203e-05, - "loss": 90.8007, - "step": 39550 - }, - { - "epoch": 0.3196535201480296, - "grad_norm": 536.1699829101562, - "learning_rate": 4.319551587592376e-05, - "loss": 99.7324, - "step": 39560 - }, - { - "epoch": 0.31973432235231375, - "grad_norm": 1099.2618408203125, - "learning_rate": 4.31907276059117e-05, - "loss": 125.6454, - "step": 39570 - }, - { - "epoch": 0.3198151245565979, - "grad_norm": 1193.3060302734375, - "learning_rate": 4.318593791734924e-05, - "loss": 121.2533, - "step": 39580 - }, - { - "epoch": 0.319895926760882, - "grad_norm": 919.3883666992188, - "learning_rate": 4.31811468106099e-05, - "loss": 122.7136, - "step": 39590 - }, - { - "epoch": 0.31997672896516616, - "grad_norm": 1152.9061279296875, - "learning_rate": 4.31763542860673e-05, - "loss": 122.5919, - "step": 39600 - }, - { - "epoch": 0.3200575311694503, - "grad_norm": 905.7959594726562, - "learning_rate": 4.3171560344095164e-05, - "loss": 136.4776, - "step": 39610 - }, - { - "epoch": 0.32013833337373443, - "grad_norm": 873.199951171875, - "learning_rate": 4.3166764985067343e-05, - "loss": 111.6839, - "step": 39620 - }, - { - "epoch": 0.32021913557801857, - "grad_norm": 1683.5948486328125, - "learning_rate": 4.3161968209357776e-05, - "loss": 116.4151, - "step": 39630 - }, - { - "epoch": 0.3202999377823027, - "grad_norm": 791.1875, - "learning_rate": 4.3157170017340545e-05, - "loss": 121.1761, - "step": 39640 - }, - { - "epoch": 0.32038073998658684, - "grad_norm": 5690.720703125, - "learning_rate": 4.3152370409389795e-05, - "loss": 138.5395, - "step": 39650 - }, - { - "epoch": 0.320461542190871, - "grad_norm": 1950.7032470703125, - "learning_rate": 4.314756938587984e-05, - "loss": 156.0846, - "step": 39660 - }, - { - "epoch": 0.3205423443951551, - "grad_norm": 960.7974853515625, - "learning_rate": 4.3142766947185056e-05, - "loss": 121.0033, - "step": 39670 - }, - { - "epoch": 0.32062314659943925, - "grad_norm": 1366.6922607421875, - "learning_rate": 4.3137963093679945e-05, - "loss": 119.4104, - "step": 39680 - }, - { - "epoch": 0.3207039488037234, - "grad_norm": 1155.9256591796875, - "learning_rate": 4.313315782573913e-05, - "loss": 112.0471, - "step": 39690 - }, - { - "epoch": 0.3207847510080075, - "grad_norm": 1377.0623779296875, - "learning_rate": 4.3128351143737335e-05, - "loss": 182.7688, - "step": 39700 - }, - { - "epoch": 0.32086555321229165, - "grad_norm": 1066.0819091796875, - "learning_rate": 4.312354304804939e-05, - "loss": 143.873, - "step": 39710 - }, - { - "epoch": 0.3209463554165758, - "grad_norm": 1220.1307373046875, - "learning_rate": 4.3118733539050244e-05, - "loss": 124.0734, - "step": 39720 - }, - { - "epoch": 0.32102715762085987, - "grad_norm": 646.0499267578125, - "learning_rate": 4.311392261711495e-05, - "loss": 146.0453, - "step": 39730 - }, - { - "epoch": 0.321107959825144, - "grad_norm": 604.1016845703125, - "learning_rate": 4.310911028261867e-05, - "loss": 104.298, - "step": 39740 - }, - { - "epoch": 0.32118876202942814, - "grad_norm": 865.0166625976562, - "learning_rate": 4.3104296535936695e-05, - "loss": 105.7755, - "step": 39750 - }, - { - "epoch": 0.3212695642337123, - "grad_norm": 1403.947265625, - "learning_rate": 4.3099481377444384e-05, - "loss": 118.2042, - "step": 39760 - }, - { - "epoch": 0.3213503664379964, - "grad_norm": 645.03466796875, - "learning_rate": 4.309466480751726e-05, - "loss": 129.1096, - "step": 39770 - }, - { - "epoch": 0.32143116864228055, - "grad_norm": 1202.8157958984375, - "learning_rate": 4.308984682653092e-05, - "loss": 103.1521, - "step": 39780 - }, - { - "epoch": 0.3215119708465647, - "grad_norm": 1066.569091796875, - "learning_rate": 4.308502743486107e-05, - "loss": 96.8323, - "step": 39790 - }, - { - "epoch": 0.3215927730508488, - "grad_norm": 726.9209594726562, - "learning_rate": 4.3080206632883554e-05, - "loss": 113.3219, - "step": 39800 - }, - { - "epoch": 0.32167357525513296, - "grad_norm": 1058.8128662109375, - "learning_rate": 4.307538442097429e-05, - "loss": 84.1078, - "step": 39810 - }, - { - "epoch": 0.3217543774594171, - "grad_norm": 665.1336059570312, - "learning_rate": 4.307056079950934e-05, - "loss": 95.0601, - "step": 39820 - }, - { - "epoch": 0.32183517966370123, - "grad_norm": 932.8870239257812, - "learning_rate": 4.306573576886484e-05, - "loss": 123.7209, - "step": 39830 - }, - { - "epoch": 0.32191598186798537, - "grad_norm": 1023.8536376953125, - "learning_rate": 4.306090932941708e-05, - "loss": 78.0412, - "step": 39840 - }, - { - "epoch": 0.3219967840722695, - "grad_norm": 374.17401123046875, - "learning_rate": 4.305608148154242e-05, - "loss": 122.4146, - "step": 39850 - }, - { - "epoch": 0.32207758627655364, - "grad_norm": 867.6629028320312, - "learning_rate": 4.305125222561736e-05, - "loss": 117.9014, - "step": 39860 - }, - { - "epoch": 0.3221583884808378, - "grad_norm": 841.1533203125, - "learning_rate": 4.304642156201847e-05, - "loss": 100.4794, - "step": 39870 - }, - { - "epoch": 0.3222391906851219, - "grad_norm": 1411.64306640625, - "learning_rate": 4.304158949112247e-05, - "loss": 105.7429, - "step": 39880 - }, - { - "epoch": 0.32231999288940605, - "grad_norm": 660.92236328125, - "learning_rate": 4.303675601330618e-05, - "loss": 97.1658, - "step": 39890 - }, - { - "epoch": 0.32240079509369013, - "grad_norm": 587.6409301757812, - "learning_rate": 4.303192112894652e-05, - "loss": 104.4315, - "step": 39900 - }, - { - "epoch": 0.32248159729797427, - "grad_norm": 1262.100830078125, - "learning_rate": 4.3027084838420516e-05, - "loss": 139.2632, - "step": 39910 - }, - { - "epoch": 0.3225623995022584, - "grad_norm": 606.1165161132812, - "learning_rate": 4.302224714210532e-05, - "loss": 89.6003, - "step": 39920 - }, - { - "epoch": 0.32264320170654254, - "grad_norm": 1615.9356689453125, - "learning_rate": 4.301740804037819e-05, - "loss": 103.9455, - "step": 39930 - }, - { - "epoch": 0.3227240039108267, - "grad_norm": 2577.093994140625, - "learning_rate": 4.301256753361649e-05, - "loss": 132.5038, - "step": 39940 - }, - { - "epoch": 0.3228048061151108, - "grad_norm": 1625.3076171875, - "learning_rate": 4.3007725622197674e-05, - "loss": 124.3869, - "step": 39950 - }, - { - "epoch": 0.32288560831939495, - "grad_norm": 1049.01025390625, - "learning_rate": 4.3002882306499345e-05, - "loss": 117.5613, - "step": 39960 - }, - { - "epoch": 0.3229664105236791, - "grad_norm": 1337.247314453125, - "learning_rate": 4.299803758689919e-05, - "loss": 112.2433, - "step": 39970 - }, - { - "epoch": 0.3230472127279632, - "grad_norm": 749.79736328125, - "learning_rate": 4.2993191463774997e-05, - "loss": 115.3291, - "step": 39980 - }, - { - "epoch": 0.32312801493224735, - "grad_norm": 779.1278076171875, - "learning_rate": 4.2988343937504686e-05, - "loss": 117.4147, - "step": 39990 - }, - { - "epoch": 0.3232088171365315, - "grad_norm": 768.2434692382812, - "learning_rate": 4.2983495008466276e-05, - "loss": 115.9884, - "step": 40000 - }, - { - "epoch": 0.3232896193408156, - "grad_norm": 899.2980346679688, - "learning_rate": 4.29786446770379e-05, - "loss": 112.9874, - "step": 40010 - }, - { - "epoch": 0.32337042154509976, - "grad_norm": 2296.4775390625, - "learning_rate": 4.297379294359781e-05, - "loss": 121.5104, - "step": 40020 - }, - { - "epoch": 0.3234512237493839, - "grad_norm": 873.6467895507812, - "learning_rate": 4.2968939808524323e-05, - "loss": 107.9856, - "step": 40030 - }, - { - "epoch": 0.32353202595366803, - "grad_norm": 455.082275390625, - "learning_rate": 4.296408527219592e-05, - "loss": 127.3892, - "step": 40040 - }, - { - "epoch": 0.32361282815795217, - "grad_norm": 1413.2080078125, - "learning_rate": 4.2959229334991156e-05, - "loss": 96.3568, - "step": 40050 - }, - { - "epoch": 0.3236936303622363, - "grad_norm": 879.512939453125, - "learning_rate": 4.295437199728871e-05, - "loss": 129.104, - "step": 40060 - }, - { - "epoch": 0.3237744325665204, - "grad_norm": 1050.2576904296875, - "learning_rate": 4.294951325946737e-05, - "loss": 138.211, - "step": 40070 - }, - { - "epoch": 0.3238552347708045, - "grad_norm": 957.4417114257812, - "learning_rate": 4.294465312190603e-05, - "loss": 82.6545, - "step": 40080 - }, - { - "epoch": 0.32393603697508866, - "grad_norm": 1228.427490234375, - "learning_rate": 4.293979158498369e-05, - "loss": 120.2625, - "step": 40090 - }, - { - "epoch": 0.3240168391793728, - "grad_norm": 940.7662353515625, - "learning_rate": 4.293492864907947e-05, - "loss": 123.3591, - "step": 40100 - }, - { - "epoch": 0.32409764138365693, - "grad_norm": 905.258544921875, - "learning_rate": 4.293006431457258e-05, - "loss": 122.146, - "step": 40110 - }, - { - "epoch": 0.32417844358794107, - "grad_norm": 554.9683837890625, - "learning_rate": 4.292519858184236e-05, - "loss": 116.7726, - "step": 40120 - }, - { - "epoch": 0.3242592457922252, - "grad_norm": 772.2385864257812, - "learning_rate": 4.292033145126825e-05, - "loss": 102.5366, - "step": 40130 - }, - { - "epoch": 0.32434004799650934, - "grad_norm": 1022.6396484375, - "learning_rate": 4.291546292322979e-05, - "loss": 132.7645, - "step": 40140 - }, - { - "epoch": 0.3244208502007935, - "grad_norm": 974.7703857421875, - "learning_rate": 4.291059299810665e-05, - "loss": 93.4667, - "step": 40150 - }, - { - "epoch": 0.3245016524050776, - "grad_norm": 457.89599609375, - "learning_rate": 4.290572167627859e-05, - "loss": 108.8828, - "step": 40160 - }, - { - "epoch": 0.32458245460936175, - "grad_norm": 1314.8458251953125, - "learning_rate": 4.2900848958125485e-05, - "loss": 96.287, - "step": 40170 - }, - { - "epoch": 0.3246632568136459, - "grad_norm": 951.5792236328125, - "learning_rate": 4.289597484402732e-05, - "loss": 150.6653, - "step": 40180 - }, - { - "epoch": 0.32474405901793, - "grad_norm": 1148.9228515625, - "learning_rate": 4.289109933436419e-05, - "loss": 113.2348, - "step": 40190 - }, - { - "epoch": 0.32482486122221416, - "grad_norm": 1233.408203125, - "learning_rate": 4.2886222429516296e-05, - "loss": 110.7155, - "step": 40200 - }, - { - "epoch": 0.3249056634264983, - "grad_norm": 941.3579711914062, - "learning_rate": 4.288134412986395e-05, - "loss": 99.5515, - "step": 40210 - }, - { - "epoch": 0.32498646563078243, - "grad_norm": 979.3038330078125, - "learning_rate": 4.287646443578758e-05, - "loss": 110.8872, - "step": 40220 - }, - { - "epoch": 0.32506726783506656, - "grad_norm": 802.3125610351562, - "learning_rate": 4.28715833476677e-05, - "loss": 97.4228, - "step": 40230 - }, - { - "epoch": 0.3251480700393507, - "grad_norm": 580.212158203125, - "learning_rate": 4.2866700865884954e-05, - "loss": 100.0555, - "step": 40240 - }, - { - "epoch": 0.3252288722436348, - "grad_norm": 1882.9150390625, - "learning_rate": 4.2861816990820084e-05, - "loss": 122.3498, - "step": 40250 - }, - { - "epoch": 0.3253096744479189, - "grad_norm": 991.8593139648438, - "learning_rate": 4.285693172285396e-05, - "loss": 132.7911, - "step": 40260 - }, - { - "epoch": 0.32539047665220305, - "grad_norm": 914.2153930664062, - "learning_rate": 4.2852045062367516e-05, - "loss": 124.8931, - "step": 40270 - }, - { - "epoch": 0.3254712788564872, - "grad_norm": 1497.272705078125, - "learning_rate": 4.2847157009741856e-05, - "loss": 129.3443, - "step": 40280 - }, - { - "epoch": 0.3255520810607713, - "grad_norm": 1057.322265625, - "learning_rate": 4.284226756535814e-05, - "loss": 121.6815, - "step": 40290 - }, - { - "epoch": 0.32563288326505546, - "grad_norm": 1753.85400390625, - "learning_rate": 4.283737672959766e-05, - "loss": 107.63, - "step": 40300 - }, - { - "epoch": 0.3257136854693396, - "grad_norm": 898.4024658203125, - "learning_rate": 4.283248450284182e-05, - "loss": 94.2415, - "step": 40310 - }, - { - "epoch": 0.32579448767362373, - "grad_norm": 1062.618896484375, - "learning_rate": 4.2827590885472125e-05, - "loss": 132.3236, - "step": 40320 - }, - { - "epoch": 0.32587528987790787, - "grad_norm": 907.2509155273438, - "learning_rate": 4.2822695877870177e-05, - "loss": 114.8373, - "step": 40330 - }, - { - "epoch": 0.325956092082192, - "grad_norm": 1165.89208984375, - "learning_rate": 4.281779948041772e-05, - "loss": 103.2252, - "step": 40340 - }, - { - "epoch": 0.32603689428647614, - "grad_norm": 1217.372314453125, - "learning_rate": 4.2812901693496564e-05, - "loss": 105.4117, - "step": 40350 - }, - { - "epoch": 0.3261176964907603, - "grad_norm": 1906.8818359375, - "learning_rate": 4.2808002517488667e-05, - "loss": 140.8356, - "step": 40360 - }, - { - "epoch": 0.3261984986950444, - "grad_norm": 916.7537231445312, - "learning_rate": 4.280310195277606e-05, - "loss": 116.4482, - "step": 40370 - }, - { - "epoch": 0.32627930089932855, - "grad_norm": 566.9185791015625, - "learning_rate": 4.279819999974091e-05, - "loss": 91.6317, - "step": 40380 - }, - { - "epoch": 0.3263601031036127, - "grad_norm": 1333.477783203125, - "learning_rate": 4.279329665876548e-05, - "loss": 102.2559, - "step": 40390 - }, - { - "epoch": 0.3264409053078968, - "grad_norm": 1389.2467041015625, - "learning_rate": 4.278839193023214e-05, - "loss": 114.0456, - "step": 40400 - }, - { - "epoch": 0.32652170751218096, - "grad_norm": 1419.3245849609375, - "learning_rate": 4.278348581452337e-05, - "loss": 103.855, - "step": 40410 - }, - { - "epoch": 0.32660250971646504, - "grad_norm": 731.2609252929688, - "learning_rate": 4.2778578312021754e-05, - "loss": 97.3651, - "step": 40420 - }, - { - "epoch": 0.3266833119207492, - "grad_norm": 915.2779541015625, - "learning_rate": 4.277366942311001e-05, - "loss": 115.423, - "step": 40430 - }, - { - "epoch": 0.3267641141250333, - "grad_norm": 619.8368530273438, - "learning_rate": 4.2768759148170915e-05, - "loss": 97.7749, - "step": 40440 - }, - { - "epoch": 0.32684491632931745, - "grad_norm": 1194.5458984375, - "learning_rate": 4.276384748758741e-05, - "loss": 106.7146, - "step": 40450 - }, - { - "epoch": 0.3269257185336016, - "grad_norm": 1193.24658203125, - "learning_rate": 4.2758934441742496e-05, - "loss": 115.8756, - "step": 40460 - }, - { - "epoch": 0.3270065207378857, - "grad_norm": 795.9136352539062, - "learning_rate": 4.275402001101931e-05, - "loss": 133.3119, - "step": 40470 - }, - { - "epoch": 0.32708732294216986, - "grad_norm": 943.6404418945312, - "learning_rate": 4.274910419580108e-05, - "loss": 104.9797, - "step": 40480 - }, - { - "epoch": 0.327168125146454, - "grad_norm": 1569.99365234375, - "learning_rate": 4.2744186996471174e-05, - "loss": 125.1978, - "step": 40490 - }, - { - "epoch": 0.32724892735073813, - "grad_norm": 463.89105224609375, - "learning_rate": 4.273926841341302e-05, - "loss": 112.2322, - "step": 40500 - }, - { - "epoch": 0.32732972955502226, - "grad_norm": 448.5730285644531, - "learning_rate": 4.2734348447010206e-05, - "loss": 106.9901, - "step": 40510 - }, - { - "epoch": 0.3274105317593064, - "grad_norm": 807.04443359375, - "learning_rate": 4.272942709764638e-05, - "loss": 122.7582, - "step": 40520 - }, - { - "epoch": 0.32749133396359054, - "grad_norm": 1096.1396484375, - "learning_rate": 4.2724504365705314e-05, - "loss": 86.5552, - "step": 40530 - }, - { - "epoch": 0.3275721361678747, - "grad_norm": 680.6603393554688, - "learning_rate": 4.2719580251570915e-05, - "loss": 122.6824, - "step": 40540 - }, - { - "epoch": 0.3276529383721588, - "grad_norm": 603.6065063476562, - "learning_rate": 4.271465475562716e-05, - "loss": 96.2456, - "step": 40550 - }, - { - "epoch": 0.32773374057644294, - "grad_norm": 2812.093505859375, - "learning_rate": 4.270972787825815e-05, - "loss": 121.8358, - "step": 40560 - }, - { - "epoch": 0.3278145427807271, - "grad_norm": 1012.7164916992188, - "learning_rate": 4.27047996198481e-05, - "loss": 135.7245, - "step": 40570 - }, - { - "epoch": 0.3278953449850112, - "grad_norm": 642.3297729492188, - "learning_rate": 4.269986998078132e-05, - "loss": 102.9574, - "step": 40580 - }, - { - "epoch": 0.3279761471892953, - "grad_norm": 1766.62548828125, - "learning_rate": 4.269493896144224e-05, - "loss": 107.094, - "step": 40590 - }, - { - "epoch": 0.32805694939357943, - "grad_norm": 833.731201171875, - "learning_rate": 4.2690006562215384e-05, - "loss": 116.6608, - "step": 40600 - }, - { - "epoch": 0.32813775159786357, - "grad_norm": 1189.327880859375, - "learning_rate": 4.268507278348539e-05, - "loss": 78.9912, - "step": 40610 - }, - { - "epoch": 0.3282185538021477, - "grad_norm": 1034.6590576171875, - "learning_rate": 4.268013762563702e-05, - "loss": 108.6397, - "step": 40620 - }, - { - "epoch": 0.32829935600643184, - "grad_norm": 978.4329833984375, - "learning_rate": 4.2675201089055096e-05, - "loss": 131.9509, - "step": 40630 - }, - { - "epoch": 0.328380158210716, - "grad_norm": 782.1638793945312, - "learning_rate": 4.267026317412461e-05, - "loss": 102.386, - "step": 40640 - }, - { - "epoch": 0.3284609604150001, - "grad_norm": 2270.06494140625, - "learning_rate": 4.2665323881230624e-05, - "loss": 121.6748, - "step": 40650 - }, - { - "epoch": 0.32854176261928425, - "grad_norm": 1039.422119140625, - "learning_rate": 4.266038321075831e-05, - "loss": 113.124, - "step": 40660 - }, - { - "epoch": 0.3286225648235684, - "grad_norm": 1190.6673583984375, - "learning_rate": 4.265544116309294e-05, - "loss": 110.7518, - "step": 40670 - }, - { - "epoch": 0.3287033670278525, - "grad_norm": 696.2127685546875, - "learning_rate": 4.265049773861991e-05, - "loss": 110.6778, - "step": 40680 - }, - { - "epoch": 0.32878416923213666, - "grad_norm": 648.0077514648438, - "learning_rate": 4.2645552937724744e-05, - "loss": 145.3896, - "step": 40690 - }, - { - "epoch": 0.3288649714364208, - "grad_norm": 857.546875, - "learning_rate": 4.264060676079302e-05, - "loss": 100.0939, - "step": 40700 - }, - { - "epoch": 0.32894577364070493, - "grad_norm": 1062.038818359375, - "learning_rate": 4.263565920821046e-05, - "loss": 109.8498, - "step": 40710 - }, - { - "epoch": 0.32902657584498907, - "grad_norm": 1013.2899780273438, - "learning_rate": 4.263071028036288e-05, - "loss": 121.1266, - "step": 40720 - }, - { - "epoch": 0.3291073780492732, - "grad_norm": 1237.09326171875, - "learning_rate": 4.2625759977636214e-05, - "loss": 127.8479, - "step": 40730 - }, - { - "epoch": 0.32918818025355734, - "grad_norm": 945.4116821289062, - "learning_rate": 4.26208083004165e-05, - "loss": 112.6646, - "step": 40740 - }, - { - "epoch": 0.3292689824578415, - "grad_norm": 1012.056396484375, - "learning_rate": 4.261585524908987e-05, - "loss": 125.8222, - "step": 40750 - }, - { - "epoch": 0.32934978466212556, - "grad_norm": 1127.966064453125, - "learning_rate": 4.261090082404258e-05, - "loss": 133.4863, - "step": 40760 - }, - { - "epoch": 0.3294305868664097, - "grad_norm": 669.5462036132812, - "learning_rate": 4.260594502566097e-05, - "loss": 111.5103, - "step": 40770 - }, - { - "epoch": 0.3295113890706938, - "grad_norm": 1078.441650390625, - "learning_rate": 4.260098785433154e-05, - "loss": 108.1902, - "step": 40780 - }, - { - "epoch": 0.32959219127497796, - "grad_norm": 1380.302001953125, - "learning_rate": 4.2596029310440824e-05, - "loss": 113.2387, - "step": 40790 - }, - { - "epoch": 0.3296729934792621, - "grad_norm": 2181.116455078125, - "learning_rate": 4.259106939437551e-05, - "loss": 114.2387, - "step": 40800 - }, - { - "epoch": 0.32975379568354624, - "grad_norm": 647.5372924804688, - "learning_rate": 4.258610810652239e-05, - "loss": 101.3678, - "step": 40810 - }, - { - "epoch": 0.32983459788783037, - "grad_norm": 1228.7413330078125, - "learning_rate": 4.258114544726835e-05, - "loss": 110.8658, - "step": 40820 - }, - { - "epoch": 0.3299154000921145, - "grad_norm": 1205.242431640625, - "learning_rate": 4.257618141700039e-05, - "loss": 100.3633, - "step": 40830 - }, - { - "epoch": 0.32999620229639864, - "grad_norm": 1567.399658203125, - "learning_rate": 4.2571216016105614e-05, - "loss": 138.9608, - "step": 40840 - }, - { - "epoch": 0.3300770045006828, - "grad_norm": 1735.1202392578125, - "learning_rate": 4.256624924497123e-05, - "loss": 148.3961, - "step": 40850 - }, - { - "epoch": 0.3301578067049669, - "grad_norm": 1247.9520263671875, - "learning_rate": 4.256128110398457e-05, - "loss": 126.5563, - "step": 40860 - }, - { - "epoch": 0.33023860890925105, - "grad_norm": 912.4246826171875, - "learning_rate": 4.255631159353305e-05, - "loss": 137.764, - "step": 40870 - }, - { - "epoch": 0.3303194111135352, - "grad_norm": 1337.9869384765625, - "learning_rate": 4.2551340714004203e-05, - "loss": 123.0041, - "step": 40880 - }, - { - "epoch": 0.3304002133178193, - "grad_norm": 534.7035522460938, - "learning_rate": 4.254636846578566e-05, - "loss": 133.3996, - "step": 40890 - }, - { - "epoch": 0.33048101552210346, - "grad_norm": 877.7348022460938, - "learning_rate": 4.254139484926519e-05, - "loss": 132.013, - "step": 40900 - }, - { - "epoch": 0.3305618177263876, - "grad_norm": 927.3030395507812, - "learning_rate": 4.253641986483062e-05, - "loss": 119.0273, - "step": 40910 - }, - { - "epoch": 0.33064261993067173, - "grad_norm": 2280.363037109375, - "learning_rate": 4.253144351286994e-05, - "loss": 147.7519, - "step": 40920 - }, - { - "epoch": 0.33072342213495587, - "grad_norm": 1058.6842041015625, - "learning_rate": 4.252646579377119e-05, - "loss": 121.5241, - "step": 40930 - }, - { - "epoch": 0.33080422433923995, - "grad_norm": 1315.1671142578125, - "learning_rate": 4.252148670792254e-05, - "loss": 134.9192, - "step": 40940 - }, - { - "epoch": 0.3308850265435241, - "grad_norm": 972.918212890625, - "learning_rate": 4.2516506255712296e-05, - "loss": 91.3426, - "step": 40950 - }, - { - "epoch": 0.3309658287478082, - "grad_norm": 1350.6805419921875, - "learning_rate": 4.2511524437528825e-05, - "loss": 140.2676, - "step": 40960 - }, - { - "epoch": 0.33104663095209236, - "grad_norm": 1082.634521484375, - "learning_rate": 4.250654125376062e-05, - "loss": 126.6204, - "step": 40970 - }, - { - "epoch": 0.3311274331563765, - "grad_norm": 693.4226684570312, - "learning_rate": 4.250155670479628e-05, - "loss": 105.8912, - "step": 40980 - }, - { - "epoch": 0.33120823536066063, - "grad_norm": 1507.3685302734375, - "learning_rate": 4.2496570791024513e-05, - "loss": 121.1401, - "step": 40990 - }, - { - "epoch": 0.33128903756494477, - "grad_norm": 1499.3818359375, - "learning_rate": 4.249158351283414e-05, - "loss": 102.3292, - "step": 41000 - }, - { - "epoch": 0.3313698397692289, - "grad_norm": 715.9673461914062, - "learning_rate": 4.248659487061406e-05, - "loss": 89.0298, - "step": 41010 - }, - { - "epoch": 0.33145064197351304, - "grad_norm": 1182.8348388671875, - "learning_rate": 4.248160486475331e-05, - "loss": 110.7405, - "step": 41020 - }, - { - "epoch": 0.3315314441777972, - "grad_norm": 902.036865234375, - "learning_rate": 4.2476613495641026e-05, - "loss": 112.3735, - "step": 41030 - }, - { - "epoch": 0.3316122463820813, - "grad_norm": 1076.53662109375, - "learning_rate": 4.247162076366643e-05, - "loss": 107.7344, - "step": 41040 - }, - { - "epoch": 0.33169304858636545, - "grad_norm": 663.93798828125, - "learning_rate": 4.246662666921888e-05, - "loss": 107.2916, - "step": 41050 - }, - { - "epoch": 0.3317738507906496, - "grad_norm": 942.774169921875, - "learning_rate": 4.2461631212687816e-05, - "loss": 118.4916, - "step": 41060 - }, - { - "epoch": 0.3318546529949337, - "grad_norm": 834.71337890625, - "learning_rate": 4.24566343944628e-05, - "loss": 114.6676, - "step": 41070 - }, - { - "epoch": 0.33193545519921785, - "grad_norm": 1153.38720703125, - "learning_rate": 4.245163621493349e-05, - "loss": 103.1737, - "step": 41080 - }, - { - "epoch": 0.332016257403502, - "grad_norm": 868.2684936523438, - "learning_rate": 4.2446636674489645e-05, - "loss": 96.508, - "step": 41090 - }, - { - "epoch": 0.3320970596077861, - "grad_norm": 1002.597412109375, - "learning_rate": 4.244163577352116e-05, - "loss": 111.9354, - "step": 41100 - }, - { - "epoch": 0.3321778618120702, - "grad_norm": 2193.45068359375, - "learning_rate": 4.243663351241801e-05, - "loss": 118.4467, - "step": 41110 - }, - { - "epoch": 0.33225866401635434, - "grad_norm": 877.7936401367188, - "learning_rate": 4.2431629891570266e-05, - "loss": 109.6179, - "step": 41120 - }, - { - "epoch": 0.3323394662206385, - "grad_norm": 713.3062133789062, - "learning_rate": 4.2426624911368146e-05, - "loss": 114.1503, - "step": 41130 - }, - { - "epoch": 0.3324202684249226, - "grad_norm": 1050.3089599609375, - "learning_rate": 4.242161857220193e-05, - "loss": 127.7152, - "step": 41140 - }, - { - "epoch": 0.33250107062920675, - "grad_norm": 1206.72412109375, - "learning_rate": 4.241661087446202e-05, - "loss": 93.5209, - "step": 41150 - }, - { - "epoch": 0.3325818728334909, - "grad_norm": 985.6680908203125, - "learning_rate": 4.241160181853894e-05, - "loss": 100.172, - "step": 41160 - }, - { - "epoch": 0.332662675037775, - "grad_norm": 780.0789794921875, - "learning_rate": 4.24065914048233e-05, - "loss": 120.4688, - "step": 41170 - }, - { - "epoch": 0.33274347724205916, - "grad_norm": 1601.8748779296875, - "learning_rate": 4.240157963370582e-05, - "loss": 122.7475, - "step": 41180 - }, - { - "epoch": 0.3328242794463433, - "grad_norm": 933.0560302734375, - "learning_rate": 4.239656650557734e-05, - "loss": 114.7916, - "step": 41190 - }, - { - "epoch": 0.33290508165062743, - "grad_norm": 1233.2462158203125, - "learning_rate": 4.2391552020828775e-05, - "loss": 126.3845, - "step": 41200 - }, - { - "epoch": 0.33298588385491157, - "grad_norm": 5802.0263671875, - "learning_rate": 4.238653617985118e-05, - "loss": 147.0103, - "step": 41210 - }, - { - "epoch": 0.3330666860591957, - "grad_norm": 1232.3936767578125, - "learning_rate": 4.238151898303569e-05, - "loss": 130.4673, - "step": 41220 - }, - { - "epoch": 0.33314748826347984, - "grad_norm": 1086.385498046875, - "learning_rate": 4.237650043077357e-05, - "loss": 120.4514, - "step": 41230 - }, - { - "epoch": 0.333228290467764, - "grad_norm": 955.5416259765625, - "learning_rate": 4.237148052345616e-05, - "loss": 159.1334, - "step": 41240 - }, - { - "epoch": 0.3333090926720481, - "grad_norm": 940.6236572265625, - "learning_rate": 4.2366459261474933e-05, - "loss": 116.1305, - "step": 41250 - }, - { - "epoch": 0.33338989487633225, - "grad_norm": 1035.8299560546875, - "learning_rate": 4.236143664522146e-05, - "loss": 125.2028, - "step": 41260 - }, - { - "epoch": 0.3334706970806164, - "grad_norm": 1120.801513671875, - "learning_rate": 4.2356412675087406e-05, - "loss": 101.4415, - "step": 41270 - }, - { - "epoch": 0.33355149928490047, - "grad_norm": 1547.17431640625, - "learning_rate": 4.2351387351464565e-05, - "loss": 113.0267, - "step": 41280 - }, - { - "epoch": 0.3336323014891846, - "grad_norm": 816.02783203125, - "learning_rate": 4.2346360674744815e-05, - "loss": 92.7242, - "step": 41290 - }, - { - "epoch": 0.33371310369346874, - "grad_norm": 1095.8355712890625, - "learning_rate": 4.234133264532012e-05, - "loss": 132.4738, - "step": 41300 - }, - { - "epoch": 0.3337939058977529, - "grad_norm": 719.6564331054688, - "learning_rate": 4.2336303263582624e-05, - "loss": 93.1649, - "step": 41310 - }, - { - "epoch": 0.333874708102037, - "grad_norm": 924.6814575195312, - "learning_rate": 4.2331272529924495e-05, - "loss": 109.1696, - "step": 41320 - }, - { - "epoch": 0.33395551030632115, - "grad_norm": 692.873046875, - "learning_rate": 4.2326240444738055e-05, - "loss": 142.4033, - "step": 41330 - }, - { - "epoch": 0.3340363125106053, - "grad_norm": 910.02978515625, - "learning_rate": 4.232120700841571e-05, - "loss": 109.2628, - "step": 41340 - }, - { - "epoch": 0.3341171147148894, - "grad_norm": 719.2833251953125, - "learning_rate": 4.2316172221349973e-05, - "loss": 84.8165, - "step": 41350 - }, - { - "epoch": 0.33419791691917355, - "grad_norm": 903.0330200195312, - "learning_rate": 4.231113608393348e-05, - "loss": 111.1525, - "step": 41360 - }, - { - "epoch": 0.3342787191234577, - "grad_norm": 1286.6417236328125, - "learning_rate": 4.230609859655895e-05, - "loss": 136.9282, - "step": 41370 - }, - { - "epoch": 0.3343595213277418, - "grad_norm": 1123.5858154296875, - "learning_rate": 4.230105975961921e-05, - "loss": 109.6661, - "step": 41380 - }, - { - "epoch": 0.33444032353202596, - "grad_norm": 822.5098266601562, - "learning_rate": 4.229601957350722e-05, - "loss": 97.3865, - "step": 41390 - }, - { - "epoch": 0.3345211257363101, - "grad_norm": 825.315673828125, - "learning_rate": 4.2290978038616e-05, - "loss": 110.3631, - "step": 41400 - }, - { - "epoch": 0.33460192794059423, - "grad_norm": 992.9793090820312, - "learning_rate": 4.2285935155338724e-05, - "loss": 118.5908, - "step": 41410 - }, - { - "epoch": 0.33468273014487837, - "grad_norm": 944.6324462890625, - "learning_rate": 4.2280890924068625e-05, - "loss": 111.8006, - "step": 41420 - }, - { - "epoch": 0.3347635323491625, - "grad_norm": 1114.3631591796875, - "learning_rate": 4.227584534519907e-05, - "loss": 112.1767, - "step": 41430 - }, - { - "epoch": 0.33484433455344664, - "grad_norm": 1965.0574951171875, - "learning_rate": 4.2270798419123534e-05, - "loss": 127.5403, - "step": 41440 - }, - { - "epoch": 0.3349251367577307, - "grad_norm": 1632.0330810546875, - "learning_rate": 4.226575014623557e-05, - "loss": 141.1655, - "step": 41450 - }, - { - "epoch": 0.33500593896201486, - "grad_norm": 587.453857421875, - "learning_rate": 4.226070052692886e-05, - "loss": 111.4413, - "step": 41460 - }, - { - "epoch": 0.335086741166299, - "grad_norm": 694.474365234375, - "learning_rate": 4.2255649561597186e-05, - "loss": 109.5578, - "step": 41470 - }, - { - "epoch": 0.33516754337058313, - "grad_norm": 1440.5325927734375, - "learning_rate": 4.225059725063444e-05, - "loss": 113.3155, - "step": 41480 - }, - { - "epoch": 0.33524834557486727, - "grad_norm": 729.2142333984375, - "learning_rate": 4.224554359443459e-05, - "loss": 98.748, - "step": 41490 - }, - { - "epoch": 0.3353291477791514, - "grad_norm": 1440.349609375, - "learning_rate": 4.224048859339175e-05, - "loss": 150.7559, - "step": 41500 - }, - { - "epoch": 0.33540994998343554, - "grad_norm": 567.5632934570312, - "learning_rate": 4.22354322479001e-05, - "loss": 121.6019, - "step": 41510 - }, - { - "epoch": 0.3354907521877197, - "grad_norm": 1247.8280029296875, - "learning_rate": 4.223037455835397e-05, - "loss": 110.7824, - "step": 41520 - }, - { - "epoch": 0.3355715543920038, - "grad_norm": 694.1570434570312, - "learning_rate": 4.222531552514775e-05, - "loss": 92.5867, - "step": 41530 - }, - { - "epoch": 0.33565235659628795, - "grad_norm": 439.62677001953125, - "learning_rate": 4.2220255148675956e-05, - "loss": 128.6065, - "step": 41540 - }, - { - "epoch": 0.3357331588005721, - "grad_norm": 844.4154663085938, - "learning_rate": 4.221519342933321e-05, - "loss": 123.6261, - "step": 41550 - }, - { - "epoch": 0.3358139610048562, - "grad_norm": 1023.6814575195312, - "learning_rate": 4.221013036751424e-05, - "loss": 117.6568, - "step": 41560 - }, - { - "epoch": 0.33589476320914036, - "grad_norm": 870.8734130859375, - "learning_rate": 4.2205065963613864e-05, - "loss": 109.6536, - "step": 41570 - }, - { - "epoch": 0.3359755654134245, - "grad_norm": 1021.2188720703125, - "learning_rate": 4.220000021802702e-05, - "loss": 110.3445, - "step": 41580 - }, - { - "epoch": 0.33605636761770863, - "grad_norm": 2119.448486328125, - "learning_rate": 4.219493313114875e-05, - "loss": 148.3456, - "step": 41590 - }, - { - "epoch": 0.33613716982199277, - "grad_norm": 1678.76708984375, - "learning_rate": 4.218986470337419e-05, - "loss": 108.0264, - "step": 41600 - }, - { - "epoch": 0.3362179720262769, - "grad_norm": 1361.9954833984375, - "learning_rate": 4.218479493509858e-05, - "loss": 140.6976, - "step": 41610 - }, - { - "epoch": 0.33629877423056104, - "grad_norm": 1839.7706298828125, - "learning_rate": 4.217972382671729e-05, - "loss": 99.9292, - "step": 41620 - }, - { - "epoch": 0.3363795764348451, - "grad_norm": 762.42431640625, - "learning_rate": 4.2174651378625754e-05, - "loss": 125.2607, - "step": 41630 - }, - { - "epoch": 0.33646037863912925, - "grad_norm": 920.329833984375, - "learning_rate": 4.2169577591219545e-05, - "loss": 111.5084, - "step": 41640 - }, - { - "epoch": 0.3365411808434134, - "grad_norm": 1217.3065185546875, - "learning_rate": 4.2164502464894316e-05, - "loss": 97.9931, - "step": 41650 - }, - { - "epoch": 0.3366219830476975, - "grad_norm": 755.00390625, - "learning_rate": 4.2159426000045854e-05, - "loss": 101.5915, - "step": 41660 - }, - { - "epoch": 0.33670278525198166, - "grad_norm": 1925.526123046875, - "learning_rate": 4.2154348197070017e-05, - "loss": 116.58, - "step": 41670 - }, - { - "epoch": 0.3367835874562658, - "grad_norm": 1252.3587646484375, - "learning_rate": 4.2149269056362794e-05, - "loss": 117.0276, - "step": 41680 - }, - { - "epoch": 0.33686438966054993, - "grad_norm": 1284.8072509765625, - "learning_rate": 4.214418857832025e-05, - "loss": 139.4919, - "step": 41690 - }, - { - "epoch": 0.33694519186483407, - "grad_norm": 849.3757934570312, - "learning_rate": 4.213910676333859e-05, - "loss": 103.8805, - "step": 41700 - }, - { - "epoch": 0.3370259940691182, - "grad_norm": 1074.7708740234375, - "learning_rate": 4.213402361181409e-05, - "loss": 115.7553, - "step": 41710 - }, - { - "epoch": 0.33710679627340234, - "grad_norm": 700.4140014648438, - "learning_rate": 4.212893912414316e-05, - "loss": 108.764, - "step": 41720 - }, - { - "epoch": 0.3371875984776865, - "grad_norm": 1171.544921875, - "learning_rate": 4.212385330072228e-05, - "loss": 107.7236, - "step": 41730 - }, - { - "epoch": 0.3372684006819706, - "grad_norm": 971.1940307617188, - "learning_rate": 4.2118766141948066e-05, - "loss": 99.9493, - "step": 41740 - }, - { - "epoch": 0.33734920288625475, - "grad_norm": 1106.3411865234375, - "learning_rate": 4.211367764821722e-05, - "loss": 130.0049, - "step": 41750 - }, - { - "epoch": 0.3374300050905389, - "grad_norm": 824.7821044921875, - "learning_rate": 4.2108587819926554e-05, - "loss": 93.8124, - "step": 41760 - }, - { - "epoch": 0.337510807294823, - "grad_norm": 1012.3502807617188, - "learning_rate": 4.210349665747299e-05, - "loss": 109.7114, - "step": 41770 - }, - { - "epoch": 0.33759160949910716, - "grad_norm": 1717.1815185546875, - "learning_rate": 4.209840416125353e-05, - "loss": 140.1303, - "step": 41780 - }, - { - "epoch": 0.3376724117033913, - "grad_norm": 1743.9361572265625, - "learning_rate": 4.209331033166531e-05, - "loss": 180.2291, - "step": 41790 - }, - { - "epoch": 0.3377532139076754, - "grad_norm": 1119.1121826171875, - "learning_rate": 4.208821516910557e-05, - "loss": 122.2918, - "step": 41800 - }, - { - "epoch": 0.3378340161119595, - "grad_norm": 976.0127563476562, - "learning_rate": 4.2083118673971613e-05, - "loss": 106.7708, - "step": 41810 - }, - { - "epoch": 0.33791481831624365, - "grad_norm": 1444.26806640625, - "learning_rate": 4.20780208466609e-05, - "loss": 142.1308, - "step": 41820 - }, - { - "epoch": 0.3379956205205278, - "grad_norm": 700.7904052734375, - "learning_rate": 4.207292168757095e-05, - "loss": 77.8881, - "step": 41830 - }, - { - "epoch": 0.3380764227248119, - "grad_norm": 1749.1107177734375, - "learning_rate": 4.206782119709942e-05, - "loss": 128.5482, - "step": 41840 - }, - { - "epoch": 0.33815722492909606, - "grad_norm": 1055.40771484375, - "learning_rate": 4.206271937564405e-05, - "loss": 104.5686, - "step": 41850 - }, - { - "epoch": 0.3382380271333802, - "grad_norm": 856.7396240234375, - "learning_rate": 4.2057616223602684e-05, - "loss": 116.8305, - "step": 41860 - }, - { - "epoch": 0.33831882933766433, - "grad_norm": 1309.1968994140625, - "learning_rate": 4.205251174137329e-05, - "loss": 100.0671, - "step": 41870 - }, - { - "epoch": 0.33839963154194846, - "grad_norm": 572.664306640625, - "learning_rate": 4.204740592935392e-05, - "loss": 179.8458, - "step": 41880 - }, - { - "epoch": 0.3384804337462326, - "grad_norm": 758.4063720703125, - "learning_rate": 4.204229878794273e-05, - "loss": 119.7842, - "step": 41890 - }, - { - "epoch": 0.33856123595051674, - "grad_norm": 744.6017456054688, - "learning_rate": 4.2037190317538e-05, - "loss": 84.0302, - "step": 41900 - }, - { - "epoch": 0.3386420381548009, - "grad_norm": 591.7713623046875, - "learning_rate": 4.203208051853808e-05, - "loss": 136.0603, - "step": 41910 - }, - { - "epoch": 0.338722840359085, - "grad_norm": 529.1870727539062, - "learning_rate": 4.202696939134146e-05, - "loss": 113.49, - "step": 41920 - }, - { - "epoch": 0.33880364256336915, - "grad_norm": 718.2554931640625, - "learning_rate": 4.20218569363467e-05, - "loss": 101.365, - "step": 41930 - }, - { - "epoch": 0.3388844447676533, - "grad_norm": 1323.03173828125, - "learning_rate": 4.2016743153952505e-05, - "loss": 138.6051, - "step": 41940 - }, - { - "epoch": 0.3389652469719374, - "grad_norm": 1175.0521240234375, - "learning_rate": 4.201162804455763e-05, - "loss": 91.6445, - "step": 41950 - }, - { - "epoch": 0.33904604917622155, - "grad_norm": 1001.1517944335938, - "learning_rate": 4.200651160856098e-05, - "loss": 143.7908, - "step": 41960 - }, - { - "epoch": 0.33912685138050563, - "grad_norm": 1416.6690673828125, - "learning_rate": 4.2001393846361536e-05, - "loss": 123.3682, - "step": 41970 - }, - { - "epoch": 0.33920765358478977, - "grad_norm": 701.4797973632812, - "learning_rate": 4.19962747583584e-05, - "loss": 88.0816, - "step": 41980 - }, - { - "epoch": 0.3392884557890739, - "grad_norm": 769.109375, - "learning_rate": 4.199115434495076e-05, - "loss": 145.2069, - "step": 41990 - }, - { - "epoch": 0.33936925799335804, - "grad_norm": 775.1168212890625, - "learning_rate": 4.198603260653792e-05, - "loss": 100.453, - "step": 42000 - }, - { - "epoch": 0.3394500601976422, - "grad_norm": 461.675537109375, - "learning_rate": 4.198090954351928e-05, - "loss": 109.0865, - "step": 42010 - }, - { - "epoch": 0.3395308624019263, - "grad_norm": 1106.8309326171875, - "learning_rate": 4.197578515629435e-05, - "loss": 120.445, - "step": 42020 - }, - { - "epoch": 0.33961166460621045, - "grad_norm": 830.0114135742188, - "learning_rate": 4.197065944526275e-05, - "loss": 135.8673, - "step": 42030 - }, - { - "epoch": 0.3396924668104946, - "grad_norm": 1482.881103515625, - "learning_rate": 4.196553241082418e-05, - "loss": 97.3678, - "step": 42040 - }, - { - "epoch": 0.3397732690147787, - "grad_norm": 1147.758544921875, - "learning_rate": 4.1960404053378454e-05, - "loss": 116.7118, - "step": 42050 - }, - { - "epoch": 0.33985407121906286, - "grad_norm": 910.6961669921875, - "learning_rate": 4.1955274373325506e-05, - "loss": 116.4943, - "step": 42060 - }, - { - "epoch": 0.339934873423347, - "grad_norm": 1390.40771484375, - "learning_rate": 4.1950143371065355e-05, - "loss": 99.1167, - "step": 42070 - }, - { - "epoch": 0.34001567562763113, - "grad_norm": 840.1258544921875, - "learning_rate": 4.194501104699812e-05, - "loss": 97.3132, - "step": 42080 - }, - { - "epoch": 0.34009647783191527, - "grad_norm": 436.78875732421875, - "learning_rate": 4.193987740152404e-05, - "loss": 100.2732, - "step": 42090 - }, - { - "epoch": 0.3401772800361994, - "grad_norm": 1276.5980224609375, - "learning_rate": 4.193474243504343e-05, - "loss": 134.5621, - "step": 42100 - }, - { - "epoch": 0.34025808224048354, - "grad_norm": 1543.0946044921875, - "learning_rate": 4.192960614795675e-05, - "loss": 114.9207, - "step": 42110 - }, - { - "epoch": 0.3403388844447677, - "grad_norm": 833.9490966796875, - "learning_rate": 4.192446854066452e-05, - "loss": 97.5959, - "step": 42120 - }, - { - "epoch": 0.3404196866490518, - "grad_norm": 850.1820068359375, - "learning_rate": 4.191932961356739e-05, - "loss": 123.7123, - "step": 42130 - }, - { - "epoch": 0.3405004888533359, - "grad_norm": 1075.9091796875, - "learning_rate": 4.1914189367066094e-05, - "loss": 86.7298, - "step": 42140 - }, - { - "epoch": 0.34058129105762003, - "grad_norm": 1365.4290771484375, - "learning_rate": 4.1909047801561484e-05, - "loss": 146.2664, - "step": 42150 - }, - { - "epoch": 0.34066209326190416, - "grad_norm": 783.529296875, - "learning_rate": 4.1903904917454516e-05, - "loss": 161.0411, - "step": 42160 - }, - { - "epoch": 0.3407428954661883, - "grad_norm": 1389.3060302734375, - "learning_rate": 4.189876071514624e-05, - "loss": 126.2128, - "step": 42170 - }, - { - "epoch": 0.34082369767047244, - "grad_norm": 1145.05224609375, - "learning_rate": 4.18936151950378e-05, - "loss": 120.6837, - "step": 42180 - }, - { - "epoch": 0.3409044998747566, - "grad_norm": 685.238525390625, - "learning_rate": 4.1888468357530476e-05, - "loss": 125.6368, - "step": 42190 - }, - { - "epoch": 0.3409853020790407, - "grad_norm": 1572.108642578125, - "learning_rate": 4.188332020302561e-05, - "loss": 106.5961, - "step": 42200 - }, - { - "epoch": 0.34106610428332484, - "grad_norm": 827.3007202148438, - "learning_rate": 4.187817073192468e-05, - "loss": 105.6062, - "step": 42210 - }, - { - "epoch": 0.341146906487609, - "grad_norm": 425.47900390625, - "learning_rate": 4.187301994462924e-05, - "loss": 97.6075, - "step": 42220 - }, - { - "epoch": 0.3412277086918931, - "grad_norm": 915.7979125976562, - "learning_rate": 4.1867867841540964e-05, - "loss": 101.7147, - "step": 42230 - }, - { - "epoch": 0.34130851089617725, - "grad_norm": 985.12060546875, - "learning_rate": 4.1862714423061624e-05, - "loss": 128.643, - "step": 42240 - }, - { - "epoch": 0.3413893131004614, - "grad_norm": 1204.0159912109375, - "learning_rate": 4.185755968959308e-05, - "loss": 113.6285, - "step": 42250 - }, - { - "epoch": 0.3414701153047455, - "grad_norm": 1115.7821044921875, - "learning_rate": 4.185240364153734e-05, - "loss": 110.7504, - "step": 42260 - }, - { - "epoch": 0.34155091750902966, - "grad_norm": 909.9058227539062, - "learning_rate": 4.1847246279296464e-05, - "loss": 137.9679, - "step": 42270 - }, - { - "epoch": 0.3416317197133138, - "grad_norm": 783.6122436523438, - "learning_rate": 4.184208760327263e-05, - "loss": 108.2928, - "step": 42280 - }, - { - "epoch": 0.34171252191759793, - "grad_norm": 812.3535766601562, - "learning_rate": 4.183692761386813e-05, - "loss": 96.809, - "step": 42290 - }, - { - "epoch": 0.34179332412188207, - "grad_norm": 1120.2716064453125, - "learning_rate": 4.183176631148534e-05, - "loss": 92.9935, - "step": 42300 - }, - { - "epoch": 0.3418741263261662, - "grad_norm": 1045.677978515625, - "learning_rate": 4.182660369652677e-05, - "loss": 119.3955, - "step": 42310 - }, - { - "epoch": 0.3419549285304503, - "grad_norm": 610.6566772460938, - "learning_rate": 4.1821439769395e-05, - "loss": 142.938, - "step": 42320 - }, - { - "epoch": 0.3420357307347344, - "grad_norm": 1348.732421875, - "learning_rate": 4.1816274530492713e-05, - "loss": 125.2905, - "step": 42330 - }, - { - "epoch": 0.34211653293901856, - "grad_norm": 851.5072631835938, - "learning_rate": 4.181110798022271e-05, - "loss": 109.9097, - "step": 42340 - }, - { - "epoch": 0.3421973351433027, - "grad_norm": 748.5755615234375, - "learning_rate": 4.180594011898791e-05, - "loss": 106.8618, - "step": 42350 - }, - { - "epoch": 0.34227813734758683, - "grad_norm": 979.4376831054688, - "learning_rate": 4.180077094719128e-05, - "loss": 109.3109, - "step": 42360 - }, - { - "epoch": 0.34235893955187097, - "grad_norm": 970.9088745117188, - "learning_rate": 4.1795600465235947e-05, - "loss": 103.6229, - "step": 42370 - }, - { - "epoch": 0.3424397417561551, - "grad_norm": 984.352783203125, - "learning_rate": 4.179042867352511e-05, - "loss": 120.7571, - "step": 42380 - }, - { - "epoch": 0.34252054396043924, - "grad_norm": 517.6790161132812, - "learning_rate": 4.1785255572462066e-05, - "loss": 93.7731, - "step": 42390 - }, - { - "epoch": 0.3426013461647234, - "grad_norm": 1307.16357421875, - "learning_rate": 4.178008116245024e-05, - "loss": 101.1722, - "step": 42400 - }, - { - "epoch": 0.3426821483690075, - "grad_norm": 1743.164306640625, - "learning_rate": 4.177490544389313e-05, - "loss": 116.7959, - "step": 42410 - }, - { - "epoch": 0.34276295057329165, - "grad_norm": 1130.1885986328125, - "learning_rate": 4.176972841719435e-05, - "loss": 121.5599, - "step": 42420 - }, - { - "epoch": 0.3428437527775758, - "grad_norm": 657.3247680664062, - "learning_rate": 4.176455008275764e-05, - "loss": 111.9452, - "step": 42430 - }, - { - "epoch": 0.3429245549818599, - "grad_norm": 1209.4779052734375, - "learning_rate": 4.1759370440986775e-05, - "loss": 109.4623, - "step": 42440 - }, - { - "epoch": 0.34300535718614406, - "grad_norm": 842.3145141601562, - "learning_rate": 4.1754189492285714e-05, - "loss": 120.6484, - "step": 42450 - }, - { - "epoch": 0.3430861593904282, - "grad_norm": 857.0770874023438, - "learning_rate": 4.174900723705845e-05, - "loss": 108.4882, - "step": 42460 - }, - { - "epoch": 0.3431669615947123, - "grad_norm": 821.6563110351562, - "learning_rate": 4.174382367570912e-05, - "loss": 108.0224, - "step": 42470 - }, - { - "epoch": 0.34324776379899646, - "grad_norm": 2286.49951171875, - "learning_rate": 4.1738638808641936e-05, - "loss": 161.7843, - "step": 42480 - }, - { - "epoch": 0.34332856600328054, - "grad_norm": 770.7763061523438, - "learning_rate": 4.1733452636261244e-05, - "loss": 80.6864, - "step": 42490 - }, - { - "epoch": 0.3434093682075647, - "grad_norm": 650.9833374023438, - "learning_rate": 4.172826515897146e-05, - "loss": 107.6622, - "step": 42500 - }, - { - "epoch": 0.3434901704118488, - "grad_norm": 922.6205444335938, - "learning_rate": 4.172307637717711e-05, - "loss": 110.4043, - "step": 42510 - }, - { - "epoch": 0.34357097261613295, - "grad_norm": 1296.037109375, - "learning_rate": 4.171788629128284e-05, - "loss": 130.2853, - "step": 42520 - }, - { - "epoch": 0.3436517748204171, - "grad_norm": 1275.6156005859375, - "learning_rate": 4.1712694901693374e-05, - "loss": 171.9688, - "step": 42530 - }, - { - "epoch": 0.3437325770247012, - "grad_norm": 1972.59716796875, - "learning_rate": 4.170750220881354e-05, - "loss": 127.4962, - "step": 42540 - }, - { - "epoch": 0.34381337922898536, - "grad_norm": 1204.11181640625, - "learning_rate": 4.170230821304829e-05, - "loss": 125.0976, - "step": 42550 - }, - { - "epoch": 0.3438941814332695, - "grad_norm": 884.7000732421875, - "learning_rate": 4.169711291480266e-05, - "loss": 94.658, - "step": 42560 - }, - { - "epoch": 0.34397498363755363, - "grad_norm": 962.3338012695312, - "learning_rate": 4.169191631448178e-05, - "loss": 119.4839, - "step": 42570 - }, - { - "epoch": 0.34405578584183777, - "grad_norm": 1040.0384521484375, - "learning_rate": 4.168671841249091e-05, - "loss": 89.0051, - "step": 42580 - }, - { - "epoch": 0.3441365880461219, - "grad_norm": 643.4529418945312, - "learning_rate": 4.168151920923536e-05, - "loss": 122.9513, - "step": 42590 - }, - { - "epoch": 0.34421739025040604, - "grad_norm": 827.9017333984375, - "learning_rate": 4.1676318705120616e-05, - "loss": 129.7726, - "step": 42600 - }, - { - "epoch": 0.3442981924546902, - "grad_norm": 1233.6368408203125, - "learning_rate": 4.1671116900552194e-05, - "loss": 137.5954, - "step": 42610 - }, - { - "epoch": 0.3443789946589743, - "grad_norm": 1292.6192626953125, - "learning_rate": 4.166591379593575e-05, - "loss": 105.8796, - "step": 42620 - }, - { - "epoch": 0.34445979686325845, - "grad_norm": 1072.096923828125, - "learning_rate": 4.166070939167703e-05, - "loss": 144.0038, - "step": 42630 - }, - { - "epoch": 0.3445405990675426, - "grad_norm": 1191.538330078125, - "learning_rate": 4.16555036881819e-05, - "loss": 130.2234, - "step": 42640 - }, - { - "epoch": 0.3446214012718267, - "grad_norm": 721.0374755859375, - "learning_rate": 4.165029668585629e-05, - "loss": 91.9487, - "step": 42650 - }, - { - "epoch": 0.3447022034761108, - "grad_norm": 1047.25927734375, - "learning_rate": 4.1645088385106266e-05, - "loss": 131.1794, - "step": 42660 - }, - { - "epoch": 0.34478300568039494, - "grad_norm": 1257.214111328125, - "learning_rate": 4.163987878633798e-05, - "loss": 124.3329, - "step": 42670 - }, - { - "epoch": 0.3448638078846791, - "grad_norm": 2070.51025390625, - "learning_rate": 4.1634667889957676e-05, - "loss": 110.4267, - "step": 42680 - }, - { - "epoch": 0.3449446100889632, - "grad_norm": 1299.32763671875, - "learning_rate": 4.1629455696371734e-05, - "loss": 113.428, - "step": 42690 - }, - { - "epoch": 0.34502541229324735, - "grad_norm": 472.93719482421875, - "learning_rate": 4.162424220598658e-05, - "loss": 116.1447, - "step": 42700 - }, - { - "epoch": 0.3451062144975315, - "grad_norm": 1111.9759521484375, - "learning_rate": 4.161902741920881e-05, - "loss": 106.8365, - "step": 42710 - }, - { - "epoch": 0.3451870167018156, - "grad_norm": 874.89306640625, - "learning_rate": 4.161381133644505e-05, - "loss": 94.6413, - "step": 42720 - }, - { - "epoch": 0.34526781890609975, - "grad_norm": 764.873779296875, - "learning_rate": 4.160859395810208e-05, - "loss": 150.5084, - "step": 42730 - }, - { - "epoch": 0.3453486211103839, - "grad_norm": 807.837646484375, - "learning_rate": 4.160337528458676e-05, - "loss": 115.3263, - "step": 42740 - }, - { - "epoch": 0.345429423314668, - "grad_norm": 1016.0419921875, - "learning_rate": 4.1598155316306044e-05, - "loss": 117.494, - "step": 42750 - }, - { - "epoch": 0.34551022551895216, - "grad_norm": 926.2868041992188, - "learning_rate": 4.1592934053667004e-05, - "loss": 109.1898, - "step": 42760 - }, - { - "epoch": 0.3455910277232363, - "grad_norm": 1401.1982421875, - "learning_rate": 4.15877114970768e-05, - "loss": 130.7878, - "step": 42770 - }, - { - "epoch": 0.34567182992752044, - "grad_norm": 2645.7509765625, - "learning_rate": 4.1582487646942706e-05, - "loss": 147.1319, - "step": 42780 - }, - { - "epoch": 0.34575263213180457, - "grad_norm": 953.051025390625, - "learning_rate": 4.157726250367207e-05, - "loss": 105.3992, - "step": 42790 - }, - { - "epoch": 0.3458334343360887, - "grad_norm": 1411.7845458984375, - "learning_rate": 4.157203606767238e-05, - "loss": 112.0605, - "step": 42800 - }, - { - "epoch": 0.34591423654037284, - "grad_norm": 920.7262573242188, - "learning_rate": 4.156680833935119e-05, - "loss": 112.1692, - "step": 42810 - }, - { - "epoch": 0.345995038744657, - "grad_norm": 764.4006958007812, - "learning_rate": 4.156157931911619e-05, - "loss": 127.5782, - "step": 42820 - }, - { - "epoch": 0.34607584094894106, - "grad_norm": 754.4057006835938, - "learning_rate": 4.155634900737513e-05, - "loss": 119.4949, - "step": 42830 - }, - { - "epoch": 0.3461566431532252, - "grad_norm": 1034.914794921875, - "learning_rate": 4.155111740453588e-05, - "loss": 100.6544, - "step": 42840 - }, - { - "epoch": 0.34623744535750933, - "grad_norm": 956.4243774414062, - "learning_rate": 4.154588451100642e-05, - "loss": 128.5736, - "step": 42850 - }, - { - "epoch": 0.34631824756179347, - "grad_norm": 1221.5029296875, - "learning_rate": 4.154065032719481e-05, - "loss": 112.7579, - "step": 42860 - }, - { - "epoch": 0.3463990497660776, - "grad_norm": 1284.328857421875, - "learning_rate": 4.153541485350924e-05, - "loss": 133.6358, - "step": 42870 - }, - { - "epoch": 0.34647985197036174, - "grad_norm": 846.0714111328125, - "learning_rate": 4.1530178090357976e-05, - "loss": 118.7366, - "step": 42880 - }, - { - "epoch": 0.3465606541746459, - "grad_norm": 763.9622802734375, - "learning_rate": 4.1524940038149384e-05, - "loss": 87.1779, - "step": 42890 - }, - { - "epoch": 0.34664145637893, - "grad_norm": 1357.3033447265625, - "learning_rate": 4.1519700697291944e-05, - "loss": 138.3137, - "step": 42900 - }, - { - "epoch": 0.34672225858321415, - "grad_norm": 1124.6358642578125, - "learning_rate": 4.151446006819423e-05, - "loss": 134.1479, - "step": 42910 - }, - { - "epoch": 0.3468030607874983, - "grad_norm": 1491.8726806640625, - "learning_rate": 4.150921815126493e-05, - "loss": 107.827, - "step": 42920 - }, - { - "epoch": 0.3468838629917824, - "grad_norm": 754.2879638671875, - "learning_rate": 4.150397494691279e-05, - "loss": 130.5941, - "step": 42930 - }, - { - "epoch": 0.34696466519606656, - "grad_norm": 1490.85693359375, - "learning_rate": 4.149873045554671e-05, - "loss": 106.1892, - "step": 42940 - }, - { - "epoch": 0.3470454674003507, - "grad_norm": 2076.156494140625, - "learning_rate": 4.149348467757566e-05, - "loss": 123.2753, - "step": 42950 - }, - { - "epoch": 0.34712626960463483, - "grad_norm": 1685.263427734375, - "learning_rate": 4.148823761340871e-05, - "loss": 120.873, - "step": 42960 - }, - { - "epoch": 0.34720707180891897, - "grad_norm": 1339.9715576171875, - "learning_rate": 4.148298926345504e-05, - "loss": 97.2042, - "step": 42970 - }, - { - "epoch": 0.3472878740132031, - "grad_norm": 1015.4769897460938, - "learning_rate": 4.1477739628123934e-05, - "loss": 141.2197, - "step": 42980 - }, - { - "epoch": 0.34736867621748724, - "grad_norm": 850.2290649414062, - "learning_rate": 4.147248870782477e-05, - "loss": 92.7138, - "step": 42990 - }, - { - "epoch": 0.3474494784217713, - "grad_norm": 991.05712890625, - "learning_rate": 4.146723650296701e-05, - "loss": 105.0196, - "step": 43000 - }, - { - "epoch": 0.34753028062605545, - "grad_norm": 489.93768310546875, - "learning_rate": 4.1461983013960245e-05, - "loss": 90.9819, - "step": 43010 - }, - { - "epoch": 0.3476110828303396, - "grad_norm": 1313.2481689453125, - "learning_rate": 4.145672824121416e-05, - "loss": 133.9542, - "step": 43020 - }, - { - "epoch": 0.3476918850346237, - "grad_norm": 1558.9395751953125, - "learning_rate": 4.145147218513852e-05, - "loss": 91.8571, - "step": 43030 - }, - { - "epoch": 0.34777268723890786, - "grad_norm": 1320.9664306640625, - "learning_rate": 4.144621484614319e-05, - "loss": 143.2685, - "step": 43040 - }, - { - "epoch": 0.347853489443192, - "grad_norm": 890.3096313476562, - "learning_rate": 4.1440956224638184e-05, - "loss": 92.9935, - "step": 43050 - }, - { - "epoch": 0.34793429164747613, - "grad_norm": 492.9075927734375, - "learning_rate": 4.1435696321033554e-05, - "loss": 110.0991, - "step": 43060 - }, - { - "epoch": 0.34801509385176027, - "grad_norm": 731.78759765625, - "learning_rate": 4.143043513573949e-05, - "loss": 99.8035, - "step": 43070 - }, - { - "epoch": 0.3480958960560444, - "grad_norm": 877.2884521484375, - "learning_rate": 4.142517266916625e-05, - "loss": 130.9701, - "step": 43080 - }, - { - "epoch": 0.34817669826032854, - "grad_norm": 1208.205810546875, - "learning_rate": 4.141990892172424e-05, - "loss": 122.929, - "step": 43090 - }, - { - "epoch": 0.3482575004646127, - "grad_norm": 1163.9649658203125, - "learning_rate": 4.1414643893823914e-05, - "loss": 113.165, - "step": 43100 - }, - { - "epoch": 0.3483383026688968, - "grad_norm": 1090.5457763671875, - "learning_rate": 4.1409377585875865e-05, - "loss": 102.771, - "step": 43110 - }, - { - "epoch": 0.34841910487318095, - "grad_norm": 3191.665771484375, - "learning_rate": 4.140410999829076e-05, - "loss": 185.8671, - "step": 43120 - }, - { - "epoch": 0.3484999070774651, - "grad_norm": 703.032470703125, - "learning_rate": 4.1398841131479395e-05, - "loss": 112.7112, - "step": 43130 - }, - { - "epoch": 0.3485807092817492, - "grad_norm": 1196.83837890625, - "learning_rate": 4.139357098585262e-05, - "loss": 121.8927, - "step": 43140 - }, - { - "epoch": 0.34866151148603336, - "grad_norm": 929.038330078125, - "learning_rate": 4.138829956182144e-05, - "loss": 127.6325, - "step": 43150 - }, - { - "epoch": 0.3487423136903175, - "grad_norm": 934.77294921875, - "learning_rate": 4.1383026859796905e-05, - "loss": 113.7552, - "step": 43160 - }, - { - "epoch": 0.34882311589460163, - "grad_norm": 776.7736206054688, - "learning_rate": 4.137775288019021e-05, - "loss": 98.5727, - "step": 43170 - }, - { - "epoch": 0.3489039180988857, - "grad_norm": 1644.178466796875, - "learning_rate": 4.137247762341262e-05, - "loss": 102.9494, - "step": 43180 - }, - { - "epoch": 0.34898472030316985, - "grad_norm": 954.8101196289062, - "learning_rate": 4.136720108987552e-05, - "loss": 110.2166, - "step": 43190 - }, - { - "epoch": 0.349065522507454, - "grad_norm": 1027.3714599609375, - "learning_rate": 4.136192327999037e-05, - "loss": 119.0482, - "step": 43200 - }, - { - "epoch": 0.3491463247117381, - "grad_norm": 1536.1224365234375, - "learning_rate": 4.135664419416877e-05, - "loss": 129.0882, - "step": 43210 - }, - { - "epoch": 0.34922712691602226, - "grad_norm": 866.5405883789062, - "learning_rate": 4.135136383282237e-05, - "loss": 129.9161, - "step": 43220 - }, - { - "epoch": 0.3493079291203064, - "grad_norm": 1716.2762451171875, - "learning_rate": 4.134608219636294e-05, - "loss": 124.4211, - "step": 43230 - }, - { - "epoch": 0.34938873132459053, - "grad_norm": 755.7040405273438, - "learning_rate": 4.1340799285202376e-05, - "loss": 100.1117, - "step": 43240 - }, - { - "epoch": 0.34946953352887467, - "grad_norm": 735.656494140625, - "learning_rate": 4.133551509975264e-05, - "loss": 106.7017, - "step": 43250 - }, - { - "epoch": 0.3495503357331588, - "grad_norm": 847.6460571289062, - "learning_rate": 4.13302296404258e-05, - "loss": 184.5711, - "step": 43260 - }, - { - "epoch": 0.34963113793744294, - "grad_norm": 784.8809814453125, - "learning_rate": 4.132494290763403e-05, - "loss": 124.351, - "step": 43270 - }, - { - "epoch": 0.3497119401417271, - "grad_norm": 1278.4813232421875, - "learning_rate": 4.131965490178959e-05, - "loss": 141.1306, - "step": 43280 - }, - { - "epoch": 0.3497927423460112, - "grad_norm": 860.0731201171875, - "learning_rate": 4.131436562330487e-05, - "loss": 129.1694, - "step": 43290 - }, - { - "epoch": 0.34987354455029535, - "grad_norm": 853.5748901367188, - "learning_rate": 4.130907507259233e-05, - "loss": 124.558, - "step": 43300 - }, - { - "epoch": 0.3499543467545795, - "grad_norm": 925.5911865234375, - "learning_rate": 4.130378325006453e-05, - "loss": 101.8456, - "step": 43310 - }, - { - "epoch": 0.3500351489588636, - "grad_norm": 654.1785278320312, - "learning_rate": 4.129849015613415e-05, - "loss": 115.167, - "step": 43320 - }, - { - "epoch": 0.35011595116314775, - "grad_norm": 911.1454467773438, - "learning_rate": 4.129319579121394e-05, - "loss": 107.7793, - "step": 43330 - }, - { - "epoch": 0.3501967533674319, - "grad_norm": 778.4485473632812, - "learning_rate": 4.1287900155716784e-05, - "loss": 99.6723, - "step": 43340 - }, - { - "epoch": 0.35027755557171597, - "grad_norm": 1008.1852416992188, - "learning_rate": 4.128260325005564e-05, - "loss": 121.5808, - "step": 43350 - }, - { - "epoch": 0.3503583577760001, - "grad_norm": 873.7073974609375, - "learning_rate": 4.127730507464356e-05, - "loss": 158.9801, - "step": 43360 - }, - { - "epoch": 0.35043915998028424, - "grad_norm": 754.9944458007812, - "learning_rate": 4.1272005629893714e-05, - "loss": 129.7986, - "step": 43370 - }, - { - "epoch": 0.3505199621845684, - "grad_norm": 1069.01220703125, - "learning_rate": 4.126670491621938e-05, - "loss": 116.624, - "step": 43380 - }, - { - "epoch": 0.3506007643888525, - "grad_norm": 1946.8424072265625, - "learning_rate": 4.1261402934033886e-05, - "loss": 117.7039, - "step": 43390 - }, - { - "epoch": 0.35068156659313665, - "grad_norm": 646.5797729492188, - "learning_rate": 4.125609968375072e-05, - "loss": 113.8247, - "step": 43400 - }, - { - "epoch": 0.3507623687974208, - "grad_norm": 1420.4061279296875, - "learning_rate": 4.125079516578344e-05, - "loss": 111.6475, - "step": 43410 - }, - { - "epoch": 0.3508431710017049, - "grad_norm": 883.5056762695312, - "learning_rate": 4.124548938054568e-05, - "loss": 94.5492, - "step": 43420 - }, - { - "epoch": 0.35092397320598906, - "grad_norm": 541.1071166992188, - "learning_rate": 4.1240182328451204e-05, - "loss": 117.5019, - "step": 43430 - }, - { - "epoch": 0.3510047754102732, - "grad_norm": 1389.0908203125, - "learning_rate": 4.123487400991388e-05, - "loss": 137.5142, - "step": 43440 - }, - { - "epoch": 0.35108557761455733, - "grad_norm": 422.32373046875, - "learning_rate": 4.1229564425347654e-05, - "loss": 109.4844, - "step": 43450 - }, - { - "epoch": 0.35116637981884147, - "grad_norm": 1107.2125244140625, - "learning_rate": 4.122425357516658e-05, - "loss": 103.4192, - "step": 43460 - }, - { - "epoch": 0.3512471820231256, - "grad_norm": 1315.267578125, - "learning_rate": 4.1218941459784796e-05, - "loss": 90.2331, - "step": 43470 - }, - { - "epoch": 0.35132798422740974, - "grad_norm": 967.4996337890625, - "learning_rate": 4.121362807961658e-05, - "loss": 132.3862, - "step": 43480 - }, - { - "epoch": 0.3514087864316939, - "grad_norm": 1145.662109375, - "learning_rate": 4.120831343507625e-05, - "loss": 129.2516, - "step": 43490 - }, - { - "epoch": 0.351489588635978, - "grad_norm": 914.6609497070312, - "learning_rate": 4.1202997526578276e-05, - "loss": 87.9083, - "step": 43500 - }, - { - "epoch": 0.35157039084026215, - "grad_norm": 707.6971435546875, - "learning_rate": 4.1197680354537186e-05, - "loss": 110.8037, - "step": 43510 - }, - { - "epoch": 0.35165119304454623, - "grad_norm": 1503.02490234375, - "learning_rate": 4.119236191936764e-05, - "loss": 80.6368, - "step": 43520 - }, - { - "epoch": 0.35173199524883036, - "grad_norm": 1397.5062255859375, - "learning_rate": 4.118704222148436e-05, - "loss": 118.6759, - "step": 43530 - }, - { - "epoch": 0.3518127974531145, - "grad_norm": 1631.902099609375, - "learning_rate": 4.118172126130221e-05, - "loss": 121.7544, - "step": 43540 - }, - { - "epoch": 0.35189359965739864, - "grad_norm": 763.1521606445312, - "learning_rate": 4.1176399039236116e-05, - "loss": 117.5934, - "step": 43550 - }, - { - "epoch": 0.3519744018616828, - "grad_norm": 1781.4283447265625, - "learning_rate": 4.117107555570111e-05, - "loss": 136.4733, - "step": 43560 - }, - { - "epoch": 0.3520552040659669, - "grad_norm": 707.01708984375, - "learning_rate": 4.116575081111235e-05, - "loss": 107.5936, - "step": 43570 - }, - { - "epoch": 0.35213600627025105, - "grad_norm": 759.3696899414062, - "learning_rate": 4.116042480588505e-05, - "loss": 99.5202, - "step": 43580 - }, - { - "epoch": 0.3522168084745352, - "grad_norm": 1009.7568969726562, - "learning_rate": 4.115509754043454e-05, - "loss": 140.618, - "step": 43590 - }, - { - "epoch": 0.3522976106788193, - "grad_norm": 1116.375732421875, - "learning_rate": 4.1149769015176275e-05, - "loss": 111.1632, - "step": 43600 - }, - { - "epoch": 0.35237841288310345, - "grad_norm": 914.0462036132812, - "learning_rate": 4.114443923052577e-05, - "loss": 139.1557, - "step": 43610 - }, - { - "epoch": 0.3524592150873876, - "grad_norm": 871.455078125, - "learning_rate": 4.113910818689864e-05, - "loss": 96.8046, - "step": 43620 - }, - { - "epoch": 0.3525400172916717, - "grad_norm": 855.2386474609375, - "learning_rate": 4.1133775884710634e-05, - "loss": 113.4506, - "step": 43630 - }, - { - "epoch": 0.35262081949595586, - "grad_norm": 585.1459350585938, - "learning_rate": 4.112844232437757e-05, - "loss": 78.5358, - "step": 43640 - }, - { - "epoch": 0.35270162170024, - "grad_norm": 583.0993041992188, - "learning_rate": 4.112310750631536e-05, - "loss": 125.2679, - "step": 43650 - }, - { - "epoch": 0.35278242390452413, - "grad_norm": 794.35888671875, - "learning_rate": 4.1117771430940035e-05, - "loss": 123.3755, - "step": 43660 - }, - { - "epoch": 0.35286322610880827, - "grad_norm": 3039.197021484375, - "learning_rate": 4.111243409866769e-05, - "loss": 124.7615, - "step": 43670 - }, - { - "epoch": 0.3529440283130924, - "grad_norm": 788.42138671875, - "learning_rate": 4.1107095509914584e-05, - "loss": 142.6205, - "step": 43680 - }, - { - "epoch": 0.3530248305173765, - "grad_norm": 533.8926391601562, - "learning_rate": 4.1101755665096996e-05, - "loss": 95.0822, - "step": 43690 - }, - { - "epoch": 0.3531056327216606, - "grad_norm": 1001.331298828125, - "learning_rate": 4.109641456463135e-05, - "loss": 128.2229, - "step": 43700 - }, - { - "epoch": 0.35318643492594476, - "grad_norm": 724.7665405273438, - "learning_rate": 4.109107220893415e-05, - "loss": 143.9145, - "step": 43710 - }, - { - "epoch": 0.3532672371302289, - "grad_norm": 842.193359375, - "learning_rate": 4.108572859842201e-05, - "loss": 129.6266, - "step": 43720 - }, - { - "epoch": 0.35334803933451303, - "grad_norm": 921.6456909179688, - "learning_rate": 4.108038373351163e-05, - "loss": 72.5173, - "step": 43730 - }, - { - "epoch": 0.35342884153879717, - "grad_norm": 545.473388671875, - "learning_rate": 4.107503761461983e-05, - "loss": 84.7183, - "step": 43740 - }, - { - "epoch": 0.3535096437430813, - "grad_norm": 804.3419189453125, - "learning_rate": 4.1069690242163484e-05, - "loss": 99.0364, - "step": 43750 - }, - { - "epoch": 0.35359044594736544, - "grad_norm": 494.2071533203125, - "learning_rate": 4.106434161655962e-05, - "loss": 103.0894, - "step": 43760 - }, - { - "epoch": 0.3536712481516496, - "grad_norm": 1004.2776489257812, - "learning_rate": 4.105899173822531e-05, - "loss": 111.627, - "step": 43770 - }, - { - "epoch": 0.3537520503559337, - "grad_norm": 586.3853759765625, - "learning_rate": 4.105364060757776e-05, - "loss": 119.0487, - "step": 43780 - }, - { - "epoch": 0.35383285256021785, - "grad_norm": 828.4921875, - "learning_rate": 4.104828822503427e-05, - "loss": 102.7416, - "step": 43790 - }, - { - "epoch": 0.353913654764502, - "grad_norm": 763.6985473632812, - "learning_rate": 4.104293459101222e-05, - "loss": 94.2293, - "step": 43800 - }, - { - "epoch": 0.3539944569687861, - "grad_norm": 840.3387451171875, - "learning_rate": 4.103757970592909e-05, - "loss": 117.9093, - "step": 43810 - }, - { - "epoch": 0.35407525917307026, - "grad_norm": 1129.329345703125, - "learning_rate": 4.1032223570202474e-05, - "loss": 111.6884, - "step": 43820 - }, - { - "epoch": 0.3541560613773544, - "grad_norm": 1034.421630859375, - "learning_rate": 4.102686618425006e-05, - "loss": 90.9869, - "step": 43830 - }, - { - "epoch": 0.35423686358163853, - "grad_norm": 1153.6153564453125, - "learning_rate": 4.1021507548489625e-05, - "loss": 103.3304, - "step": 43840 - }, - { - "epoch": 0.35431766578592266, - "grad_norm": 1116.0970458984375, - "learning_rate": 4.101614766333904e-05, - "loss": 142.7494, - "step": 43850 - }, - { - "epoch": 0.3543984679902068, - "grad_norm": 744.8369140625, - "learning_rate": 4.1010786529216284e-05, - "loss": 105.8628, - "step": 43860 - }, - { - "epoch": 0.3544792701944909, - "grad_norm": 636.3167724609375, - "learning_rate": 4.100542414653943e-05, - "loss": 105.9977, - "step": 43870 - }, - { - "epoch": 0.354560072398775, - "grad_norm": 1045.371337890625, - "learning_rate": 4.1000060515726647e-05, - "loss": 118.6155, - "step": 43880 - }, - { - "epoch": 0.35464087460305915, - "grad_norm": 847.4445190429688, - "learning_rate": 4.09946956371962e-05, - "loss": 103.0557, - "step": 43890 - }, - { - "epoch": 0.3547216768073433, - "grad_norm": 627.8738403320312, - "learning_rate": 4.098932951136645e-05, - "loss": 117.9863, - "step": 43900 - }, - { - "epoch": 0.3548024790116274, - "grad_norm": 543.4232177734375, - "learning_rate": 4.0983962138655873e-05, - "loss": 119.4373, - "step": 43910 - }, - { - "epoch": 0.35488328121591156, - "grad_norm": 909.0529174804688, - "learning_rate": 4.097859351948301e-05, - "loss": 110.3954, - "step": 43920 - }, - { - "epoch": 0.3549640834201957, - "grad_norm": 1489.5987548828125, - "learning_rate": 4.097322365426653e-05, - "loss": 104.6565, - "step": 43930 - }, - { - "epoch": 0.35504488562447983, - "grad_norm": 865.8026123046875, - "learning_rate": 4.0967852543425175e-05, - "loss": 116.9045, - "step": 43940 - }, - { - "epoch": 0.35512568782876397, - "grad_norm": 1028.508056640625, - "learning_rate": 4.096248018737781e-05, - "loss": 86.7975, - "step": 43950 - }, - { - "epoch": 0.3552064900330481, - "grad_norm": 994.4835815429688, - "learning_rate": 4.095710658654337e-05, - "loss": 102.6764, - "step": 43960 - }, - { - "epoch": 0.35528729223733224, - "grad_norm": 1301.734130859375, - "learning_rate": 4.09517317413409e-05, - "loss": 107.1787, - "step": 43970 - }, - { - "epoch": 0.3553680944416164, - "grad_norm": 989.0552368164062, - "learning_rate": 4.094635565218955e-05, - "loss": 135.2211, - "step": 43980 - }, - { - "epoch": 0.3554488966459005, - "grad_norm": 1049.1402587890625, - "learning_rate": 4.094097831950855e-05, - "loss": 87.5517, - "step": 43990 - }, - { - "epoch": 0.35552969885018465, - "grad_norm": 664.1399536132812, - "learning_rate": 4.093559974371725e-05, - "loss": 108.5663, - "step": 44000 - }, - { - "epoch": 0.3556105010544688, - "grad_norm": 1280.4056396484375, - "learning_rate": 4.0930219925235056e-05, - "loss": 90.2192, - "step": 44010 - }, - { - "epoch": 0.3556913032587529, - "grad_norm": 1233.9451904296875, - "learning_rate": 4.0924838864481516e-05, - "loss": 117.4154, - "step": 44020 - }, - { - "epoch": 0.35577210546303706, - "grad_norm": 829.8418579101562, - "learning_rate": 4.0919456561876256e-05, - "loss": 105.9069, - "step": 44030 - }, - { - "epoch": 0.35585290766732114, - "grad_norm": 1245.917724609375, - "learning_rate": 4.0914073017838996e-05, - "loss": 121.8114, - "step": 44040 - }, - { - "epoch": 0.3559337098716053, - "grad_norm": 947.978271484375, - "learning_rate": 4.090868823278956e-05, - "loss": 136.735, - "step": 44050 - }, - { - "epoch": 0.3560145120758894, - "grad_norm": 864.8270874023438, - "learning_rate": 4.090330220714785e-05, - "loss": 111.177, - "step": 44060 - }, - { - "epoch": 0.35609531428017355, - "grad_norm": 961.9524536132812, - "learning_rate": 4.089791494133389e-05, - "loss": 111.8821, - "step": 44070 - }, - { - "epoch": 0.3561761164844577, - "grad_norm": 1194.931640625, - "learning_rate": 4.0892526435767795e-05, - "loss": 171.4436, - "step": 44080 - }, - { - "epoch": 0.3562569186887418, - "grad_norm": 788.662109375, - "learning_rate": 4.088713669086977e-05, - "loss": 97.6128, - "step": 44090 - }, - { - "epoch": 0.35633772089302596, - "grad_norm": 1516.6883544921875, - "learning_rate": 4.088174570706011e-05, - "loss": 141.2855, - "step": 44100 - }, - { - "epoch": 0.3564185230973101, - "grad_norm": 3012.687255859375, - "learning_rate": 4.0876353484759224e-05, - "loss": 130.4492, - "step": 44110 - }, - { - "epoch": 0.3564993253015942, - "grad_norm": 997.9649047851562, - "learning_rate": 4.0870960024387596e-05, - "loss": 144.1817, - "step": 44120 - }, - { - "epoch": 0.35658012750587836, - "grad_norm": 478.57379150390625, - "learning_rate": 4.0865565326365835e-05, - "loss": 106.9292, - "step": 44130 - }, - { - "epoch": 0.3566609297101625, - "grad_norm": 1019.8602905273438, - "learning_rate": 4.0860169391114625e-05, - "loss": 84.3847, - "step": 44140 - }, - { - "epoch": 0.35674173191444664, - "grad_norm": 824.9840698242188, - "learning_rate": 4.085477221905474e-05, - "loss": 102.6047, - "step": 44150 - }, - { - "epoch": 0.35682253411873077, - "grad_norm": 786.1381225585938, - "learning_rate": 4.084937381060708e-05, - "loss": 162.6483, - "step": 44160 - }, - { - "epoch": 0.3569033363230149, - "grad_norm": 1434.0269775390625, - "learning_rate": 4.0843974166192614e-05, - "loss": 114.5614, - "step": 44170 - }, - { - "epoch": 0.35698413852729904, - "grad_norm": 722.53515625, - "learning_rate": 4.083857328623243e-05, - "loss": 107.1983, - "step": 44180 - }, - { - "epoch": 0.3570649407315832, - "grad_norm": 650.5493774414062, - "learning_rate": 4.083317117114768e-05, - "loss": 103.1519, - "step": 44190 - }, - { - "epoch": 0.3571457429358673, - "grad_norm": 1031.7781982421875, - "learning_rate": 4.082776782135964e-05, - "loss": 104.7165, - "step": 44200 - }, - { - "epoch": 0.3572265451401514, - "grad_norm": 452.5836486816406, - "learning_rate": 4.082236323728968e-05, - "loss": 100.6409, - "step": 44210 - }, - { - "epoch": 0.35730734734443553, - "grad_norm": 1525.192626953125, - "learning_rate": 4.0816957419359264e-05, - "loss": 103.1037, - "step": 44220 - }, - { - "epoch": 0.35738814954871967, - "grad_norm": 1433.3660888671875, - "learning_rate": 4.081155036798994e-05, - "loss": 117.2733, - "step": 44230 - }, - { - "epoch": 0.3574689517530038, - "grad_norm": 501.1810607910156, - "learning_rate": 4.080614208360336e-05, - "loss": 106.8299, - "step": 44240 - }, - { - "epoch": 0.35754975395728794, - "grad_norm": 1368.1141357421875, - "learning_rate": 4.080073256662127e-05, - "loss": 111.097, - "step": 44250 - }, - { - "epoch": 0.3576305561615721, - "grad_norm": 348.3703918457031, - "learning_rate": 4.079532181746553e-05, - "loss": 173.8365, - "step": 44260 - }, - { - "epoch": 0.3577113583658562, - "grad_norm": 1688.6666259765625, - "learning_rate": 4.078990983655807e-05, - "loss": 121.7604, - "step": 44270 - }, - { - "epoch": 0.35779216057014035, - "grad_norm": 889.8253784179688, - "learning_rate": 4.078449662432093e-05, - "loss": 105.5604, - "step": 44280 - }, - { - "epoch": 0.3578729627744245, - "grad_norm": 1174.5574951171875, - "learning_rate": 4.077908218117625e-05, - "loss": 115.9256, - "step": 44290 - }, - { - "epoch": 0.3579537649787086, - "grad_norm": 1489.6949462890625, - "learning_rate": 4.077366650754624e-05, - "loss": 107.8177, - "step": 44300 - }, - { - "epoch": 0.35803456718299276, - "grad_norm": 2550.353759765625, - "learning_rate": 4.0768249603853245e-05, - "loss": 135.8663, - "step": 44310 - }, - { - "epoch": 0.3581153693872769, - "grad_norm": 1357.8897705078125, - "learning_rate": 4.076283147051968e-05, - "loss": 109.1126, - "step": 44320 - }, - { - "epoch": 0.35819617159156103, - "grad_norm": 828.8513793945312, - "learning_rate": 4.075741210796806e-05, - "loss": 124.5464, - "step": 44330 - }, - { - "epoch": 0.35827697379584517, - "grad_norm": 607.1525268554688, - "learning_rate": 4.075199151662101e-05, - "loss": 141.2206, - "step": 44340 - }, - { - "epoch": 0.3583577760001293, - "grad_norm": 897.5729370117188, - "learning_rate": 4.074656969690122e-05, - "loss": 108.2055, - "step": 44350 - }, - { - "epoch": 0.35843857820441344, - "grad_norm": 1061.6270751953125, - "learning_rate": 4.0741146649231504e-05, - "loss": 101.5155, - "step": 44360 - }, - { - "epoch": 0.3585193804086976, - "grad_norm": 685.4229125976562, - "learning_rate": 4.0735722374034764e-05, - "loss": 142.5643, - "step": 44370 - }, - { - "epoch": 0.35860018261298165, - "grad_norm": 1104.4642333984375, - "learning_rate": 4.073029687173399e-05, - "loss": 108.319, - "step": 44380 - }, - { - "epoch": 0.3586809848172658, - "grad_norm": 1329.1788330078125, - "learning_rate": 4.0724870142752284e-05, - "loss": 131.9634, - "step": 44390 - }, - { - "epoch": 0.3587617870215499, - "grad_norm": 662.139404296875, - "learning_rate": 4.071944218751282e-05, - "loss": 105.585, - "step": 44400 - }, - { - "epoch": 0.35884258922583406, - "grad_norm": 4894.73388671875, - "learning_rate": 4.071401300643889e-05, - "loss": 144.4201, - "step": 44410 - }, - { - "epoch": 0.3589233914301182, - "grad_norm": 1231.5618896484375, - "learning_rate": 4.070858259995387e-05, - "loss": 119.1013, - "step": 44420 - }, - { - "epoch": 0.35900419363440234, - "grad_norm": 1240.66357421875, - "learning_rate": 4.0703150968481246e-05, - "loss": 142.4486, - "step": 44430 - }, - { - "epoch": 0.35908499583868647, - "grad_norm": 975.5540771484375, - "learning_rate": 4.069771811244457e-05, - "loss": 104.1362, - "step": 44440 - }, - { - "epoch": 0.3591657980429706, - "grad_norm": 701.2946166992188, - "learning_rate": 4.0692284032267516e-05, - "loss": 86.0585, - "step": 44450 - }, - { - "epoch": 0.35924660024725474, - "grad_norm": 933.6910400390625, - "learning_rate": 4.068684872837384e-05, - "loss": 119.5719, - "step": 44460 - }, - { - "epoch": 0.3593274024515389, - "grad_norm": 1057.3675537109375, - "learning_rate": 4.068141220118741e-05, - "loss": 111.1188, - "step": 44470 - }, - { - "epoch": 0.359408204655823, - "grad_norm": 1585.7354736328125, - "learning_rate": 4.067597445113216e-05, - "loss": 90.0756, - "step": 44480 - }, - { - "epoch": 0.35948900686010715, - "grad_norm": 962.670654296875, - "learning_rate": 4.067053547863215e-05, - "loss": 139.3422, - "step": 44490 - }, - { - "epoch": 0.3595698090643913, - "grad_norm": 1390.1109619140625, - "learning_rate": 4.066509528411152e-05, - "loss": 96.5031, - "step": 44500 - }, - { - "epoch": 0.3596506112686754, - "grad_norm": 1044.8287353515625, - "learning_rate": 4.0659653867994496e-05, - "loss": 125.2117, - "step": 44510 - }, - { - "epoch": 0.35973141347295956, - "grad_norm": 888.5601196289062, - "learning_rate": 4.065421123070543e-05, - "loss": 117.659, - "step": 44520 - }, - { - "epoch": 0.3598122156772437, - "grad_norm": 424.09112548828125, - "learning_rate": 4.064876737266874e-05, - "loss": 118.3877, - "step": 44530 - }, - { - "epoch": 0.35989301788152783, - "grad_norm": 657.3685913085938, - "learning_rate": 4.064332229430895e-05, - "loss": 84.5217, - "step": 44540 - }, - { - "epoch": 0.35997382008581197, - "grad_norm": 910.1583251953125, - "learning_rate": 4.063787599605068e-05, - "loss": 103.6948, - "step": 44550 - }, - { - "epoch": 0.36005462229009605, - "grad_norm": 861.8853759765625, - "learning_rate": 4.063242847831864e-05, - "loss": 92.8685, - "step": 44560 - }, - { - "epoch": 0.3601354244943802, - "grad_norm": 696.6035766601562, - "learning_rate": 4.062697974153764e-05, - "loss": 114.5127, - "step": 44570 - }, - { - "epoch": 0.3602162266986643, - "grad_norm": 1066.3929443359375, - "learning_rate": 4.062152978613258e-05, - "loss": 88.9502, - "step": 44580 - }, - { - "epoch": 0.36029702890294846, - "grad_norm": 918.521484375, - "learning_rate": 4.061607861252847e-05, - "loss": 98.2792, - "step": 44590 - }, - { - "epoch": 0.3603778311072326, - "grad_norm": 782.3241577148438, - "learning_rate": 4.0610626221150394e-05, - "loss": 112.1129, - "step": 44600 - }, - { - "epoch": 0.36045863331151673, - "grad_norm": 1005.10107421875, - "learning_rate": 4.060517261242355e-05, - "loss": 109.5379, - "step": 44610 - }, - { - "epoch": 0.36053943551580087, - "grad_norm": 705.2352905273438, - "learning_rate": 4.0599717786773204e-05, - "loss": 88.3341, - "step": 44620 - }, - { - "epoch": 0.360620237720085, - "grad_norm": 733.4053955078125, - "learning_rate": 4.059426174462476e-05, - "loss": 103.9209, - "step": 44630 - }, - { - "epoch": 0.36070103992436914, - "grad_norm": 1421.2803955078125, - "learning_rate": 4.058880448640367e-05, - "loss": 154.9132, - "step": 44640 - }, - { - "epoch": 0.3607818421286533, - "grad_norm": 3702.77197265625, - "learning_rate": 4.0583346012535506e-05, - "loss": 185.5, - "step": 44650 - }, - { - "epoch": 0.3608626443329374, - "grad_norm": 1145.00634765625, - "learning_rate": 4.057788632344593e-05, - "loss": 122.0838, - "step": 44660 - }, - { - "epoch": 0.36094344653722155, - "grad_norm": 746.0342407226562, - "learning_rate": 4.0572425419560714e-05, - "loss": 95.497, - "step": 44670 - }, - { - "epoch": 0.3610242487415057, - "grad_norm": 772.5443115234375, - "learning_rate": 4.0566963301305705e-05, - "loss": 125.7171, - "step": 44680 - }, - { - "epoch": 0.3611050509457898, - "grad_norm": 852.5970458984375, - "learning_rate": 4.056149996910683e-05, - "loss": 98.6204, - "step": 44690 - }, - { - "epoch": 0.36118585315007395, - "grad_norm": 944.2754516601562, - "learning_rate": 4.055603542339016e-05, - "loss": 97.6722, - "step": 44700 - }, - { - "epoch": 0.3612666553543581, - "grad_norm": 723.1927490234375, - "learning_rate": 4.055056966458182e-05, - "loss": 96.7065, - "step": 44710 - }, - { - "epoch": 0.3613474575586422, - "grad_norm": 858.5045776367188, - "learning_rate": 4.054510269310803e-05, - "loss": 129.2189, - "step": 44720 - }, - { - "epoch": 0.3614282597629263, - "grad_norm": 1604.6932373046875, - "learning_rate": 4.053963450939513e-05, - "loss": 118.1124, - "step": 44730 - }, - { - "epoch": 0.36150906196721044, - "grad_norm": 985.6595458984375, - "learning_rate": 4.053416511386954e-05, - "loss": 110.3825, - "step": 44740 - }, - { - "epoch": 0.3615898641714946, - "grad_norm": 742.3359375, - "learning_rate": 4.052869450695776e-05, - "loss": 99.7039, - "step": 44750 - }, - { - "epoch": 0.3616706663757787, - "grad_norm": 1539.3206787109375, - "learning_rate": 4.0523222689086414e-05, - "loss": 138.849, - "step": 44760 - }, - { - "epoch": 0.36175146858006285, - "grad_norm": 1063.43505859375, - "learning_rate": 4.05177496606822e-05, - "loss": 94.7029, - "step": 44770 - }, - { - "epoch": 0.361832270784347, - "grad_norm": 1687.0396728515625, - "learning_rate": 4.051227542217192e-05, - "loss": 135.7232, - "step": 44780 - }, - { - "epoch": 0.3619130729886311, - "grad_norm": 992.52734375, - "learning_rate": 4.0506799973982465e-05, - "loss": 77.5983, - "step": 44790 - }, - { - "epoch": 0.36199387519291526, - "grad_norm": 456.19305419921875, - "learning_rate": 4.050132331654082e-05, - "loss": 75.4506, - "step": 44800 - }, - { - "epoch": 0.3620746773971994, - "grad_norm": 1297.0634765625, - "learning_rate": 4.0495845450274064e-05, - "loss": 141.9966, - "step": 44810 - }, - { - "epoch": 0.36215547960148353, - "grad_norm": 1086.6385498046875, - "learning_rate": 4.0490366375609376e-05, - "loss": 135.3717, - "step": 44820 - }, - { - "epoch": 0.36223628180576767, - "grad_norm": 1013.94091796875, - "learning_rate": 4.048488609297402e-05, - "loss": 104.9543, - "step": 44830 - }, - { - "epoch": 0.3623170840100518, - "grad_norm": 1491.25341796875, - "learning_rate": 4.047940460279537e-05, - "loss": 115.93, - "step": 44840 - }, - { - "epoch": 0.36239788621433594, - "grad_norm": 1178.126220703125, - "learning_rate": 4.047392190550087e-05, - "loss": 108.6628, - "step": 44850 - }, - { - "epoch": 0.3624786884186201, - "grad_norm": 756.2459716796875, - "learning_rate": 4.0468438001518084e-05, - "loss": 111.006, - "step": 44860 - }, - { - "epoch": 0.3625594906229042, - "grad_norm": 1015.8656616210938, - "learning_rate": 4.0462952891274655e-05, - "loss": 96.9842, - "step": 44870 - }, - { - "epoch": 0.36264029282718835, - "grad_norm": 1833.7064208984375, - "learning_rate": 4.045746657519831e-05, - "loss": 115.097, - "step": 44880 - }, - { - "epoch": 0.3627210950314725, - "grad_norm": 1117.7265625, - "learning_rate": 4.0451979053716906e-05, - "loss": 97.3755, - "step": 44890 - }, - { - "epoch": 0.36280189723575657, - "grad_norm": 1326.3154296875, - "learning_rate": 4.044649032725836e-05, - "loss": 101.178, - "step": 44900 - }, - { - "epoch": 0.3628826994400407, - "grad_norm": 1074.0645751953125, - "learning_rate": 4.0441000396250694e-05, - "loss": 118.8476, - "step": 44910 - }, - { - "epoch": 0.36296350164432484, - "grad_norm": 2036.4703369140625, - "learning_rate": 4.043550926112203e-05, - "loss": 110.1735, - "step": 44920 - }, - { - "epoch": 0.363044303848609, - "grad_norm": 904.8440551757812, - "learning_rate": 4.043001692230056e-05, - "loss": 122.527, - "step": 44930 - }, - { - "epoch": 0.3631251060528931, - "grad_norm": 944.021728515625, - "learning_rate": 4.042452338021461e-05, - "loss": 123.9108, - "step": 44940 - }, - { - "epoch": 0.36320590825717725, - "grad_norm": 826.4424438476562, - "learning_rate": 4.041902863529256e-05, - "loss": 112.2311, - "step": 44950 - }, - { - "epoch": 0.3632867104614614, - "grad_norm": 926.697509765625, - "learning_rate": 4.041353268796293e-05, - "loss": 131.2814, - "step": 44960 - }, - { - "epoch": 0.3633675126657455, - "grad_norm": 1078.572021484375, - "learning_rate": 4.0408035538654264e-05, - "loss": 82.4051, - "step": 44970 - }, - { - "epoch": 0.36344831487002965, - "grad_norm": 899.0211791992188, - "learning_rate": 4.0402537187795274e-05, - "loss": 119.6712, - "step": 44980 - }, - { - "epoch": 0.3635291170743138, - "grad_norm": 2808.316162109375, - "learning_rate": 4.039703763581472e-05, - "loss": 128.4753, - "step": 44990 - }, - { - "epoch": 0.3636099192785979, - "grad_norm": 2648.02880859375, - "learning_rate": 4.039153688314145e-05, - "loss": 141.8718, - "step": 45000 - }, - { - "epoch": 0.36369072148288206, - "grad_norm": 1533.374267578125, - "learning_rate": 4.038603493020447e-05, - "loss": 134.919, - "step": 45010 - }, - { - "epoch": 0.3637715236871662, - "grad_norm": 827.4744873046875, - "learning_rate": 4.0380531777432794e-05, - "loss": 132.5408, - "step": 45020 - }, - { - "epoch": 0.36385232589145033, - "grad_norm": 1395.5308837890625, - "learning_rate": 4.037502742525559e-05, - "loss": 135.0988, - "step": 45030 - }, - { - "epoch": 0.36393312809573447, - "grad_norm": 1321.936767578125, - "learning_rate": 4.036952187410208e-05, - "loss": 100.1209, - "step": 45040 - }, - { - "epoch": 0.3640139303000186, - "grad_norm": 872.9254150390625, - "learning_rate": 4.036401512440161e-05, - "loss": 92.8261, - "step": 45050 - }, - { - "epoch": 0.36409473250430274, - "grad_norm": 698.3904418945312, - "learning_rate": 4.035850717658362e-05, - "loss": 139.8461, - "step": 45060 - }, - { - "epoch": 0.3641755347085868, - "grad_norm": 843.5409545898438, - "learning_rate": 4.0352998031077604e-05, - "loss": 115.4409, - "step": 45070 - }, - { - "epoch": 0.36425633691287096, - "grad_norm": 725.7376098632812, - "learning_rate": 4.0347487688313194e-05, - "loss": 118.1425, - "step": 45080 - }, - { - "epoch": 0.3643371391171551, - "grad_norm": 1042.483154296875, - "learning_rate": 4.0341976148720095e-05, - "loss": 128.5539, - "step": 45090 - }, - { - "epoch": 0.36441794132143923, - "grad_norm": 1181.8375244140625, - "learning_rate": 4.033646341272811e-05, - "loss": 95.8117, - "step": 45100 - }, - { - "epoch": 0.36449874352572337, - "grad_norm": 1426.7315673828125, - "learning_rate": 4.033094948076713e-05, - "loss": 107.0105, - "step": 45110 - }, - { - "epoch": 0.3645795457300075, - "grad_norm": 858.8604736328125, - "learning_rate": 4.032543435326714e-05, - "loss": 80.0289, - "step": 45120 - }, - { - "epoch": 0.36466034793429164, - "grad_norm": 929.8585205078125, - "learning_rate": 4.0319918030658225e-05, - "loss": 131.627, - "step": 45130 - }, - { - "epoch": 0.3647411501385758, - "grad_norm": 1022.883544921875, - "learning_rate": 4.031440051337056e-05, - "loss": 135.2449, - "step": 45140 - }, - { - "epoch": 0.3648219523428599, - "grad_norm": 1541.503662109375, - "learning_rate": 4.030888180183441e-05, - "loss": 116.274, - "step": 45150 - }, - { - "epoch": 0.36490275454714405, - "grad_norm": 1403.1505126953125, - "learning_rate": 4.030336189648014e-05, - "loss": 107.1634, - "step": 45160 - }, - { - "epoch": 0.3649835567514282, - "grad_norm": 1660.9656982421875, - "learning_rate": 4.02978407977382e-05, - "loss": 173.029, - "step": 45170 - }, - { - "epoch": 0.3650643589557123, - "grad_norm": 1563.0169677734375, - "learning_rate": 4.029231850603914e-05, - "loss": 138.9478, - "step": 45180 - }, - { - "epoch": 0.36514516115999646, - "grad_norm": 1061.5228271484375, - "learning_rate": 4.0286795021813594e-05, - "loss": 87.0904, - "step": 45190 - }, - { - "epoch": 0.3652259633642806, - "grad_norm": 2305.993896484375, - "learning_rate": 4.028127034549229e-05, - "loss": 140.5057, - "step": 45200 - }, - { - "epoch": 0.36530676556856473, - "grad_norm": 967.2716064453125, - "learning_rate": 4.0275744477506074e-05, - "loss": 118.8009, - "step": 45210 - }, - { - "epoch": 0.36538756777284886, - "grad_norm": 910.1617431640625, - "learning_rate": 4.027021741828584e-05, - "loss": 90.7656, - "step": 45220 - }, - { - "epoch": 0.365468369977133, - "grad_norm": 907.4196166992188, - "learning_rate": 4.026468916826262e-05, - "loss": 118.8105, - "step": 45230 - }, - { - "epoch": 0.3655491721814171, - "grad_norm": 1099.6981201171875, - "learning_rate": 4.0259159727867504e-05, - "loss": 124.3331, - "step": 45240 - }, - { - "epoch": 0.3656299743857012, - "grad_norm": 1357.2811279296875, - "learning_rate": 4.02536290975317e-05, - "loss": 108.5904, - "step": 45250 - }, - { - "epoch": 0.36571077658998535, - "grad_norm": 649.9727783203125, - "learning_rate": 4.024809727768648e-05, - "loss": 101.9834, - "step": 45260 - }, - { - "epoch": 0.3657915787942695, - "grad_norm": 636.0609130859375, - "learning_rate": 4.024256426876325e-05, - "loss": 102.3655, - "step": 45270 - }, - { - "epoch": 0.3658723809985536, - "grad_norm": 1231.4210205078125, - "learning_rate": 4.023703007119347e-05, - "loss": 109.1033, - "step": 45280 - }, - { - "epoch": 0.36595318320283776, - "grad_norm": 943.4900512695312, - "learning_rate": 4.023149468540871e-05, - "loss": 115.2099, - "step": 45290 - }, - { - "epoch": 0.3660339854071219, - "grad_norm": 1264.373779296875, - "learning_rate": 4.022595811184064e-05, - "loss": 92.9157, - "step": 45300 - }, - { - "epoch": 0.36611478761140603, - "grad_norm": 1296.948974609375, - "learning_rate": 4.022042035092101e-05, - "loss": 111.1479, - "step": 45310 - }, - { - "epoch": 0.36619558981569017, - "grad_norm": 932.8269653320312, - "learning_rate": 4.021488140308165e-05, - "loss": 131.4255, - "step": 45320 - }, - { - "epoch": 0.3662763920199743, - "grad_norm": 708.4837646484375, - "learning_rate": 4.020934126875452e-05, - "loss": 104.3602, - "step": 45330 - }, - { - "epoch": 0.36635719422425844, - "grad_norm": 1196.604736328125, - "learning_rate": 4.020379994837164e-05, - "loss": 106.6842, - "step": 45340 - }, - { - "epoch": 0.3664379964285426, - "grad_norm": 1495.8026123046875, - "learning_rate": 4.019825744236514e-05, - "loss": 115.4471, - "step": 45350 - }, - { - "epoch": 0.3665187986328267, - "grad_norm": 722.3506469726562, - "learning_rate": 4.019271375116722e-05, - "loss": 139.3934, - "step": 45360 - }, - { - "epoch": 0.36659960083711085, - "grad_norm": 800.5509643554688, - "learning_rate": 4.0187168875210216e-05, - "loss": 122.9944, - "step": 45370 - }, - { - "epoch": 0.366680403041395, - "grad_norm": 2166.52880859375, - "learning_rate": 4.0181622814926504e-05, - "loss": 139.8402, - "step": 45380 - }, - { - "epoch": 0.3667612052456791, - "grad_norm": 736.9072265625, - "learning_rate": 4.0176075570748596e-05, - "loss": 96.8592, - "step": 45390 - }, - { - "epoch": 0.36684200744996326, - "grad_norm": 734.638671875, - "learning_rate": 4.017052714310906e-05, - "loss": 90.1122, - "step": 45400 - }, - { - "epoch": 0.3669228096542474, - "grad_norm": 826.3543701171875, - "learning_rate": 4.0164977532440584e-05, - "loss": 90.6904, - "step": 45410 - }, - { - "epoch": 0.3670036118585315, - "grad_norm": 1084.279541015625, - "learning_rate": 4.015942673917593e-05, - "loss": 110.204, - "step": 45420 - }, - { - "epoch": 0.3670844140628156, - "grad_norm": 1672.404541015625, - "learning_rate": 4.0153874763747976e-05, - "loss": 118.2581, - "step": 45430 - }, - { - "epoch": 0.36716521626709975, - "grad_norm": 766.892578125, - "learning_rate": 4.0148321606589656e-05, - "loss": 102.5131, - "step": 45440 - }, - { - "epoch": 0.3672460184713839, - "grad_norm": 1021.5000610351562, - "learning_rate": 4.014276726813404e-05, - "loss": 66.053, - "step": 45450 - }, - { - "epoch": 0.367326820675668, - "grad_norm": 901.6074829101562, - "learning_rate": 4.013721174881425e-05, - "loss": 100.8502, - "step": 45460 - }, - { - "epoch": 0.36740762287995216, - "grad_norm": 1826.5863037109375, - "learning_rate": 4.0131655049063514e-05, - "loss": 107.0407, - "step": 45470 - }, - { - "epoch": 0.3674884250842363, - "grad_norm": 906.1949462890625, - "learning_rate": 4.012609716931517e-05, - "loss": 116.4952, - "step": 45480 - }, - { - "epoch": 0.36756922728852043, - "grad_norm": 832.67919921875, - "learning_rate": 4.012053811000262e-05, - "loss": 96.2739, - "step": 45490 - }, - { - "epoch": 0.36765002949280456, - "grad_norm": 678.4616088867188, - "learning_rate": 4.011497787155938e-05, - "loss": 123.4746, - "step": 45500 - }, - { - "epoch": 0.3677308316970887, - "grad_norm": 1137.18701171875, - "learning_rate": 4.010941645441904e-05, - "loss": 147.9518, - "step": 45510 - }, - { - "epoch": 0.36781163390137284, - "grad_norm": 1348.2738037109375, - "learning_rate": 4.01038538590153e-05, - "loss": 138.0168, - "step": 45520 - }, - { - "epoch": 0.367892436105657, - "grad_norm": 1145.2213134765625, - "learning_rate": 4.009829008578192e-05, - "loss": 139.9666, - "step": 45530 - }, - { - "epoch": 0.3679732383099411, - "grad_norm": 672.6651000976562, - "learning_rate": 4.009272513515281e-05, - "loss": 91.6633, - "step": 45540 - }, - { - "epoch": 0.36805404051422524, - "grad_norm": 905.5855102539062, - "learning_rate": 4.0087159007561916e-05, - "loss": 106.3264, - "step": 45550 - }, - { - "epoch": 0.3681348427185094, - "grad_norm": 1798.634765625, - "learning_rate": 4.00815917034433e-05, - "loss": 137.9609, - "step": 45560 - }, - { - "epoch": 0.3682156449227935, - "grad_norm": 811.5584716796875, - "learning_rate": 4.0076023223231105e-05, - "loss": 104.9507, - "step": 45570 - }, - { - "epoch": 0.36829644712707765, - "grad_norm": 907.8243408203125, - "learning_rate": 4.007045356735959e-05, - "loss": 80.0976, - "step": 45580 - }, - { - "epoch": 0.36837724933136173, - "grad_norm": 778.5613403320312, - "learning_rate": 4.006488273626307e-05, - "loss": 129.3595, - "step": 45590 - }, - { - "epoch": 0.36845805153564587, - "grad_norm": 870.7586669921875, - "learning_rate": 4.005931073037596e-05, - "loss": 86.3473, - "step": 45600 - }, - { - "epoch": 0.36853885373993, - "grad_norm": 1179.7633056640625, - "learning_rate": 4.0053737550132816e-05, - "loss": 109.7348, - "step": 45610 - }, - { - "epoch": 0.36861965594421414, - "grad_norm": 1121.81396484375, - "learning_rate": 4.0048163195968214e-05, - "loss": 141.7834, - "step": 45620 - }, - { - "epoch": 0.3687004581484983, - "grad_norm": 913.0496215820312, - "learning_rate": 4.004258766831686e-05, - "loss": 127.4815, - "step": 45630 - }, - { - "epoch": 0.3687812603527824, - "grad_norm": 1237.495361328125, - "learning_rate": 4.003701096761355e-05, - "loss": 118.3682, - "step": 45640 - }, - { - "epoch": 0.36886206255706655, - "grad_norm": 1130.521240234375, - "learning_rate": 4.003143309429317e-05, - "loss": 110.9911, - "step": 45650 - }, - { - "epoch": 0.3689428647613507, - "grad_norm": 1001.0836181640625, - "learning_rate": 4.0025854048790677e-05, - "loss": 106.1093, - "step": 45660 - }, - { - "epoch": 0.3690236669656348, - "grad_norm": 599.9057006835938, - "learning_rate": 4.0020273831541155e-05, - "loss": 108.987, - "step": 45670 - }, - { - "epoch": 0.36910446916991896, - "grad_norm": 1347.5233154296875, - "learning_rate": 4.001469244297975e-05, - "loss": 142.4018, - "step": 45680 - }, - { - "epoch": 0.3691852713742031, - "grad_norm": 601.7135620117188, - "learning_rate": 4.0009109883541715e-05, - "loss": 109.225, - "step": 45690 - }, - { - "epoch": 0.36926607357848723, - "grad_norm": 673.5532836914062, - "learning_rate": 4.000352615366239e-05, - "loss": 129.8118, - "step": 45700 - }, - { - "epoch": 0.36934687578277137, - "grad_norm": 717.1967163085938, - "learning_rate": 3.999794125377721e-05, - "loss": 80.12, - "step": 45710 - }, - { - "epoch": 0.3694276779870555, - "grad_norm": 905.3340454101562, - "learning_rate": 3.999235518432168e-05, - "loss": 108.1828, - "step": 45720 - }, - { - "epoch": 0.36950848019133964, - "grad_norm": 530.4837036132812, - "learning_rate": 3.998676794573142e-05, - "loss": 127.0283, - "step": 45730 - }, - { - "epoch": 0.3695892823956238, - "grad_norm": 1023.5977783203125, - "learning_rate": 3.9981179538442146e-05, - "loss": 111.7951, - "step": 45740 - }, - { - "epoch": 0.3696700845999079, - "grad_norm": 1385.0367431640625, - "learning_rate": 3.997558996288965e-05, - "loss": 119.8228, - "step": 45750 - }, - { - "epoch": 0.369750886804192, - "grad_norm": 1058.2545166015625, - "learning_rate": 3.996999921950981e-05, - "loss": 130.2118, - "step": 45760 - }, - { - "epoch": 0.3698316890084761, - "grad_norm": 1248.064208984375, - "learning_rate": 3.99644073087386e-05, - "loss": 137.5963, - "step": 45770 - }, - { - "epoch": 0.36991249121276026, - "grad_norm": 770.498779296875, - "learning_rate": 3.9958814231012115e-05, - "loss": 92.9137, - "step": 45780 - }, - { - "epoch": 0.3699932934170444, - "grad_norm": 880.776611328125, - "learning_rate": 3.995321998676648e-05, - "loss": 119.595, - "step": 45790 - }, - { - "epoch": 0.37007409562132854, - "grad_norm": 1090.2403564453125, - "learning_rate": 3.9947624576437975e-05, - "loss": 83.434, - "step": 45800 - }, - { - "epoch": 0.37015489782561267, - "grad_norm": 775.6031494140625, - "learning_rate": 3.994202800046292e-05, - "loss": 88.9717, - "step": 45810 - }, - { - "epoch": 0.3702357000298968, - "grad_norm": 771.652099609375, - "learning_rate": 3.993643025927776e-05, - "loss": 95.3491, - "step": 45820 - }, - { - "epoch": 0.37031650223418094, - "grad_norm": 1804.3773193359375, - "learning_rate": 3.9930831353319023e-05, - "loss": 90.8954, - "step": 45830 - }, - { - "epoch": 0.3703973044384651, - "grad_norm": 571.1260375976562, - "learning_rate": 3.99252312830233e-05, - "loss": 84.8551, - "step": 45840 - }, - { - "epoch": 0.3704781066427492, - "grad_norm": 1240.39404296875, - "learning_rate": 3.9919630048827314e-05, - "loss": 100.1945, - "step": 45850 - }, - { - "epoch": 0.37055890884703335, - "grad_norm": 952.48828125, - "learning_rate": 3.9914027651167866e-05, - "loss": 118.3895, - "step": 45860 - }, - { - "epoch": 0.3706397110513175, - "grad_norm": 1919.24072265625, - "learning_rate": 3.990842409048183e-05, - "loss": 130.4967, - "step": 45870 - }, - { - "epoch": 0.3707205132556016, - "grad_norm": 837.4407348632812, - "learning_rate": 3.990281936720619e-05, - "loss": 123.1333, - "step": 45880 - }, - { - "epoch": 0.37080131545988576, - "grad_norm": 505.752197265625, - "learning_rate": 3.9897213481778006e-05, - "loss": 91.5303, - "step": 45890 - }, - { - "epoch": 0.3708821176641699, - "grad_norm": 606.3091430664062, - "learning_rate": 3.989160643463445e-05, - "loss": 105.4998, - "step": 45900 - }, - { - "epoch": 0.37096291986845403, - "grad_norm": 758.8417358398438, - "learning_rate": 3.9885998226212764e-05, - "loss": 127.1263, - "step": 45910 - }, - { - "epoch": 0.37104372207273817, - "grad_norm": 765.279296875, - "learning_rate": 3.988038885695028e-05, - "loss": 120.4611, - "step": 45920 - }, - { - "epoch": 0.37112452427702225, - "grad_norm": 2315.994384765625, - "learning_rate": 3.987477832728444e-05, - "loss": 84.7662, - "step": 45930 - }, - { - "epoch": 0.3712053264813064, - "grad_norm": 1396.89208984375, - "learning_rate": 3.986916663765275e-05, - "loss": 104.4311, - "step": 45940 - }, - { - "epoch": 0.3712861286855905, - "grad_norm": 1148.427001953125, - "learning_rate": 3.9863553788492834e-05, - "loss": 93.0212, - "step": 45950 - }, - { - "epoch": 0.37136693088987466, - "grad_norm": 1189.8863525390625, - "learning_rate": 3.985793978024239e-05, - "loss": 99.6976, - "step": 45960 - }, - { - "epoch": 0.3714477330941588, - "grad_norm": 703.5571899414062, - "learning_rate": 3.985232461333921e-05, - "loss": 110.4056, - "step": 45970 - }, - { - "epoch": 0.37152853529844293, - "grad_norm": 603.1417236328125, - "learning_rate": 3.984670828822118e-05, - "loss": 92.1224, - "step": 45980 - }, - { - "epoch": 0.37160933750272707, - "grad_norm": 1940.2027587890625, - "learning_rate": 3.9841090805326264e-05, - "loss": 148.6085, - "step": 45990 - }, - { - "epoch": 0.3716901397070112, - "grad_norm": 540.3896484375, - "learning_rate": 3.983547216509254e-05, - "loss": 79.0965, - "step": 46000 - }, - { - "epoch": 0.37177094191129534, - "grad_norm": 784.1470947265625, - "learning_rate": 3.9829852367958144e-05, - "loss": 101.6512, - "step": 46010 - }, - { - "epoch": 0.3718517441155795, - "grad_norm": 656.9019165039062, - "learning_rate": 3.9824231414361324e-05, - "loss": 98.2294, - "step": 46020 - }, - { - "epoch": 0.3719325463198636, - "grad_norm": 808.6337890625, - "learning_rate": 3.9818609304740414e-05, - "loss": 105.8767, - "step": 46030 - }, - { - "epoch": 0.37201334852414775, - "grad_norm": 1407.4505615234375, - "learning_rate": 3.981298603953385e-05, - "loss": 127.9394, - "step": 46040 - }, - { - "epoch": 0.3720941507284319, - "grad_norm": 997.3860473632812, - "learning_rate": 3.980736161918013e-05, - "loss": 91.8471, - "step": 46050 - }, - { - "epoch": 0.372174952932716, - "grad_norm": 1035.155517578125, - "learning_rate": 3.980173604411786e-05, - "loss": 108.5042, - "step": 46060 - }, - { - "epoch": 0.37225575513700015, - "grad_norm": 746.734130859375, - "learning_rate": 3.979610931478574e-05, - "loss": 136.0266, - "step": 46070 - }, - { - "epoch": 0.3723365573412843, - "grad_norm": 708.9320678710938, - "learning_rate": 3.979048143162255e-05, - "loss": 115.9524, - "step": 46080 - }, - { - "epoch": 0.3724173595455684, - "grad_norm": 850.4466552734375, - "learning_rate": 3.9784852395067166e-05, - "loss": 96.2973, - "step": 46090 - }, - { - "epoch": 0.37249816174985256, - "grad_norm": 718.0418090820312, - "learning_rate": 3.977922220555855e-05, - "loss": 107.5985, - "step": 46100 - }, - { - "epoch": 0.37257896395413664, - "grad_norm": 905.7068481445312, - "learning_rate": 3.977359086353576e-05, - "loss": 139.6739, - "step": 46110 - }, - { - "epoch": 0.3726597661584208, - "grad_norm": 649.9468383789062, - "learning_rate": 3.976795836943793e-05, - "loss": 97.0027, - "step": 46120 - }, - { - "epoch": 0.3727405683627049, - "grad_norm": 960.3931274414062, - "learning_rate": 3.97623247237043e-05, - "loss": 105.8629, - "step": 46130 - }, - { - "epoch": 0.37282137056698905, - "grad_norm": 1264.8843994140625, - "learning_rate": 3.9756689926774196e-05, - "loss": 127.3272, - "step": 46140 - }, - { - "epoch": 0.3729021727712732, - "grad_norm": 1072.907470703125, - "learning_rate": 3.9751053979087035e-05, - "loss": 109.9439, - "step": 46150 - }, - { - "epoch": 0.3729829749755573, - "grad_norm": 842.3941650390625, - "learning_rate": 3.97454168810823e-05, - "loss": 120.5815, - "step": 46160 - }, - { - "epoch": 0.37306377717984146, - "grad_norm": 900.7999267578125, - "learning_rate": 3.973977863319961e-05, - "loss": 136.89, - "step": 46170 - }, - { - "epoch": 0.3731445793841256, - "grad_norm": 1484.1253662109375, - "learning_rate": 3.973413923587862e-05, - "loss": 87.9951, - "step": 46180 - }, - { - "epoch": 0.37322538158840973, - "grad_norm": 832.4470825195312, - "learning_rate": 3.9728498689559126e-05, - "loss": 128.0379, - "step": 46190 - }, - { - "epoch": 0.37330618379269387, - "grad_norm": 746.1774291992188, - "learning_rate": 3.9722856994680966e-05, - "loss": 117.0908, - "step": 46200 - }, - { - "epoch": 0.373386985996978, - "grad_norm": 1237.2900390625, - "learning_rate": 3.971721415168411e-05, - "loss": 108.8308, - "step": 46210 - }, - { - "epoch": 0.37346778820126214, - "grad_norm": 980.23779296875, - "learning_rate": 3.9711570161008596e-05, - "loss": 106.2167, - "step": 46220 - }, - { - "epoch": 0.3735485904055463, - "grad_norm": 686.0955200195312, - "learning_rate": 3.9705925023094554e-05, - "loss": 117.2731, - "step": 46230 - }, - { - "epoch": 0.3736293926098304, - "grad_norm": 1569.9000244140625, - "learning_rate": 3.970027873838219e-05, - "loss": 115.6727, - "step": 46240 - }, - { - "epoch": 0.37371019481411455, - "grad_norm": 769.4691162109375, - "learning_rate": 3.969463130731183e-05, - "loss": 104.5978, - "step": 46250 - }, - { - "epoch": 0.3737909970183987, - "grad_norm": 1436.328857421875, - "learning_rate": 3.9688982730323865e-05, - "loss": 144.5803, - "step": 46260 - }, - { - "epoch": 0.3738717992226828, - "grad_norm": 738.8613891601562, - "learning_rate": 3.968333300785878e-05, - "loss": 140.2385, - "step": 46270 - }, - { - "epoch": 0.3739526014269669, - "grad_norm": 965.0674438476562, - "learning_rate": 3.967768214035715e-05, - "loss": 101.6887, - "step": 46280 - }, - { - "epoch": 0.37403340363125104, - "grad_norm": 964.080078125, - "learning_rate": 3.967203012825965e-05, - "loss": 130.8488, - "step": 46290 - }, - { - "epoch": 0.3741142058355352, - "grad_norm": 752.899658203125, - "learning_rate": 3.966637697200703e-05, - "loss": 84.6486, - "step": 46300 - }, - { - "epoch": 0.3741950080398193, - "grad_norm": 832.9948120117188, - "learning_rate": 3.966072267204014e-05, - "loss": 86.7594, - "step": 46310 - }, - { - "epoch": 0.37427581024410345, - "grad_norm": 981.3856201171875, - "learning_rate": 3.965506722879991e-05, - "loss": 106.08, - "step": 46320 - }, - { - "epoch": 0.3743566124483876, - "grad_norm": 705.4466552734375, - "learning_rate": 3.964941064272736e-05, - "loss": 114.1771, - "step": 46330 - }, - { - "epoch": 0.3744374146526717, - "grad_norm": 1248.6888427734375, - "learning_rate": 3.964375291426361e-05, - "loss": 105.6377, - "step": 46340 - }, - { - "epoch": 0.37451821685695585, - "grad_norm": 525.76611328125, - "learning_rate": 3.963809404384985e-05, - "loss": 66.1932, - "step": 46350 - }, - { - "epoch": 0.37459901906124, - "grad_norm": 1109.52783203125, - "learning_rate": 3.963243403192739e-05, - "loss": 123.7088, - "step": 46360 - }, - { - "epoch": 0.3746798212655241, - "grad_norm": 734.3974609375, - "learning_rate": 3.962677287893758e-05, - "loss": 112.7313, - "step": 46370 - }, - { - "epoch": 0.37476062346980826, - "grad_norm": 755.6273193359375, - "learning_rate": 3.962111058532192e-05, - "loss": 95.6671, - "step": 46380 - }, - { - "epoch": 0.3748414256740924, - "grad_norm": 986.2940063476562, - "learning_rate": 3.9615447151521945e-05, - "loss": 112.6458, - "step": 46390 - }, - { - "epoch": 0.37492222787837653, - "grad_norm": 948.8670043945312, - "learning_rate": 3.960978257797931e-05, - "loss": 109.8484, - "step": 46400 - }, - { - "epoch": 0.37500303008266067, - "grad_norm": 861.470947265625, - "learning_rate": 3.960411686513574e-05, - "loss": 93.6887, - "step": 46410 - }, - { - "epoch": 0.3750838322869448, - "grad_norm": 1140.5228271484375, - "learning_rate": 3.9598450013433075e-05, - "loss": 99.4153, - "step": 46420 - }, - { - "epoch": 0.37516463449122894, - "grad_norm": 739.126953125, - "learning_rate": 3.959278202331322e-05, - "loss": 95.8196, - "step": 46430 - }, - { - "epoch": 0.3752454366955131, - "grad_norm": 899.4149780273438, - "learning_rate": 3.9587112895218184e-05, - "loss": 112.1879, - "step": 46440 - }, - { - "epoch": 0.37532623889979716, - "grad_norm": 1387.8011474609375, - "learning_rate": 3.958144262959004e-05, - "loss": 114.7851, - "step": 46450 - }, - { - "epoch": 0.3754070411040813, - "grad_norm": 373.55596923828125, - "learning_rate": 3.957577122687098e-05, - "loss": 109.7158, - "step": 46460 - }, - { - "epoch": 0.37548784330836543, - "grad_norm": 608.638916015625, - "learning_rate": 3.9570098687503274e-05, - "loss": 97.0395, - "step": 46470 - }, - { - "epoch": 0.37556864551264957, - "grad_norm": 863.2166748046875, - "learning_rate": 3.9564425011929265e-05, - "loss": 109.1623, - "step": 46480 - }, - { - "epoch": 0.3756494477169337, - "grad_norm": 960.890869140625, - "learning_rate": 3.955875020059141e-05, - "loss": 118.0388, - "step": 46490 - }, - { - "epoch": 0.37573024992121784, - "grad_norm": 1727.66650390625, - "learning_rate": 3.955307425393224e-05, - "loss": 110.9611, - "step": 46500 - }, - { - "epoch": 0.375811052125502, - "grad_norm": 898.266357421875, - "learning_rate": 3.954739717239437e-05, - "loss": 138.2874, - "step": 46510 - }, - { - "epoch": 0.3758918543297861, - "grad_norm": 1126.9788818359375, - "learning_rate": 3.954171895642052e-05, - "loss": 124.6798, - "step": 46520 - }, - { - "epoch": 0.37597265653407025, - "grad_norm": 1646.521728515625, - "learning_rate": 3.953603960645349e-05, - "loss": 109.147, - "step": 46530 - }, - { - "epoch": 0.3760534587383544, - "grad_norm": 872.5701293945312, - "learning_rate": 3.953035912293616e-05, - "loss": 140.6439, - "step": 46540 - }, - { - "epoch": 0.3761342609426385, - "grad_norm": 930.3521118164062, - "learning_rate": 3.95246775063115e-05, - "loss": 107.6195, - "step": 46550 - }, - { - "epoch": 0.37621506314692266, - "grad_norm": 827.6351928710938, - "learning_rate": 3.951899475702259e-05, - "loss": 120.3546, - "step": 46560 - }, - { - "epoch": 0.3762958653512068, - "grad_norm": 728.7617797851562, - "learning_rate": 3.951331087551257e-05, - "loss": 161.0274, - "step": 46570 - }, - { - "epoch": 0.37637666755549093, - "grad_norm": 416.2427062988281, - "learning_rate": 3.950762586222468e-05, - "loss": 87.998, - "step": 46580 - }, - { - "epoch": 0.37645746975977507, - "grad_norm": 979.9637451171875, - "learning_rate": 3.950193971760226e-05, - "loss": 111.0409, - "step": 46590 - }, - { - "epoch": 0.3765382719640592, - "grad_norm": 1036.21240234375, - "learning_rate": 3.9496252442088733e-05, - "loss": 104.176, - "step": 46600 - }, - { - "epoch": 0.37661907416834334, - "grad_norm": 609.0379028320312, - "learning_rate": 3.949056403612758e-05, - "loss": 115.3516, - "step": 46610 - }, - { - "epoch": 0.3766998763726274, - "grad_norm": 898.330810546875, - "learning_rate": 3.948487450016242e-05, - "loss": 158.0358, - "step": 46620 - }, - { - "epoch": 0.37678067857691155, - "grad_norm": 617.0128784179688, - "learning_rate": 3.947918383463691e-05, - "loss": 112.8965, - "step": 46630 - }, - { - "epoch": 0.3768614807811957, - "grad_norm": 852.5950927734375, - "learning_rate": 3.947349203999484e-05, - "loss": 88.0935, - "step": 46640 - }, - { - "epoch": 0.3769422829854798, - "grad_norm": 962.6497802734375, - "learning_rate": 3.946779911668006e-05, - "loss": 90.8999, - "step": 46650 - }, - { - "epoch": 0.37702308518976396, - "grad_norm": 938.2866821289062, - "learning_rate": 3.946210506513651e-05, - "loss": 96.7737, - "step": 46660 - }, - { - "epoch": 0.3771038873940481, - "grad_norm": 989.3900756835938, - "learning_rate": 3.945640988580824e-05, - "loss": 119.6526, - "step": 46670 - }, - { - "epoch": 0.37718468959833223, - "grad_norm": 1187.77880859375, - "learning_rate": 3.945071357913935e-05, - "loss": 93.2659, - "step": 46680 - }, - { - "epoch": 0.37726549180261637, - "grad_norm": 594.395263671875, - "learning_rate": 3.9445016145574074e-05, - "loss": 122.9094, - "step": 46690 - }, - { - "epoch": 0.3773462940069005, - "grad_norm": 1945.2366943359375, - "learning_rate": 3.943931758555669e-05, - "loss": 102.1613, - "step": 46700 - }, - { - "epoch": 0.37742709621118464, - "grad_norm": 688.2275390625, - "learning_rate": 3.9433617899531597e-05, - "loss": 105.5468, - "step": 46710 - }, - { - "epoch": 0.3775078984154688, - "grad_norm": 1169.5274658203125, - "learning_rate": 3.942791708794326e-05, - "loss": 97.8202, - "step": 46720 - }, - { - "epoch": 0.3775887006197529, - "grad_norm": 481.2340393066406, - "learning_rate": 3.942221515123623e-05, - "loss": 91.6597, - "step": 46730 - }, - { - "epoch": 0.37766950282403705, - "grad_norm": 460.6240539550781, - "learning_rate": 3.9416512089855184e-05, - "loss": 92.9448, - "step": 46740 - }, - { - "epoch": 0.3777503050283212, - "grad_norm": 1157.7467041015625, - "learning_rate": 3.941080790424484e-05, - "loss": 83.4063, - "step": 46750 - }, - { - "epoch": 0.3778311072326053, - "grad_norm": 535.7223510742188, - "learning_rate": 3.940510259485002e-05, - "loss": 108.5435, - "step": 46760 - }, - { - "epoch": 0.37791190943688946, - "grad_norm": 555.3814697265625, - "learning_rate": 3.939939616211563e-05, - "loss": 140.6824, - "step": 46770 - }, - { - "epoch": 0.3779927116411736, - "grad_norm": 980.8284301757812, - "learning_rate": 3.939368860648669e-05, - "loss": 120.5927, - "step": 46780 - }, - { - "epoch": 0.37807351384545773, - "grad_norm": 1431.340087890625, - "learning_rate": 3.938797992840828e-05, - "loss": 107.9531, - "step": 46790 - }, - { - "epoch": 0.3781543160497418, - "grad_norm": 328.6495056152344, - "learning_rate": 3.938227012832557e-05, - "loss": 97.3689, - "step": 46800 - }, - { - "epoch": 0.37823511825402595, - "grad_norm": 1474.0789794921875, - "learning_rate": 3.937655920668382e-05, - "loss": 115.5767, - "step": 46810 - }, - { - "epoch": 0.3783159204583101, - "grad_norm": 847.3453979492188, - "learning_rate": 3.937084716392838e-05, - "loss": 118.3871, - "step": 46820 - }, - { - "epoch": 0.3783967226625942, - "grad_norm": 1071.615234375, - "learning_rate": 3.936513400050469e-05, - "loss": 101.3431, - "step": 46830 - }, - { - "epoch": 0.37847752486687836, - "grad_norm": 759.0928955078125, - "learning_rate": 3.9359419716858274e-05, - "loss": 109.863, - "step": 46840 - }, - { - "epoch": 0.3785583270711625, - "grad_norm": 943.630615234375, - "learning_rate": 3.935370431343475e-05, - "loss": 100.4314, - "step": 46850 - }, - { - "epoch": 0.37863912927544663, - "grad_norm": 397.9150695800781, - "learning_rate": 3.93479877906798e-05, - "loss": 117.9238, - "step": 46860 - }, - { - "epoch": 0.37871993147973076, - "grad_norm": 845.817138671875, - "learning_rate": 3.934227014903922e-05, - "loss": 129.303, - "step": 46870 - }, - { - "epoch": 0.3788007336840149, - "grad_norm": 1049.787109375, - "learning_rate": 3.933655138895889e-05, - "loss": 98.6589, - "step": 46880 - }, - { - "epoch": 0.37888153588829904, - "grad_norm": 1184.728271484375, - "learning_rate": 3.9330831510884755e-05, - "loss": 124.6491, - "step": 46890 - }, - { - "epoch": 0.3789623380925832, - "grad_norm": 849.4947509765625, - "learning_rate": 3.932511051526289e-05, - "loss": 176.6078, - "step": 46900 - }, - { - "epoch": 0.3790431402968673, - "grad_norm": 1300.6776123046875, - "learning_rate": 3.93193884025394e-05, - "loss": 125.0592, - "step": 46910 - }, - { - "epoch": 0.37912394250115145, - "grad_norm": 1836.6527099609375, - "learning_rate": 3.931366517316052e-05, - "loss": 120.8265, - "step": 46920 - }, - { - "epoch": 0.3792047447054356, - "grad_norm": 1016.9020385742188, - "learning_rate": 3.9307940827572555e-05, - "loss": 154.6555, - "step": 46930 - }, - { - "epoch": 0.3792855469097197, - "grad_norm": 717.0535278320312, - "learning_rate": 3.930221536622191e-05, - "loss": 103.9901, - "step": 46940 - }, - { - "epoch": 0.37936634911400385, - "grad_norm": 472.31201171875, - "learning_rate": 3.9296488789555066e-05, - "loss": 95.0994, - "step": 46950 - }, - { - "epoch": 0.379447151318288, - "grad_norm": 1067.9345703125, - "learning_rate": 3.9290761098018585e-05, - "loss": 101.1833, - "step": 46960 - }, - { - "epoch": 0.37952795352257207, - "grad_norm": 673.5323486328125, - "learning_rate": 3.928503229205913e-05, - "loss": 97.822, - "step": 46970 - }, - { - "epoch": 0.3796087557268562, - "grad_norm": 1927.61669921875, - "learning_rate": 3.927930237212345e-05, - "loss": 125.9678, - "step": 46980 - }, - { - "epoch": 0.37968955793114034, - "grad_norm": 599.8441162109375, - "learning_rate": 3.927357133865836e-05, - "loss": 87.7857, - "step": 46990 - }, - { - "epoch": 0.3797703601354245, - "grad_norm": 1053.293701171875, - "learning_rate": 3.92678391921108e-05, - "loss": 121.0238, - "step": 47000 - }, - { - "epoch": 0.3798511623397086, - "grad_norm": 809.90478515625, - "learning_rate": 3.926210593292775e-05, - "loss": 81.9891, - "step": 47010 - }, - { - "epoch": 0.37993196454399275, - "grad_norm": 1108.720703125, - "learning_rate": 3.925637156155633e-05, - "loss": 114.6053, - "step": 47020 - }, - { - "epoch": 0.3800127667482769, - "grad_norm": 1081.187255859375, - "learning_rate": 3.925063607844369e-05, - "loss": 118.0476, - "step": 47030 - }, - { - "epoch": 0.380093568952561, - "grad_norm": 505.32012939453125, - "learning_rate": 3.924489948403711e-05, - "loss": 122.5231, - "step": 47040 - }, - { - "epoch": 0.38017437115684516, - "grad_norm": 1709.1114501953125, - "learning_rate": 3.923916177878394e-05, - "loss": 77.4536, - "step": 47050 - }, - { - "epoch": 0.3802551733611293, - "grad_norm": 890.4212646484375, - "learning_rate": 3.9233422963131616e-05, - "loss": 101.4886, - "step": 47060 - }, - { - "epoch": 0.38033597556541343, - "grad_norm": 1238.6334228515625, - "learning_rate": 3.922768303752766e-05, - "loss": 146.0303, - "step": 47070 - }, - { - "epoch": 0.38041677776969757, - "grad_norm": 1397.5245361328125, - "learning_rate": 3.922194200241969e-05, - "loss": 82.7807, - "step": 47080 - }, - { - "epoch": 0.3804975799739817, - "grad_norm": 782.902587890625, - "learning_rate": 3.92161998582554e-05, - "loss": 113.6578, - "step": 47090 - }, - { - "epoch": 0.38057838217826584, - "grad_norm": 645.85205078125, - "learning_rate": 3.9210456605482576e-05, - "loss": 113.206, - "step": 47100 - }, - { - "epoch": 0.38065918438255, - "grad_norm": 896.6744995117188, - "learning_rate": 3.9204712244549085e-05, - "loss": 127.7837, - "step": 47110 - }, - { - "epoch": 0.3807399865868341, - "grad_norm": 966.9940795898438, - "learning_rate": 3.919896677590289e-05, - "loss": 109.3714, - "step": 47120 - }, - { - "epoch": 0.38082078879111825, - "grad_norm": 1268.2430419921875, - "learning_rate": 3.9193220199992025e-05, - "loss": 133.6057, - "step": 47130 - }, - { - "epoch": 0.38090159099540233, - "grad_norm": 1536.5810546875, - "learning_rate": 3.918747251726463e-05, - "loss": 92.5105, - "step": 47140 - }, - { - "epoch": 0.38098239319968646, - "grad_norm": 800.822509765625, - "learning_rate": 3.9181723728168916e-05, - "loss": 113.4151, - "step": 47150 - }, - { - "epoch": 0.3810631954039706, - "grad_norm": 1093.1663818359375, - "learning_rate": 3.9175973833153186e-05, - "loss": 105.4528, - "step": 47160 - }, - { - "epoch": 0.38114399760825474, - "grad_norm": 1145.0089111328125, - "learning_rate": 3.9170222832665825e-05, - "loss": 98.0383, - "step": 47170 - }, - { - "epoch": 0.3812247998125389, - "grad_norm": 813.7997436523438, - "learning_rate": 3.9164470727155314e-05, - "loss": 134.5856, - "step": 47180 - }, - { - "epoch": 0.381305602016823, - "grad_norm": 1475.7596435546875, - "learning_rate": 3.9158717517070214e-05, - "loss": 115.2972, - "step": 47190 - }, - { - "epoch": 0.38138640422110714, - "grad_norm": 2016.873291015625, - "learning_rate": 3.915296320285917e-05, - "loss": 88.4795, - "step": 47200 - }, - { - "epoch": 0.3814672064253913, - "grad_norm": 881.2467651367188, - "learning_rate": 3.9147207784970914e-05, - "loss": 100.8392, - "step": 47210 - }, - { - "epoch": 0.3815480086296754, - "grad_norm": 1427.3326416015625, - "learning_rate": 3.914145126385426e-05, - "loss": 113.5537, - "step": 47220 - }, - { - "epoch": 0.38162881083395955, - "grad_norm": 1880.5430908203125, - "learning_rate": 3.9135693639958125e-05, - "loss": 101.0941, - "step": 47230 - }, - { - "epoch": 0.3817096130382437, - "grad_norm": 2073.45703125, - "learning_rate": 3.91299349137315e-05, - "loss": 121.9174, - "step": 47240 - }, - { - "epoch": 0.3817904152425278, - "grad_norm": 861.439208984375, - "learning_rate": 3.912417508562345e-05, - "loss": 107.3421, - "step": 47250 - }, - { - "epoch": 0.38187121744681196, - "grad_norm": 1439.5596923828125, - "learning_rate": 3.911841415608315e-05, - "loss": 131.3347, - "step": 47260 - }, - { - "epoch": 0.3819520196510961, - "grad_norm": 615.0855102539062, - "learning_rate": 3.9112652125559845e-05, - "loss": 65.4491, - "step": 47270 - }, - { - "epoch": 0.38203282185538023, - "grad_norm": 1054.773193359375, - "learning_rate": 3.9106888994502864e-05, - "loss": 126.6475, - "step": 47280 - }, - { - "epoch": 0.38211362405966437, - "grad_norm": 667.0865478515625, - "learning_rate": 3.910112476336164e-05, - "loss": 132.1218, - "step": 47290 - }, - { - "epoch": 0.3821944262639485, - "grad_norm": 702.2008666992188, - "learning_rate": 3.909535943258567e-05, - "loss": 119.4745, - "step": 47300 - }, - { - "epoch": 0.3822752284682326, - "grad_norm": 767.3807983398438, - "learning_rate": 3.9089593002624555e-05, - "loss": 119.6439, - "step": 47310 - }, - { - "epoch": 0.3823560306725167, - "grad_norm": 949.8206176757812, - "learning_rate": 3.908382547392796e-05, - "loss": 95.3152, - "step": 47320 - }, - { - "epoch": 0.38243683287680086, - "grad_norm": 1082.715576171875, - "learning_rate": 3.907805684694566e-05, - "loss": 114.4739, - "step": 47330 - }, - { - "epoch": 0.382517635081085, - "grad_norm": 1111.605712890625, - "learning_rate": 3.907228712212751e-05, - "loss": 119.8606, - "step": 47340 - }, - { - "epoch": 0.38259843728536913, - "grad_norm": 486.0558776855469, - "learning_rate": 3.906651629992342e-05, - "loss": 92.9736, - "step": 47350 - }, - { - "epoch": 0.38267923948965327, - "grad_norm": 943.7283325195312, - "learning_rate": 3.9060744380783435e-05, - "loss": 190.304, - "step": 47360 - }, - { - "epoch": 0.3827600416939374, - "grad_norm": 1046.2020263671875, - "learning_rate": 3.905497136515766e-05, - "loss": 129.0609, - "step": 47370 - }, - { - "epoch": 0.38284084389822154, - "grad_norm": 911.72119140625, - "learning_rate": 3.9049197253496264e-05, - "loss": 117.5524, - "step": 47380 - }, - { - "epoch": 0.3829216461025057, - "grad_norm": 1293.475830078125, - "learning_rate": 3.9043422046249544e-05, - "loss": 104.8004, - "step": 47390 - }, - { - "epoch": 0.3830024483067898, - "grad_norm": 1876.8126220703125, - "learning_rate": 3.903764574386786e-05, - "loss": 116.388, - "step": 47400 - }, - { - "epoch": 0.38308325051107395, - "grad_norm": 1358.7467041015625, - "learning_rate": 3.9031868346801656e-05, - "loss": 119.3034, - "step": 47410 - }, - { - "epoch": 0.3831640527153581, - "grad_norm": 1070.7967529296875, - "learning_rate": 3.902608985550147e-05, - "loss": 104.3396, - "step": 47420 - }, - { - "epoch": 0.3832448549196422, - "grad_norm": 386.59814453125, - "learning_rate": 3.902031027041793e-05, - "loss": 81.6748, - "step": 47430 - }, - { - "epoch": 0.38332565712392636, - "grad_norm": 614.6427001953125, - "learning_rate": 3.9014529592001705e-05, - "loss": 131.3421, - "step": 47440 - }, - { - "epoch": 0.3834064593282105, - "grad_norm": 1415.6502685546875, - "learning_rate": 3.900874782070362e-05, - "loss": 102.3757, - "step": 47450 - }, - { - "epoch": 0.3834872615324946, - "grad_norm": 884.9436645507812, - "learning_rate": 3.900296495697453e-05, - "loss": 148.1732, - "step": 47460 - }, - { - "epoch": 0.38356806373677876, - "grad_norm": 1199.4864501953125, - "learning_rate": 3.899718100126541e-05, - "loss": 131.5045, - "step": 47470 - }, - { - "epoch": 0.3836488659410629, - "grad_norm": 1135.696044921875, - "learning_rate": 3.899139595402729e-05, - "loss": 114.3465, - "step": 47480 - }, - { - "epoch": 0.383729668145347, - "grad_norm": 904.6636352539062, - "learning_rate": 3.898560981571131e-05, - "loss": 115.2415, - "step": 47490 - }, - { - "epoch": 0.3838104703496311, - "grad_norm": 1164.4591064453125, - "learning_rate": 3.897982258676867e-05, - "loss": 99.6709, - "step": 47500 - }, - { - "epoch": 0.38389127255391525, - "grad_norm": 1913.4586181640625, - "learning_rate": 3.897403426765069e-05, - "loss": 107.1727, - "step": 47510 - }, - { - "epoch": 0.3839720747581994, - "grad_norm": 727.5775146484375, - "learning_rate": 3.896824485880874e-05, - "loss": 128.9794, - "step": 47520 - }, - { - "epoch": 0.3840528769624835, - "grad_norm": 590.7037353515625, - "learning_rate": 3.896245436069431e-05, - "loss": 133.306, - "step": 47530 - }, - { - "epoch": 0.38413367916676766, - "grad_norm": 532.99462890625, - "learning_rate": 3.895666277375892e-05, - "loss": 107.8768, - "step": 47540 - }, - { - "epoch": 0.3842144813710518, - "grad_norm": 8771.3466796875, - "learning_rate": 3.895087009845425e-05, - "loss": 135.4057, - "step": 47550 - }, - { - "epoch": 0.38429528357533593, - "grad_norm": 1101.330322265625, - "learning_rate": 3.894507633523199e-05, - "loss": 105.8354, - "step": 47560 - }, - { - "epoch": 0.38437608577962007, - "grad_norm": 956.5853881835938, - "learning_rate": 3.8939281484543974e-05, - "loss": 128.3691, - "step": 47570 - }, - { - "epoch": 0.3844568879839042, - "grad_norm": 1051.7442626953125, - "learning_rate": 3.8933485546842094e-05, - "loss": 159.3747, - "step": 47580 - }, - { - "epoch": 0.38453769018818834, - "grad_norm": 699.3751220703125, - "learning_rate": 3.892768852257831e-05, - "loss": 80.1826, - "step": 47590 - }, - { - "epoch": 0.3846184923924725, - "grad_norm": 907.2599487304688, - "learning_rate": 3.8921890412204705e-05, - "loss": 142.5197, - "step": 47600 - }, - { - "epoch": 0.3846992945967566, - "grad_norm": 615.1507568359375, - "learning_rate": 3.891609121617342e-05, - "loss": 88.2244, - "step": 47610 - }, - { - "epoch": 0.38478009680104075, - "grad_norm": 753.0390625, - "learning_rate": 3.891029093493669e-05, - "loss": 89.3676, - "step": 47620 - }, - { - "epoch": 0.3848608990053249, - "grad_norm": 1601.640625, - "learning_rate": 3.890448956894682e-05, - "loss": 96.2632, - "step": 47630 - }, - { - "epoch": 0.384941701209609, - "grad_norm": 743.3245239257812, - "learning_rate": 3.889868711865624e-05, - "loss": 137.9839, - "step": 47640 - }, - { - "epoch": 0.38502250341389316, - "grad_norm": 830.4083251953125, - "learning_rate": 3.8892883584517415e-05, - "loss": 113.4498, - "step": 47650 - }, - { - "epoch": 0.38510330561817724, - "grad_norm": 2680.26171875, - "learning_rate": 3.8887078966982925e-05, - "loss": 95.9871, - "step": 47660 - }, - { - "epoch": 0.3851841078224614, - "grad_norm": 1318.6839599609375, - "learning_rate": 3.888127326650542e-05, - "loss": 122.0557, - "step": 47670 - }, - { - "epoch": 0.3852649100267455, - "grad_norm": 1037.0538330078125, - "learning_rate": 3.887546648353765e-05, - "loss": 104.5398, - "step": 47680 - }, - { - "epoch": 0.38534571223102965, - "grad_norm": 957.9652099609375, - "learning_rate": 3.886965861853244e-05, - "loss": 97.5719, - "step": 47690 - }, - { - "epoch": 0.3854265144353138, - "grad_norm": 843.6104125976562, - "learning_rate": 3.8863849671942685e-05, - "loss": 94.7224, - "step": 47700 - }, - { - "epoch": 0.3855073166395979, - "grad_norm": 944.6458740234375, - "learning_rate": 3.885803964422139e-05, - "loss": 107.4823, - "step": 47710 - }, - { - "epoch": 0.38558811884388206, - "grad_norm": 1156.75634765625, - "learning_rate": 3.885222853582163e-05, - "loss": 125.3506, - "step": 47720 - }, - { - "epoch": 0.3856689210481662, - "grad_norm": 1195.7545166015625, - "learning_rate": 3.884641634719657e-05, - "loss": 126.4053, - "step": 47730 - }, - { - "epoch": 0.3857497232524503, - "grad_norm": 740.35498046875, - "learning_rate": 3.8840603078799445e-05, - "loss": 104.9637, - "step": 47740 - }, - { - "epoch": 0.38583052545673446, - "grad_norm": 2012.3245849609375, - "learning_rate": 3.883478873108361e-05, - "loss": 84.3562, - "step": 47750 - }, - { - "epoch": 0.3859113276610186, - "grad_norm": 1098.08154296875, - "learning_rate": 3.8828973304502446e-05, - "loss": 117.8925, - "step": 47760 - }, - { - "epoch": 0.38599212986530274, - "grad_norm": 969.8004150390625, - "learning_rate": 3.8823156799509484e-05, - "loss": 111.5561, - "step": 47770 - }, - { - "epoch": 0.38607293206958687, - "grad_norm": 769.5950317382812, - "learning_rate": 3.881733921655829e-05, - "loss": 144.606, - "step": 47780 - }, - { - "epoch": 0.386153734273871, - "grad_norm": 922.6931762695312, - "learning_rate": 3.8811520556102535e-05, - "loss": 96.3772, - "step": 47790 - }, - { - "epoch": 0.38623453647815514, - "grad_norm": 826.7108764648438, - "learning_rate": 3.880570081859597e-05, - "loss": 142.5743, - "step": 47800 - }, - { - "epoch": 0.3863153386824393, - "grad_norm": 959.8422241210938, - "learning_rate": 3.8799880004492425e-05, - "loss": 95.6075, - "step": 47810 - }, - { - "epoch": 0.3863961408867234, - "grad_norm": 1383.726806640625, - "learning_rate": 3.879405811424583e-05, - "loss": 104.0035, - "step": 47820 - }, - { - "epoch": 0.3864769430910075, - "grad_norm": 1326.766845703125, - "learning_rate": 3.878823514831018e-05, - "loss": 102.0029, - "step": 47830 - }, - { - "epoch": 0.38655774529529163, - "grad_norm": 1339.0462646484375, - "learning_rate": 3.8782411107139564e-05, - "loss": 263.5607, - "step": 47840 - }, - { - "epoch": 0.38663854749957577, - "grad_norm": 519.9404907226562, - "learning_rate": 3.877658599118815e-05, - "loss": 117.9621, - "step": 47850 - }, - { - "epoch": 0.3867193497038599, - "grad_norm": 2115.120361328125, - "learning_rate": 3.87707598009102e-05, - "loss": 122.6829, - "step": 47860 - }, - { - "epoch": 0.38680015190814404, - "grad_norm": 783.2742309570312, - "learning_rate": 3.876493253676004e-05, - "loss": 108.2522, - "step": 47870 - }, - { - "epoch": 0.3868809541124282, - "grad_norm": 843.0562744140625, - "learning_rate": 3.875910419919211e-05, - "loss": 116.3621, - "step": 47880 - }, - { - "epoch": 0.3869617563167123, - "grad_norm": 435.97222900390625, - "learning_rate": 3.8753274788660894e-05, - "loss": 83.0896, - "step": 47890 - }, - { - "epoch": 0.38704255852099645, - "grad_norm": 983.96533203125, - "learning_rate": 3.8747444305621e-05, - "loss": 96.8669, - "step": 47900 - }, - { - "epoch": 0.3871233607252806, - "grad_norm": 964.2138671875, - "learning_rate": 3.874161275052709e-05, - "loss": 129.1014, - "step": 47910 - }, - { - "epoch": 0.3872041629295647, - "grad_norm": 1121.8646240234375, - "learning_rate": 3.873578012383393e-05, - "loss": 118.4479, - "step": 47920 - }, - { - "epoch": 0.38728496513384886, - "grad_norm": 654.1787109375, - "learning_rate": 3.8729946425996345e-05, - "loss": 97.7785, - "step": 47930 - }, - { - "epoch": 0.387365767338133, - "grad_norm": 602.5122680664062, - "learning_rate": 3.872411165746927e-05, - "loss": 99.3045, - "step": 47940 - }, - { - "epoch": 0.38744656954241713, - "grad_norm": 918.0730590820312, - "learning_rate": 3.8718275818707715e-05, - "loss": 114.7021, - "step": 47950 - }, - { - "epoch": 0.38752737174670127, - "grad_norm": 768.8223266601562, - "learning_rate": 3.871243891016676e-05, - "loss": 110.7579, - "step": 47960 - }, - { - "epoch": 0.3876081739509854, - "grad_norm": 578.4861450195312, - "learning_rate": 3.870660093230159e-05, - "loss": 91.4064, - "step": 47970 - }, - { - "epoch": 0.38768897615526954, - "grad_norm": 906.9380493164062, - "learning_rate": 3.870076188556746e-05, - "loss": 82.5018, - "step": 47980 - }, - { - "epoch": 0.3877697783595537, - "grad_norm": 1280.52001953125, - "learning_rate": 3.869492177041971e-05, - "loss": 145.7058, - "step": 47990 - }, - { - "epoch": 0.38785058056383775, - "grad_norm": 1550.6630859375, - "learning_rate": 3.868908058731376e-05, - "loss": 122.7622, - "step": 48000 - }, - { - "epoch": 0.3879313827681219, - "grad_norm": 1323.330078125, - "learning_rate": 3.868323833670512e-05, - "loss": 142.0948, - "step": 48010 - }, - { - "epoch": 0.388012184972406, - "grad_norm": 464.24200439453125, - "learning_rate": 3.867739501904938e-05, - "loss": 114.638, - "step": 48020 - }, - { - "epoch": 0.38809298717669016, - "grad_norm": 2301.8740234375, - "learning_rate": 3.8671550634802216e-05, - "loss": 99.95, - "step": 48030 - }, - { - "epoch": 0.3881737893809743, - "grad_norm": 880.1580200195312, - "learning_rate": 3.8665705184419386e-05, - "loss": 85.972, - "step": 48040 - }, - { - "epoch": 0.38825459158525844, - "grad_norm": 1140.240478515625, - "learning_rate": 3.865985866835673e-05, - "loss": 110.4823, - "step": 48050 - }, - { - "epoch": 0.38833539378954257, - "grad_norm": 815.4284057617188, - "learning_rate": 3.865401108707017e-05, - "loss": 86.9864, - "step": 48060 - }, - { - "epoch": 0.3884161959938267, - "grad_norm": 4303.68408203125, - "learning_rate": 3.864816244101571e-05, - "loss": 159.1667, - "step": 48070 - }, - { - "epoch": 0.38849699819811084, - "grad_norm": 982.6038208007812, - "learning_rate": 3.864231273064944e-05, - "loss": 115.1189, - "step": 48080 - }, - { - "epoch": 0.388577800402395, - "grad_norm": 1396.8936767578125, - "learning_rate": 3.863646195642754e-05, - "loss": 148.0539, - "step": 48090 - }, - { - "epoch": 0.3886586026066791, - "grad_norm": 789.3301391601562, - "learning_rate": 3.8630610118806254e-05, - "loss": 104.2562, - "step": 48100 - }, - { - "epoch": 0.38873940481096325, - "grad_norm": 839.6363525390625, - "learning_rate": 3.862475721824193e-05, - "loss": 101.9156, - "step": 48110 - }, - { - "epoch": 0.3888202070152474, - "grad_norm": 712.3304443359375, - "learning_rate": 3.861890325519098e-05, - "loss": 105.9098, - "step": 48120 - }, - { - "epoch": 0.3889010092195315, - "grad_norm": 1239.8448486328125, - "learning_rate": 3.861304823010991e-05, - "loss": 145.9392, - "step": 48130 - }, - { - "epoch": 0.38898181142381566, - "grad_norm": 1212.0184326171875, - "learning_rate": 3.8607192143455326e-05, - "loss": 81.8331, - "step": 48140 - }, - { - "epoch": 0.3890626136280998, - "grad_norm": 1430.6044921875, - "learning_rate": 3.860133499568387e-05, - "loss": 99.6007, - "step": 48150 - }, - { - "epoch": 0.38914341583238393, - "grad_norm": 1163.176025390625, - "learning_rate": 3.859547678725231e-05, - "loss": 141.2144, - "step": 48160 - }, - { - "epoch": 0.389224218036668, - "grad_norm": 1022.246337890625, - "learning_rate": 3.8589617518617485e-05, - "loss": 101.7664, - "step": 48170 - }, - { - "epoch": 0.38930502024095215, - "grad_norm": 893.2597045898438, - "learning_rate": 3.858375719023629e-05, - "loss": 101.0294, - "step": 48180 - }, - { - "epoch": 0.3893858224452363, - "grad_norm": 947.1692504882812, - "learning_rate": 3.857789580256575e-05, - "loss": 116.4051, - "step": 48190 - }, - { - "epoch": 0.3894666246495204, - "grad_norm": 844.1005859375, - "learning_rate": 3.8572033356062943e-05, - "loss": 127.1178, - "step": 48200 - }, - { - "epoch": 0.38954742685380456, - "grad_norm": 623.7581787109375, - "learning_rate": 3.856616985118502e-05, - "loss": 81.5322, - "step": 48210 - }, - { - "epoch": 0.3896282290580887, - "grad_norm": 1067.190673828125, - "learning_rate": 3.856030528838925e-05, - "loss": 87.3352, - "step": 48220 - }, - { - "epoch": 0.38970903126237283, - "grad_norm": 1010.7390747070312, - "learning_rate": 3.8554439668132946e-05, - "loss": 88.8513, - "step": 48230 - }, - { - "epoch": 0.38978983346665697, - "grad_norm": 704.7854614257812, - "learning_rate": 3.854857299087353e-05, - "loss": 70.9982, - "step": 48240 - }, - { - "epoch": 0.3898706356709411, - "grad_norm": 1288.3310546875, - "learning_rate": 3.85427052570685e-05, - "loss": 116.1578, - "step": 48250 - }, - { - "epoch": 0.38995143787522524, - "grad_norm": 744.3843994140625, - "learning_rate": 3.853683646717543e-05, - "loss": 85.2874, - "step": 48260 - }, - { - "epoch": 0.3900322400795094, - "grad_norm": 2380.376220703125, - "learning_rate": 3.8530966621651976e-05, - "loss": 127.602, - "step": 48270 - }, - { - "epoch": 0.3901130422837935, - "grad_norm": 741.3364868164062, - "learning_rate": 3.852509572095588e-05, - "loss": 95.108, - "step": 48280 - }, - { - "epoch": 0.39019384448807765, - "grad_norm": 1109.61474609375, - "learning_rate": 3.851922376554499e-05, - "loss": 97.8444, - "step": 48290 - }, - { - "epoch": 0.3902746466923618, - "grad_norm": 890.3165283203125, - "learning_rate": 3.851335075587718e-05, - "loss": 105.9365, - "step": 48300 - }, - { - "epoch": 0.3903554488966459, - "grad_norm": 1131.7193603515625, - "learning_rate": 3.850747669241046e-05, - "loss": 230.5025, - "step": 48310 - }, - { - "epoch": 0.39043625110093005, - "grad_norm": 900.428955078125, - "learning_rate": 3.85016015756029e-05, - "loss": 142.0994, - "step": 48320 - }, - { - "epoch": 0.3905170533052142, - "grad_norm": 926.1004028320312, - "learning_rate": 3.849572540591264e-05, - "loss": 101.7708, - "step": 48330 - }, - { - "epoch": 0.3905978555094983, - "grad_norm": 903.0505981445312, - "learning_rate": 3.848984818379793e-05, - "loss": 156.7163, - "step": 48340 - }, - { - "epoch": 0.3906786577137824, - "grad_norm": 893.8513793945312, - "learning_rate": 3.8483969909717087e-05, - "loss": 111.0976, - "step": 48350 - }, - { - "epoch": 0.39075945991806654, - "grad_norm": 977.8648071289062, - "learning_rate": 3.84780905841285e-05, - "loss": 117.1988, - "step": 48360 - }, - { - "epoch": 0.3908402621223507, - "grad_norm": 542.1943969726562, - "learning_rate": 3.847221020749067e-05, - "loss": 98.5145, - "step": 48370 - }, - { - "epoch": 0.3909210643266348, - "grad_norm": 930.1452026367188, - "learning_rate": 3.846632878026214e-05, - "loss": 117.262, - "step": 48380 - }, - { - "epoch": 0.39100186653091895, - "grad_norm": 1221.2763671875, - "learning_rate": 3.846044630290158e-05, - "loss": 104.202, - "step": 48390 - }, - { - "epoch": 0.3910826687352031, - "grad_norm": 852.5763549804688, - "learning_rate": 3.8454562775867684e-05, - "loss": 130.5889, - "step": 48400 - }, - { - "epoch": 0.3911634709394872, - "grad_norm": 776.0374755859375, - "learning_rate": 3.844867819961928e-05, - "loss": 107.53, - "step": 48410 - }, - { - "epoch": 0.39124427314377136, - "grad_norm": 831.7626953125, - "learning_rate": 3.8442792574615275e-05, - "loss": 78.9652, - "step": 48420 - }, - { - "epoch": 0.3913250753480555, - "grad_norm": 1111.0281982421875, - "learning_rate": 3.843690590131462e-05, - "loss": 139.3929, - "step": 48430 - }, - { - "epoch": 0.39140587755233963, - "grad_norm": 611.9830932617188, - "learning_rate": 3.843101818017637e-05, - "loss": 117.7313, - "step": 48440 - }, - { - "epoch": 0.39148667975662377, - "grad_norm": 1825.8955078125, - "learning_rate": 3.842512941165968e-05, - "loss": 136.6222, - "step": 48450 - }, - { - "epoch": 0.3915674819609079, - "grad_norm": 728.9234619140625, - "learning_rate": 3.841923959622375e-05, - "loss": 105.8978, - "step": 48460 - }, - { - "epoch": 0.39164828416519204, - "grad_norm": 1047.6793212890625, - "learning_rate": 3.841334873432789e-05, - "loss": 124.6825, - "step": 48470 - }, - { - "epoch": 0.3917290863694762, - "grad_norm": 471.7818908691406, - "learning_rate": 3.840745682643147e-05, - "loss": 108.5956, - "step": 48480 - }, - { - "epoch": 0.3918098885737603, - "grad_norm": 1083.5595703125, - "learning_rate": 3.8401563872993966e-05, - "loss": 96.6816, - "step": 48490 - }, - { - "epoch": 0.39189069077804445, - "grad_norm": 808.9841918945312, - "learning_rate": 3.8395669874474915e-05, - "loss": 86.5018, - "step": 48500 - }, - { - "epoch": 0.3919714929823286, - "grad_norm": 712.6146850585938, - "learning_rate": 3.838977483133395e-05, - "loss": 106.1014, - "step": 48510 - }, - { - "epoch": 0.39205229518661266, - "grad_norm": 1028.868896484375, - "learning_rate": 3.8383878744030776e-05, - "loss": 92.1688, - "step": 48520 - }, - { - "epoch": 0.3921330973908968, - "grad_norm": 872.17431640625, - "learning_rate": 3.837798161302518e-05, - "loss": 120.555, - "step": 48530 - }, - { - "epoch": 0.39221389959518094, - "grad_norm": 554.2619018554688, - "learning_rate": 3.837208343877703e-05, - "loss": 96.4218, - "step": 48540 - }, - { - "epoch": 0.3922947017994651, - "grad_norm": 1091.57568359375, - "learning_rate": 3.836618422174628e-05, - "loss": 117.0071, - "step": 48550 - }, - { - "epoch": 0.3923755040037492, - "grad_norm": 1027.942626953125, - "learning_rate": 3.836028396239297e-05, - "loss": 96.0251, - "step": 48560 - }, - { - "epoch": 0.39245630620803335, - "grad_norm": 1116.5589599609375, - "learning_rate": 3.835438266117721e-05, - "loss": 117.1661, - "step": 48570 - }, - { - "epoch": 0.3925371084123175, - "grad_norm": 776.1079711914062, - "learning_rate": 3.834848031855919e-05, - "loss": 112.4619, - "step": 48580 - }, - { - "epoch": 0.3926179106166016, - "grad_norm": 1050.098876953125, - "learning_rate": 3.8342576934999184e-05, - "loss": 128.2885, - "step": 48590 - }, - { - "epoch": 0.39269871282088575, - "grad_norm": 982.8236694335938, - "learning_rate": 3.8336672510957574e-05, - "loss": 118.9931, - "step": 48600 - }, - { - "epoch": 0.3927795150251699, - "grad_norm": 637.8267822265625, - "learning_rate": 3.8330767046894765e-05, - "loss": 80.6884, - "step": 48610 - }, - { - "epoch": 0.392860317229454, - "grad_norm": 1200.0357666015625, - "learning_rate": 3.83248605432713e-05, - "loss": 134.4803, - "step": 48620 - }, - { - "epoch": 0.39294111943373816, - "grad_norm": 542.851318359375, - "learning_rate": 3.831895300054777e-05, - "loss": 93.7368, - "step": 48630 - }, - { - "epoch": 0.3930219216380223, - "grad_norm": 1551.2066650390625, - "learning_rate": 3.8313044419184873e-05, - "loss": 132.1422, - "step": 48640 - }, - { - "epoch": 0.39310272384230643, - "grad_norm": 893.1751708984375, - "learning_rate": 3.830713479964335e-05, - "loss": 123.4514, - "step": 48650 - }, - { - "epoch": 0.39318352604659057, - "grad_norm": 1519.560302734375, - "learning_rate": 3.830122414238406e-05, - "loss": 105.9823, - "step": 48660 - }, - { - "epoch": 0.3932643282508747, - "grad_norm": 1759.6954345703125, - "learning_rate": 3.8295312447867924e-05, - "loss": 98.295, - "step": 48670 - }, - { - "epoch": 0.39334513045515884, - "grad_norm": 1172.5924072265625, - "learning_rate": 3.828939971655595e-05, - "loss": 101.0303, - "step": 48680 - }, - { - "epoch": 0.3934259326594429, - "grad_norm": 1086.4512939453125, - "learning_rate": 3.8283485948909224e-05, - "loss": 110.5973, - "step": 48690 - }, - { - "epoch": 0.39350673486372706, - "grad_norm": 1172.8258056640625, - "learning_rate": 3.827757114538892e-05, - "loss": 122.9468, - "step": 48700 - }, - { - "epoch": 0.3935875370680112, - "grad_norm": 2226.800048828125, - "learning_rate": 3.827165530645627e-05, - "loss": 152.8825, - "step": 48710 - }, - { - "epoch": 0.39366833927229533, - "grad_norm": 970.2445068359375, - "learning_rate": 3.826573843257262e-05, - "loss": 148.9703, - "step": 48720 - }, - { - "epoch": 0.39374914147657947, - "grad_norm": 665.7371826171875, - "learning_rate": 3.8259820524199374e-05, - "loss": 115.7845, - "step": 48730 - }, - { - "epoch": 0.3938299436808636, - "grad_norm": 1206.349853515625, - "learning_rate": 3.8253901581798016e-05, - "loss": 95.5076, - "step": 48740 - }, - { - "epoch": 0.39391074588514774, - "grad_norm": 1900.4669189453125, - "learning_rate": 3.824798160583012e-05, - "loss": 90.9281, - "step": 48750 - }, - { - "epoch": 0.3939915480894319, - "grad_norm": 987.526123046875, - "learning_rate": 3.824206059675736e-05, - "loss": 123.4036, - "step": 48760 - }, - { - "epoch": 0.394072350293716, - "grad_norm": 998.2064208984375, - "learning_rate": 3.8236138555041434e-05, - "loss": 132.5648, - "step": 48770 - }, - { - "epoch": 0.39415315249800015, - "grad_norm": 1024.06884765625, - "learning_rate": 3.823021548114417e-05, - "loss": 104.357, - "step": 48780 - }, - { - "epoch": 0.3942339547022843, - "grad_norm": 801.3633422851562, - "learning_rate": 3.8224291375527464e-05, - "loss": 91.556, - "step": 48790 - }, - { - "epoch": 0.3943147569065684, - "grad_norm": 849.6737060546875, - "learning_rate": 3.821836623865329e-05, - "loss": 128.0681, - "step": 48800 - }, - { - "epoch": 0.39439555911085256, - "grad_norm": 868.9702758789062, - "learning_rate": 3.821244007098371e-05, - "loss": 92.4779, - "step": 48810 - }, - { - "epoch": 0.3944763613151367, - "grad_norm": 1013.6299438476562, - "learning_rate": 3.820651287298084e-05, - "loss": 110.6801, - "step": 48820 - }, - { - "epoch": 0.39455716351942083, - "grad_norm": 1080.29541015625, - "learning_rate": 3.8200584645106904e-05, - "loss": 127.4536, - "step": 48830 - }, - { - "epoch": 0.39463796572370496, - "grad_norm": 1676.865966796875, - "learning_rate": 3.81946553878242e-05, - "loss": 119.7225, - "step": 48840 - }, - { - "epoch": 0.3947187679279891, - "grad_norm": 500.4206848144531, - "learning_rate": 3.8188725101595094e-05, - "loss": 86.0306, - "step": 48850 - }, - { - "epoch": 0.3947995701322732, - "grad_norm": 657.9845581054688, - "learning_rate": 3.8182793786882065e-05, - "loss": 114.1835, - "step": 48860 - }, - { - "epoch": 0.3948803723365573, - "grad_norm": 1220.0479736328125, - "learning_rate": 3.817686144414762e-05, - "loss": 143.6967, - "step": 48870 - }, - { - "epoch": 0.39496117454084145, - "grad_norm": 648.8759765625, - "learning_rate": 3.8170928073854396e-05, - "loss": 124.3825, - "step": 48880 - }, - { - "epoch": 0.3950419767451256, - "grad_norm": 833.6063232421875, - "learning_rate": 3.8164993676465074e-05, - "loss": 114.7155, - "step": 48890 - }, - { - "epoch": 0.3951227789494097, - "grad_norm": 450.5781555175781, - "learning_rate": 3.8159058252442446e-05, - "loss": 104.8298, - "step": 48900 - }, - { - "epoch": 0.39520358115369386, - "grad_norm": 839.3585815429688, - "learning_rate": 3.815312180224937e-05, - "loss": 110.0447, - "step": 48910 - }, - { - "epoch": 0.395284383357978, - "grad_norm": 1569.9244384765625, - "learning_rate": 3.814718432634876e-05, - "loss": 110.5237, - "step": 48920 - }, - { - "epoch": 0.39536518556226213, - "grad_norm": 1111.059326171875, - "learning_rate": 3.814124582520365e-05, - "loss": 98.4849, - "step": 48930 - }, - { - "epoch": 0.39544598776654627, - "grad_norm": 541.0776977539062, - "learning_rate": 3.813530629927714e-05, - "loss": 101.2151, - "step": 48940 - }, - { - "epoch": 0.3955267899708304, - "grad_norm": 1196.9219970703125, - "learning_rate": 3.81293657490324e-05, - "loss": 138.9273, - "step": 48950 - }, - { - "epoch": 0.39560759217511454, - "grad_norm": 358.8376159667969, - "learning_rate": 3.8123424174932674e-05, - "loss": 97.2147, - "step": 48960 - }, - { - "epoch": 0.3956883943793987, - "grad_norm": 1564.3173828125, - "learning_rate": 3.811748157744132e-05, - "loss": 151.6429, - "step": 48970 - }, - { - "epoch": 0.3957691965836828, - "grad_norm": 1674.610595703125, - "learning_rate": 3.811153795702174e-05, - "loss": 83.0258, - "step": 48980 - }, - { - "epoch": 0.39584999878796695, - "grad_norm": 1245.1431884765625, - "learning_rate": 3.810559331413743e-05, - "loss": 106.4154, - "step": 48990 - }, - { - "epoch": 0.3959308009922511, - "grad_norm": 557.8291625976562, - "learning_rate": 3.8099647649251986e-05, - "loss": 108.5222, - "step": 49000 - }, - { - "epoch": 0.3960116031965352, - "grad_norm": 920.4149780273438, - "learning_rate": 3.809370096282902e-05, - "loss": 90.0952, - "step": 49010 - }, - { - "epoch": 0.39609240540081936, - "grad_norm": 940.1210327148438, - "learning_rate": 3.808775325533232e-05, - "loss": 111.3996, - "step": 49020 - }, - { - "epoch": 0.3961732076051035, - "grad_norm": 808.3553466796875, - "learning_rate": 3.808180452722566e-05, - "loss": 117.7351, - "step": 49030 - }, - { - "epoch": 0.3962540098093876, - "grad_norm": 1477.4814453125, - "learning_rate": 3.8075854778972955e-05, - "loss": 131.9885, - "step": 49040 - }, - { - "epoch": 0.3963348120136717, - "grad_norm": 1384.9583740234375, - "learning_rate": 3.8069904011038165e-05, - "loss": 100.1604, - "step": 49050 - }, - { - "epoch": 0.39641561421795585, - "grad_norm": 1345.448486328125, - "learning_rate": 3.806395222388536e-05, - "loss": 94.8697, - "step": 49060 - }, - { - "epoch": 0.39649641642224, - "grad_norm": 879.7883911132812, - "learning_rate": 3.805799941797865e-05, - "loss": 101.6998, - "step": 49070 - }, - { - "epoch": 0.3965772186265241, - "grad_norm": 995.2506713867188, - "learning_rate": 3.805204559378227e-05, - "loss": 95.2373, - "step": 49080 - }, - { - "epoch": 0.39665802083080826, - "grad_norm": 915.7434692382812, - "learning_rate": 3.804609075176049e-05, - "loss": 119.5596, - "step": 49090 - }, - { - "epoch": 0.3967388230350924, - "grad_norm": 1140.800048828125, - "learning_rate": 3.80401348923777e-05, - "loss": 102.5866, - "step": 49100 - }, - { - "epoch": 0.39681962523937653, - "grad_norm": 839.4356689453125, - "learning_rate": 3.803417801609833e-05, - "loss": 80.816, - "step": 49110 - }, - { - "epoch": 0.39690042744366066, - "grad_norm": 1874.65087890625, - "learning_rate": 3.802822012338694e-05, - "loss": 100.6852, - "step": 49120 - }, - { - "epoch": 0.3969812296479448, - "grad_norm": 825.3193359375, - "learning_rate": 3.802226121470811e-05, - "loss": 112.7548, - "step": 49130 - }, - { - "epoch": 0.39706203185222894, - "grad_norm": 1071.1832275390625, - "learning_rate": 3.8016301290526534e-05, - "loss": 102.3265, - "step": 49140 - }, - { - "epoch": 0.39714283405651307, - "grad_norm": 883.2966918945312, - "learning_rate": 3.8010340351306997e-05, - "loss": 93.1575, - "step": 49150 - }, - { - "epoch": 0.3972236362607972, - "grad_norm": 785.39404296875, - "learning_rate": 3.8004378397514315e-05, - "loss": 101.9369, - "step": 49160 - }, - { - "epoch": 0.39730443846508134, - "grad_norm": 1457.160888671875, - "learning_rate": 3.7998415429613444e-05, - "loss": 87.9702, - "step": 49170 - }, - { - "epoch": 0.3973852406693655, - "grad_norm": 1371.6361083984375, - "learning_rate": 3.799245144806937e-05, - "loss": 113.4954, - "step": 49180 - }, - { - "epoch": 0.3974660428736496, - "grad_norm": 780.65966796875, - "learning_rate": 3.798648645334718e-05, - "loss": 85.4794, - "step": 49190 - }, - { - "epoch": 0.39754684507793375, - "grad_norm": 1088.294189453125, - "learning_rate": 3.798052044591204e-05, - "loss": 122.7251, - "step": 49200 - }, - { - "epoch": 0.39762764728221783, - "grad_norm": 846.1474609375, - "learning_rate": 3.797455342622919e-05, - "loss": 104.8749, - "step": 49210 - }, - { - "epoch": 0.39770844948650197, - "grad_norm": 1245.458984375, - "learning_rate": 3.796858539476394e-05, - "loss": 95.8945, - "step": 49220 - }, - { - "epoch": 0.3977892516907861, - "grad_norm": 1455.3116455078125, - "learning_rate": 3.796261635198171e-05, - "loss": 159.7943, - "step": 49230 - }, - { - "epoch": 0.39787005389507024, - "grad_norm": 1094.027099609375, - "learning_rate": 3.7956646298347956e-05, - "loss": 121.7488, - "step": 49240 - }, - { - "epoch": 0.3979508560993544, - "grad_norm": 630.1639404296875, - "learning_rate": 3.795067523432826e-05, - "loss": 87.5995, - "step": 49250 - }, - { - "epoch": 0.3980316583036385, - "grad_norm": 1141.268798828125, - "learning_rate": 3.7944703160388234e-05, - "loss": 90.0884, - "step": 49260 - }, - { - "epoch": 0.39811246050792265, - "grad_norm": 1918.583251953125, - "learning_rate": 3.793873007699361e-05, - "loss": 115.6504, - "step": 49270 - }, - { - "epoch": 0.3981932627122068, - "grad_norm": 651.5230102539062, - "learning_rate": 3.793275598461017e-05, - "loss": 104.0089, - "step": 49280 - }, - { - "epoch": 0.3982740649164909, - "grad_norm": 1044.0704345703125, - "learning_rate": 3.792678088370379e-05, - "loss": 91.0117, - "step": 49290 - }, - { - "epoch": 0.39835486712077506, - "grad_norm": 902.1245727539062, - "learning_rate": 3.792080477474043e-05, - "loss": 114.9952, - "step": 49300 - }, - { - "epoch": 0.3984356693250592, - "grad_norm": 628.331787109375, - "learning_rate": 3.7914827658186103e-05, - "loss": 104.1434, - "step": 49310 - }, - { - "epoch": 0.39851647152934333, - "grad_norm": 1023.20947265625, - "learning_rate": 3.790884953450692e-05, - "loss": 97.8669, - "step": 49320 - }, - { - "epoch": 0.39859727373362747, - "grad_norm": 890.8483276367188, - "learning_rate": 3.790287040416908e-05, - "loss": 99.0271, - "step": 49330 - }, - { - "epoch": 0.3986780759379116, - "grad_norm": 544.1021118164062, - "learning_rate": 3.789689026763883e-05, - "loss": 89.6945, - "step": 49340 - }, - { - "epoch": 0.39875887814219574, - "grad_norm": 646.9542846679688, - "learning_rate": 3.789090912538253e-05, - "loss": 103.924, - "step": 49350 - }, - { - "epoch": 0.3988396803464799, - "grad_norm": 1023.1674194335938, - "learning_rate": 3.788492697786658e-05, - "loss": 154.0046, - "step": 49360 - }, - { - "epoch": 0.398920482550764, - "grad_norm": 978.5900268554688, - "learning_rate": 3.7878943825557516e-05, - "loss": 85.1029, - "step": 49370 - }, - { - "epoch": 0.3990012847550481, - "grad_norm": 1045.2568359375, - "learning_rate": 3.7872959668921884e-05, - "loss": 117.258, - "step": 49380 - }, - { - "epoch": 0.3990820869593322, - "grad_norm": 491.3598937988281, - "learning_rate": 3.7866974508426354e-05, - "loss": 71.8844, - "step": 49390 - }, - { - "epoch": 0.39916288916361636, - "grad_norm": 931.4103393554688, - "learning_rate": 3.786098834453766e-05, - "loss": 130.5435, - "step": 49400 - }, - { - "epoch": 0.3992436913679005, - "grad_norm": 1376.8134765625, - "learning_rate": 3.7855001177722615e-05, - "loss": 98.7885, - "step": 49410 - }, - { - "epoch": 0.39932449357218464, - "grad_norm": 1824.26171875, - "learning_rate": 3.7849013008448115e-05, - "loss": 152.0401, - "step": 49420 - }, - { - "epoch": 0.39940529577646877, - "grad_norm": 854.7713012695312, - "learning_rate": 3.784302383718113e-05, - "loss": 110.5708, - "step": 49430 - }, - { - "epoch": 0.3994860979807529, - "grad_norm": 738.2937622070312, - "learning_rate": 3.783703366438868e-05, - "loss": 100.0844, - "step": 49440 - }, - { - "epoch": 0.39956690018503704, - "grad_norm": 891.9318237304688, - "learning_rate": 3.783104249053793e-05, - "loss": 100.2332, - "step": 49450 - }, - { - "epoch": 0.3996477023893212, - "grad_norm": 1001.6572875976562, - "learning_rate": 3.782505031609607e-05, - "loss": 113.3177, - "step": 49460 - }, - { - "epoch": 0.3997285045936053, - "grad_norm": 995.598388671875, - "learning_rate": 3.781905714153037e-05, - "loss": 122.9727, - "step": 49470 - }, - { - "epoch": 0.39980930679788945, - "grad_norm": 1513.737060546875, - "learning_rate": 3.78130629673082e-05, - "loss": 93.6239, - "step": 49480 - }, - { - "epoch": 0.3998901090021736, - "grad_norm": 1263.060302734375, - "learning_rate": 3.780706779389701e-05, - "loss": 148.5079, - "step": 49490 - }, - { - "epoch": 0.3999709112064577, - "grad_norm": 1503.219970703125, - "learning_rate": 3.780107162176429e-05, - "loss": 113.78, - "step": 49500 - }, - { - "epoch": 0.40005171341074186, - "grad_norm": 2012.9635009765625, - "learning_rate": 3.779507445137766e-05, - "loss": 132.4036, - "step": 49510 - }, - { - "epoch": 0.400132515615026, - "grad_norm": 981.3453979492188, - "learning_rate": 3.778907628320477e-05, - "loss": 115.5002, - "step": 49520 - }, - { - "epoch": 0.40021331781931013, - "grad_norm": 951.5513305664062, - "learning_rate": 3.7783077117713386e-05, - "loss": 98.4825, - "step": 49530 - }, - { - "epoch": 0.40029412002359427, - "grad_norm": 872.0963745117188, - "learning_rate": 3.777707695537133e-05, - "loss": 97.9821, - "step": 49540 - }, - { - "epoch": 0.40037492222787835, - "grad_norm": 679.4779052734375, - "learning_rate": 3.77710757966465e-05, - "loss": 104.1641, - "step": 49550 - }, - { - "epoch": 0.4004557244321625, - "grad_norm": 1317.8074951171875, - "learning_rate": 3.776507364200689e-05, - "loss": 111.4042, - "step": 49560 - }, - { - "epoch": 0.4005365266364466, - "grad_norm": 1315.1026611328125, - "learning_rate": 3.7759070491920544e-05, - "loss": 102.448, - "step": 49570 - }, - { - "epoch": 0.40061732884073076, - "grad_norm": 851.5083618164062, - "learning_rate": 3.775306634685562e-05, - "loss": 121.0183, - "step": 49580 - }, - { - "epoch": 0.4006981310450149, - "grad_norm": 1336.6109619140625, - "learning_rate": 3.774706120728032e-05, - "loss": 115.1073, - "step": 49590 - }, - { - "epoch": 0.40077893324929903, - "grad_norm": 946.0890502929688, - "learning_rate": 3.7741055073662946e-05, - "loss": 102.1051, - "step": 49600 - }, - { - "epoch": 0.40085973545358317, - "grad_norm": 1445.52685546875, - "learning_rate": 3.773504794647187e-05, - "loss": 122.2198, - "step": 49610 - }, - { - "epoch": 0.4009405376578673, - "grad_norm": 615.635009765625, - "learning_rate": 3.772903982617552e-05, - "loss": 84.8195, - "step": 49620 - }, - { - "epoch": 0.40102133986215144, - "grad_norm": 1791.3621826171875, - "learning_rate": 3.772303071324244e-05, - "loss": 95.5332, - "step": 49630 - }, - { - "epoch": 0.4011021420664356, - "grad_norm": 771.7538452148438, - "learning_rate": 3.771702060814123e-05, - "loss": 148.4292, - "step": 49640 - }, - { - "epoch": 0.4011829442707197, - "grad_norm": 1151.8759765625, - "learning_rate": 3.771100951134057e-05, - "loss": 119.4643, - "step": 49650 - }, - { - "epoch": 0.40126374647500385, - "grad_norm": 880.4342651367188, - "learning_rate": 3.770499742330922e-05, - "loss": 108.4433, - "step": 49660 - }, - { - "epoch": 0.401344548679288, - "grad_norm": 1182.04296875, - "learning_rate": 3.7698984344515997e-05, - "loss": 123.5943, - "step": 49670 - }, - { - "epoch": 0.4014253508835721, - "grad_norm": 1287.990478515625, - "learning_rate": 3.769297027542985e-05, - "loss": 102.9006, - "step": 49680 - }, - { - "epoch": 0.40150615308785625, - "grad_norm": 880.35546875, - "learning_rate": 3.768695521651973e-05, - "loss": 93.3549, - "step": 49690 - }, - { - "epoch": 0.4015869552921404, - "grad_norm": 699.7244262695312, - "learning_rate": 3.7680939168254733e-05, - "loss": 112.1545, - "step": 49700 - }, - { - "epoch": 0.4016677574964245, - "grad_norm": 833.00732421875, - "learning_rate": 3.767492213110397e-05, - "loss": 127.5782, - "step": 49710 - }, - { - "epoch": 0.40174855970070866, - "grad_norm": 916.9799194335938, - "learning_rate": 3.7668904105536706e-05, - "loss": 103.1526, - "step": 49720 - }, - { - "epoch": 0.40182936190499274, - "grad_norm": 8585.0244140625, - "learning_rate": 3.76628850920222e-05, - "loss": 120.5418, - "step": 49730 - }, - { - "epoch": 0.4019101641092769, - "grad_norm": 662.9511108398438, - "learning_rate": 3.765686509102985e-05, - "loss": 92.2522, - "step": 49740 - }, - { - "epoch": 0.401990966313561, - "grad_norm": 1336.999755859375, - "learning_rate": 3.765084410302909e-05, - "loss": 117.1887, - "step": 49750 - }, - { - "epoch": 0.40207176851784515, - "grad_norm": 1059.5394287109375, - "learning_rate": 3.764482212848948e-05, - "loss": 88.6488, - "step": 49760 - }, - { - "epoch": 0.4021525707221293, - "grad_norm": 1180.2091064453125, - "learning_rate": 3.763879916788059e-05, - "loss": 121.7206, - "step": 49770 - }, - { - "epoch": 0.4022333729264134, - "grad_norm": 1307.3485107421875, - "learning_rate": 3.7632775221672115e-05, - "loss": 110.8538, - "step": 49780 - }, - { - "epoch": 0.40231417513069756, - "grad_norm": 1006.7772216796875, - "learning_rate": 3.7626750290333824e-05, - "loss": 107.7702, - "step": 49790 - }, - { - "epoch": 0.4023949773349817, - "grad_norm": 1202.384521484375, - "learning_rate": 3.762072437433555e-05, - "loss": 98.8179, - "step": 49800 - }, - { - "epoch": 0.40247577953926583, - "grad_norm": 1161.1400146484375, - "learning_rate": 3.76146974741472e-05, - "loss": 77.7882, - "step": 49810 - }, - { - "epoch": 0.40255658174354997, - "grad_norm": 1637.2685546875, - "learning_rate": 3.760866959023877e-05, - "loss": 113.8575, - "step": 49820 - }, - { - "epoch": 0.4026373839478341, - "grad_norm": 1057.4425048828125, - "learning_rate": 3.7602640723080315e-05, - "loss": 93.3096, - "step": 49830 - }, - { - "epoch": 0.40271818615211824, - "grad_norm": 2315.655517578125, - "learning_rate": 3.759661087314199e-05, - "loss": 103.6515, - "step": 49840 - }, - { - "epoch": 0.4027989883564024, - "grad_norm": 1005.734619140625, - "learning_rate": 3.759058004089402e-05, - "loss": 98.4473, - "step": 49850 - }, - { - "epoch": 0.4028797905606865, - "grad_norm": 1052.2489013671875, - "learning_rate": 3.7584548226806696e-05, - "loss": 87.7145, - "step": 49860 - }, - { - "epoch": 0.40296059276497065, - "grad_norm": 688.6419677734375, - "learning_rate": 3.7578515431350384e-05, - "loss": 129.4379, - "step": 49870 - }, - { - "epoch": 0.4030413949692548, - "grad_norm": 1881.64306640625, - "learning_rate": 3.757248165499555e-05, - "loss": 107.3066, - "step": 49880 - }, - { - "epoch": 0.4031221971735389, - "grad_norm": 610.1168212890625, - "learning_rate": 3.75664468982127e-05, - "loss": 107.2274, - "step": 49890 - }, - { - "epoch": 0.403202999377823, - "grad_norm": 1466.4088134765625, - "learning_rate": 3.7560411161472456e-05, - "loss": 134.4363, - "step": 49900 - }, - { - "epoch": 0.40328380158210714, - "grad_norm": 875.1807250976562, - "learning_rate": 3.7554374445245474e-05, - "loss": 118.8458, - "step": 49910 - }, - { - "epoch": 0.4033646037863913, - "grad_norm": 970.3364868164062, - "learning_rate": 3.7548336750002544e-05, - "loss": 94.0335, - "step": 49920 - }, - { - "epoch": 0.4034454059906754, - "grad_norm": 623.0574951171875, - "learning_rate": 3.754229807621446e-05, - "loss": 88.5621, - "step": 49930 - }, - { - "epoch": 0.40352620819495955, - "grad_norm": 847.7864379882812, - "learning_rate": 3.753625842435216e-05, - "loss": 96.5243, - "step": 49940 - }, - { - "epoch": 0.4036070103992437, - "grad_norm": 981.3753051757812, - "learning_rate": 3.7530217794886606e-05, - "loss": 116.7011, - "step": 49950 - }, - { - "epoch": 0.4036878126035278, - "grad_norm": 807.5214233398438, - "learning_rate": 3.752417618828888e-05, - "loss": 126.4324, - "step": 49960 - }, - { - "epoch": 0.40376861480781195, - "grad_norm": 1647.3134765625, - "learning_rate": 3.75181336050301e-05, - "loss": 145.2096, - "step": 49970 - }, - { - "epoch": 0.4038494170120961, - "grad_norm": 957.318115234375, - "learning_rate": 3.751209004558149e-05, - "loss": 97.6435, - "step": 49980 - }, - { - "epoch": 0.4039302192163802, - "grad_norm": 1030.7269287109375, - "learning_rate": 3.7506045510414335e-05, - "loss": 91.9053, - "step": 49990 - }, - { - "epoch": 0.40401102142066436, - "grad_norm": 1244.4815673828125, - "learning_rate": 3.7500000000000003e-05, - "loss": 109.372, - "step": 50000 - }, - { - "epoch": 0.4040918236249485, - "grad_norm": 661.6587524414062, - "learning_rate": 3.749395351480993e-05, - "loss": 78.7978, - "step": 50010 - }, - { - "epoch": 0.40417262582923263, - "grad_norm": 837.1110229492188, - "learning_rate": 3.748790605531565e-05, - "loss": 117.6233, - "step": 50020 - }, - { - "epoch": 0.40425342803351677, - "grad_norm": 2745.869384765625, - "learning_rate": 3.748185762198873e-05, - "loss": 169.9265, - "step": 50030 - }, - { - "epoch": 0.4043342302378009, - "grad_norm": 940.3863525390625, - "learning_rate": 3.7475808215300854e-05, - "loss": 81.8296, - "step": 50040 - }, - { - "epoch": 0.40441503244208504, - "grad_norm": 1148.2200927734375, - "learning_rate": 3.746975783572377e-05, - "loss": 131.5211, - "step": 50050 - }, - { - "epoch": 0.4044958346463692, - "grad_norm": 693.340576171875, - "learning_rate": 3.7463706483729296e-05, - "loss": 108.3347, - "step": 50060 - }, - { - "epoch": 0.40457663685065326, - "grad_norm": 1058.9921875, - "learning_rate": 3.745765415978933e-05, - "loss": 84.1018, - "step": 50070 - }, - { - "epoch": 0.4046574390549374, - "grad_norm": 860.1167602539062, - "learning_rate": 3.7451600864375844e-05, - "loss": 140.3248, - "step": 50080 - }, - { - "epoch": 0.40473824125922153, - "grad_norm": 850.0064086914062, - "learning_rate": 3.744554659796088e-05, - "loss": 101.9651, - "step": 50090 - }, - { - "epoch": 0.40481904346350567, - "grad_norm": 741.18505859375, - "learning_rate": 3.7439491361016564e-05, - "loss": 98.3306, - "step": 50100 - }, - { - "epoch": 0.4048998456677898, - "grad_norm": 1203.65576171875, - "learning_rate": 3.743343515401511e-05, - "loss": 117.7542, - "step": 50110 - }, - { - "epoch": 0.40498064787207394, - "grad_norm": 1732.1729736328125, - "learning_rate": 3.742737797742878e-05, - "loss": 121.0276, - "step": 50120 - }, - { - "epoch": 0.4050614500763581, - "grad_norm": 591.224365234375, - "learning_rate": 3.742131983172992e-05, - "loss": 75.8332, - "step": 50130 - }, - { - "epoch": 0.4051422522806422, - "grad_norm": 1003.4283447265625, - "learning_rate": 3.741526071739097e-05, - "loss": 93.1292, - "step": 50140 - }, - { - "epoch": 0.40522305448492635, - "grad_norm": 986.044921875, - "learning_rate": 3.7409200634884426e-05, - "loss": 85.939, - "step": 50150 - }, - { - "epoch": 0.4053038566892105, - "grad_norm": 1247.590576171875, - "learning_rate": 3.740313958468287e-05, - "loss": 101.2132, - "step": 50160 - }, - { - "epoch": 0.4053846588934946, - "grad_norm": 556.7891845703125, - "learning_rate": 3.739707756725894e-05, - "loss": 99.3369, - "step": 50170 - }, - { - "epoch": 0.40546546109777876, - "grad_norm": 705.8170776367188, - "learning_rate": 3.7391014583085385e-05, - "loss": 104.5082, - "step": 50180 - }, - { - "epoch": 0.4055462633020629, - "grad_norm": 1113.252685546875, - "learning_rate": 3.7384950632634995e-05, - "loss": 118.6211, - "step": 50190 - }, - { - "epoch": 0.40562706550634703, - "grad_norm": 993.8496704101562, - "learning_rate": 3.7378885716380664e-05, - "loss": 78.2997, - "step": 50200 - }, - { - "epoch": 0.40570786771063116, - "grad_norm": 3591.88525390625, - "learning_rate": 3.7372819834795335e-05, - "loss": 135.0083, - "step": 50210 - }, - { - "epoch": 0.4057886699149153, - "grad_norm": 1816.0736083984375, - "learning_rate": 3.736675298835203e-05, - "loss": 139.0248, - "step": 50220 - }, - { - "epoch": 0.40586947211919944, - "grad_norm": 713.2103881835938, - "learning_rate": 3.736068517752388e-05, - "loss": 85.9299, - "step": 50230 - }, - { - "epoch": 0.4059502743234835, - "grad_norm": 1047.2734375, - "learning_rate": 3.7354616402784035e-05, - "loss": 112.9601, - "step": 50240 - }, - { - "epoch": 0.40603107652776765, - "grad_norm": 864.8804321289062, - "learning_rate": 3.7348546664605777e-05, - "loss": 96.3611, - "step": 50250 - }, - { - "epoch": 0.4061118787320518, - "grad_norm": 1151.7315673828125, - "learning_rate": 3.734247596346242e-05, - "loss": 113.741, - "step": 50260 - }, - { - "epoch": 0.4061926809363359, - "grad_norm": 501.4530029296875, - "learning_rate": 3.733640429982738e-05, - "loss": 103.2432, - "step": 50270 - }, - { - "epoch": 0.40627348314062006, - "grad_norm": 955.2877197265625, - "learning_rate": 3.7330331674174125e-05, - "loss": 97.8112, - "step": 50280 - }, - { - "epoch": 0.4063542853449042, - "grad_norm": 659.0596313476562, - "learning_rate": 3.732425808697622e-05, - "loss": 85.9338, - "step": 50290 - }, - { - "epoch": 0.40643508754918833, - "grad_norm": 1239.520751953125, - "learning_rate": 3.731818353870729e-05, - "loss": 103.3174, - "step": 50300 - }, - { - "epoch": 0.40651588975347247, - "grad_norm": 1258.587158203125, - "learning_rate": 3.731210802984105e-05, - "loss": 138.3198, - "step": 50310 - }, - { - "epoch": 0.4065966919577566, - "grad_norm": 882.3109130859375, - "learning_rate": 3.7306031560851275e-05, - "loss": 96.8254, - "step": 50320 - }, - { - "epoch": 0.40667749416204074, - "grad_norm": 1585.6331787109375, - "learning_rate": 3.729995413221183e-05, - "loss": 137.984, - "step": 50330 - }, - { - "epoch": 0.4067582963663249, - "grad_norm": 1639.8424072265625, - "learning_rate": 3.729387574439662e-05, - "loss": 117.095, - "step": 50340 - }, - { - "epoch": 0.406839098570609, - "grad_norm": 850.7450561523438, - "learning_rate": 3.7287796397879674e-05, - "loss": 92.0205, - "step": 50350 - }, - { - "epoch": 0.40691990077489315, - "grad_norm": 460.08050537109375, - "learning_rate": 3.7281716093135063e-05, - "loss": 86.575, - "step": 50360 - }, - { - "epoch": 0.4070007029791773, - "grad_norm": 811.8711547851562, - "learning_rate": 3.7275634830636957e-05, - "loss": 85.8686, - "step": 50370 - }, - { - "epoch": 0.4070815051834614, - "grad_norm": 788.0709228515625, - "learning_rate": 3.726955261085956e-05, - "loss": 92.6048, - "step": 50380 - }, - { - "epoch": 0.40716230738774556, - "grad_norm": 645.41162109375, - "learning_rate": 3.726346943427719e-05, - "loss": 90.503, - "step": 50390 - }, - { - "epoch": 0.4072431095920297, - "grad_norm": 920.7049560546875, - "learning_rate": 3.725738530136422e-05, - "loss": 100.6208, - "step": 50400 - }, - { - "epoch": 0.40732391179631383, - "grad_norm": 772.3811645507812, - "learning_rate": 3.7251300212595106e-05, - "loss": 135.8557, - "step": 50410 - }, - { - "epoch": 0.4074047140005979, - "grad_norm": 1401.8865966796875, - "learning_rate": 3.7245214168444386e-05, - "loss": 114.3305, - "step": 50420 - }, - { - "epoch": 0.40748551620488205, - "grad_norm": 799.9236450195312, - "learning_rate": 3.723912716938665e-05, - "loss": 123.9118, - "step": 50430 - }, - { - "epoch": 0.4075663184091662, - "grad_norm": 1152.5408935546875, - "learning_rate": 3.723303921589657e-05, - "loss": 118.8732, - "step": 50440 - }, - { - "epoch": 0.4076471206134503, - "grad_norm": 840.8353881835938, - "learning_rate": 3.722695030844891e-05, - "loss": 107.6568, - "step": 50450 - }, - { - "epoch": 0.40772792281773446, - "grad_norm": 804.3788452148438, - "learning_rate": 3.722086044751849e-05, - "loss": 86.1861, - "step": 50460 - }, - { - "epoch": 0.4078087250220186, - "grad_norm": 1485.6064453125, - "learning_rate": 3.721476963358021e-05, - "loss": 100.7896, - "step": 50470 - }, - { - "epoch": 0.40788952722630273, - "grad_norm": 2747.5830078125, - "learning_rate": 3.720867786710904e-05, - "loss": 125.7545, - "step": 50480 - }, - { - "epoch": 0.40797032943058686, - "grad_norm": 837.3493041992188, - "learning_rate": 3.7202585148580036e-05, - "loss": 112.4385, - "step": 50490 - }, - { - "epoch": 0.408051131634871, - "grad_norm": 865.9047241210938, - "learning_rate": 3.719649147846832e-05, - "loss": 98.1793, - "step": 50500 - }, - { - "epoch": 0.40813193383915514, - "grad_norm": 878.3766479492188, - "learning_rate": 3.719039685724909e-05, - "loss": 116.6063, - "step": 50510 - }, - { - "epoch": 0.4082127360434393, - "grad_norm": 612.1708984375, - "learning_rate": 3.71843012853976e-05, - "loss": 124.0494, - "step": 50520 - }, - { - "epoch": 0.4082935382477234, - "grad_norm": 463.56573486328125, - "learning_rate": 3.7178204763389216e-05, - "loss": 95.0724, - "step": 50530 - }, - { - "epoch": 0.40837434045200754, - "grad_norm": 1179.2078857421875, - "learning_rate": 3.717210729169935e-05, - "loss": 124.9203, - "step": 50540 - }, - { - "epoch": 0.4084551426562917, - "grad_norm": 974.0314331054688, - "learning_rate": 3.71660088708035e-05, - "loss": 105.1903, - "step": 50550 - }, - { - "epoch": 0.4085359448605758, - "grad_norm": 1113.869384765625, - "learning_rate": 3.7159909501177226e-05, - "loss": 119.2629, - "step": 50560 - }, - { - "epoch": 0.40861674706485995, - "grad_norm": 1169.345703125, - "learning_rate": 3.7153809183296176e-05, - "loss": 102.9903, - "step": 50570 - }, - { - "epoch": 0.4086975492691441, - "grad_norm": 1160.8529052734375, - "learning_rate": 3.7147707917636046e-05, - "loss": 106.7996, - "step": 50580 - }, - { - "epoch": 0.40877835147342817, - "grad_norm": 897.9153442382812, - "learning_rate": 3.714160570467266e-05, - "loss": 117.943, - "step": 50590 - }, - { - "epoch": 0.4088591536777123, - "grad_norm": 694.47265625, - "learning_rate": 3.713550254488185e-05, - "loss": 145.4832, - "step": 50600 - }, - { - "epoch": 0.40893995588199644, - "grad_norm": 1059.1656494140625, - "learning_rate": 3.712939843873957e-05, - "loss": 202.4079, - "step": 50610 - }, - { - "epoch": 0.4090207580862806, - "grad_norm": 982.1298217773438, - "learning_rate": 3.712329338672182e-05, - "loss": 108.8503, - "step": 50620 - }, - { - "epoch": 0.4091015602905647, - "grad_norm": 823.262939453125, - "learning_rate": 3.71171873893047e-05, - "loss": 88.9028, - "step": 50630 - }, - { - "epoch": 0.40918236249484885, - "grad_norm": 1039.7957763671875, - "learning_rate": 3.711108044696436e-05, - "loss": 109.8441, - "step": 50640 - }, - { - "epoch": 0.409263164699133, - "grad_norm": 1186.4931640625, - "learning_rate": 3.710497256017702e-05, - "loss": 121.2047, - "step": 50650 - }, - { - "epoch": 0.4093439669034171, - "grad_norm": 1041.74609375, - "learning_rate": 3.7098863729419e-05, - "loss": 121.4461, - "step": 50660 - }, - { - "epoch": 0.40942476910770126, - "grad_norm": 2225.27685546875, - "learning_rate": 3.7092753955166674e-05, - "loss": 118.1753, - "step": 50670 - }, - { - "epoch": 0.4095055713119854, - "grad_norm": 1082.9833984375, - "learning_rate": 3.7086643237896504e-05, - "loss": 97.207, - "step": 50680 - }, - { - "epoch": 0.40958637351626953, - "grad_norm": 829.1814575195312, - "learning_rate": 3.7080531578085e-05, - "loss": 111.0327, - "step": 50690 - }, - { - "epoch": 0.40966717572055367, - "grad_norm": 1204.4862060546875, - "learning_rate": 3.7074418976208766e-05, - "loss": 94.4628, - "step": 50700 - }, - { - "epoch": 0.4097479779248378, - "grad_norm": 1778.496337890625, - "learning_rate": 3.706830543274449e-05, - "loss": 94.935, - "step": 50710 - }, - { - "epoch": 0.40982878012912194, - "grad_norm": 749.0870971679688, - "learning_rate": 3.706219094816891e-05, - "loss": 134.7976, - "step": 50720 - }, - { - "epoch": 0.4099095823334061, - "grad_norm": 633.5966796875, - "learning_rate": 3.705607552295883e-05, - "loss": 61.1449, - "step": 50730 - }, - { - "epoch": 0.4099903845376902, - "grad_norm": 1107.017578125, - "learning_rate": 3.704995915759117e-05, - "loss": 103.0143, - "step": 50740 - }, - { - "epoch": 0.41007118674197435, - "grad_norm": 585.5606689453125, - "learning_rate": 3.704384185254288e-05, - "loss": 103.1766, - "step": 50750 - }, - { - "epoch": 0.41015198894625843, - "grad_norm": 905.7990112304688, - "learning_rate": 3.7037723608291015e-05, - "loss": 104.5099, - "step": 50760 - }, - { - "epoch": 0.41023279115054256, - "grad_norm": 1153.3941650390625, - "learning_rate": 3.703160442531266e-05, - "loss": 94.4515, - "step": 50770 - }, - { - "epoch": 0.4103135933548267, - "grad_norm": 1010.5194702148438, - "learning_rate": 3.7025484304085034e-05, - "loss": 116.79, - "step": 50780 - }, - { - "epoch": 0.41039439555911084, - "grad_norm": 897.5564575195312, - "learning_rate": 3.701936324508537e-05, - "loss": 106.9305, - "step": 50790 - }, - { - "epoch": 0.41047519776339497, - "grad_norm": 675.4556884765625, - "learning_rate": 3.701324124879102e-05, - "loss": 149.8529, - "step": 50800 - }, - { - "epoch": 0.4105559999676791, - "grad_norm": 969.3845825195312, - "learning_rate": 3.7007118315679384e-05, - "loss": 99.1897, - "step": 50810 - }, - { - "epoch": 0.41063680217196324, - "grad_norm": 891.3025512695312, - "learning_rate": 3.700099444622794e-05, - "loss": 138.7589, - "step": 50820 - }, - { - "epoch": 0.4107176043762474, - "grad_norm": 951.863525390625, - "learning_rate": 3.699486964091423e-05, - "loss": 113.8601, - "step": 50830 - }, - { - "epoch": 0.4107984065805315, - "grad_norm": 1332.307861328125, - "learning_rate": 3.6988743900215894e-05, - "loss": 91.1788, - "step": 50840 - }, - { - "epoch": 0.41087920878481565, - "grad_norm": 509.7235412597656, - "learning_rate": 3.698261722461063e-05, - "loss": 99.2897, - "step": 50850 - }, - { - "epoch": 0.4109600109890998, - "grad_norm": 888.1732788085938, - "learning_rate": 3.69764896145762e-05, - "loss": 65.548, - "step": 50860 - }, - { - "epoch": 0.4110408131933839, - "grad_norm": 761.6651000976562, - "learning_rate": 3.697036107059044e-05, - "loss": 100.544, - "step": 50870 - }, - { - "epoch": 0.41112161539766806, - "grad_norm": 510.2133483886719, - "learning_rate": 3.696423159313129e-05, - "loss": 99.3175, - "step": 50880 - }, - { - "epoch": 0.4112024176019522, - "grad_norm": 921.33251953125, - "learning_rate": 3.6958101182676726e-05, - "loss": 93.0245, - "step": 50890 - }, - { - "epoch": 0.41128321980623633, - "grad_norm": 1295.573486328125, - "learning_rate": 3.695196983970481e-05, - "loss": 95.7695, - "step": 50900 - }, - { - "epoch": 0.41136402201052047, - "grad_norm": 1064.9324951171875, - "learning_rate": 3.6945837564693666e-05, - "loss": 103.1979, - "step": 50910 - }, - { - "epoch": 0.4114448242148046, - "grad_norm": 923.8102416992188, - "learning_rate": 3.693970435812153e-05, - "loss": 98.3734, - "step": 50920 - }, - { - "epoch": 0.4115256264190887, - "grad_norm": 2516.975341796875, - "learning_rate": 3.693357022046665e-05, - "loss": 113.0149, - "step": 50930 - }, - { - "epoch": 0.4116064286233728, - "grad_norm": 1049.3848876953125, - "learning_rate": 3.6927435152207406e-05, - "loss": 114.1854, - "step": 50940 - }, - { - "epoch": 0.41168723082765696, - "grad_norm": 918.2132568359375, - "learning_rate": 3.69212991538222e-05, - "loss": 83.8144, - "step": 50950 - }, - { - "epoch": 0.4117680330319411, - "grad_norm": 871.5775146484375, - "learning_rate": 3.6915162225789546e-05, - "loss": 129.2131, - "step": 50960 - }, - { - "epoch": 0.41184883523622523, - "grad_norm": 1316.456298828125, - "learning_rate": 3.690902436858801e-05, - "loss": 130.7913, - "step": 50970 - }, - { - "epoch": 0.41192963744050937, - "grad_norm": 744.1181030273438, - "learning_rate": 3.690288558269623e-05, - "loss": 118.9183, - "step": 50980 - }, - { - "epoch": 0.4120104396447935, - "grad_norm": 698.674560546875, - "learning_rate": 3.689674586859292e-05, - "loss": 86.7074, - "step": 50990 - }, - { - "epoch": 0.41209124184907764, - "grad_norm": 700.3701171875, - "learning_rate": 3.689060522675689e-05, - "loss": 129.0277, - "step": 51000 - }, - { - "epoch": 0.4121720440533618, - "grad_norm": 671.8871459960938, - "learning_rate": 3.688446365766696e-05, - "loss": 97.8604, - "step": 51010 - }, - { - "epoch": 0.4122528462576459, - "grad_norm": 638.5401611328125, - "learning_rate": 3.6878321161802104e-05, - "loss": 124.8841, - "step": 51020 - }, - { - "epoch": 0.41233364846193005, - "grad_norm": 962.9974975585938, - "learning_rate": 3.687217773964129e-05, - "loss": 113.8739, - "step": 51030 - }, - { - "epoch": 0.4124144506662142, - "grad_norm": 1488.8475341796875, - "learning_rate": 3.686603339166362e-05, - "loss": 111.1327, - "step": 51040 - }, - { - "epoch": 0.4124952528704983, - "grad_norm": 1666.353271484375, - "learning_rate": 3.685988811834823e-05, - "loss": 120.7597, - "step": 51050 - }, - { - "epoch": 0.41257605507478246, - "grad_norm": 823.7409057617188, - "learning_rate": 3.685374192017436e-05, - "loss": 114.139, - "step": 51060 - }, - { - "epoch": 0.4126568572790666, - "grad_norm": 1042.3953857421875, - "learning_rate": 3.684759479762127e-05, - "loss": 93.4635, - "step": 51070 - }, - { - "epoch": 0.4127376594833507, - "grad_norm": 1100.34521484375, - "learning_rate": 3.6841446751168355e-05, - "loss": 96.6085, - "step": 51080 - }, - { - "epoch": 0.41281846168763486, - "grad_norm": 1006.7479248046875, - "learning_rate": 3.683529778129503e-05, - "loss": 145.0384, - "step": 51090 - }, - { - "epoch": 0.41289926389191894, - "grad_norm": 2133.508056640625, - "learning_rate": 3.682914788848083e-05, - "loss": 102.9401, - "step": 51100 - }, - { - "epoch": 0.4129800660962031, - "grad_norm": 979.1788940429688, - "learning_rate": 3.682299707320532e-05, - "loss": 117.7248, - "step": 51110 - }, - { - "epoch": 0.4130608683004872, - "grad_norm": 2207.940185546875, - "learning_rate": 3.681684533594815e-05, - "loss": 160.3749, - "step": 51120 - }, - { - "epoch": 0.41314167050477135, - "grad_norm": 665.2557983398438, - "learning_rate": 3.6810692677189046e-05, - "loss": 106.1242, - "step": 51130 - }, - { - "epoch": 0.4132224727090555, - "grad_norm": 2216.060791015625, - "learning_rate": 3.680453909740782e-05, - "loss": 113.829, - "step": 51140 - }, - { - "epoch": 0.4133032749133396, - "grad_norm": 608.702880859375, - "learning_rate": 3.6798384597084325e-05, - "loss": 94.2593, - "step": 51150 - }, - { - "epoch": 0.41338407711762376, - "grad_norm": 1091.565673828125, - "learning_rate": 3.679222917669851e-05, - "loss": 113.2256, - "step": 51160 - }, - { - "epoch": 0.4134648793219079, - "grad_norm": 945.6337280273438, - "learning_rate": 3.678607283673037e-05, - "loss": 100.4261, - "step": 51170 - }, - { - "epoch": 0.41354568152619203, - "grad_norm": 1709.76318359375, - "learning_rate": 3.6779915577660015e-05, - "loss": 104.6235, - "step": 51180 - }, - { - "epoch": 0.41362648373047617, - "grad_norm": 780.2813720703125, - "learning_rate": 3.677375739996759e-05, - "loss": 123.7976, - "step": 51190 - }, - { - "epoch": 0.4137072859347603, - "grad_norm": 694.8121337890625, - "learning_rate": 3.6767598304133324e-05, - "loss": 91.5883, - "step": 51200 - }, - { - "epoch": 0.41378808813904444, - "grad_norm": 810.7626953125, - "learning_rate": 3.67614382906375e-05, - "loss": 92.6607, - "step": 51210 - }, - { - "epoch": 0.4138688903433286, - "grad_norm": 594.18310546875, - "learning_rate": 3.67552773599605e-05, - "loss": 105.1821, - "step": 51220 - }, - { - "epoch": 0.4139496925476127, - "grad_norm": 1207.786865234375, - "learning_rate": 3.6749115512582786e-05, - "loss": 85.6308, - "step": 51230 - }, - { - "epoch": 0.41403049475189685, - "grad_norm": 799.7379760742188, - "learning_rate": 3.674295274898485e-05, - "loss": 150.7621, - "step": 51240 - }, - { - "epoch": 0.414111296956181, - "grad_norm": 943.1004028320312, - "learning_rate": 3.673678906964727e-05, - "loss": 148.9106, - "step": 51250 - }, - { - "epoch": 0.4141920991604651, - "grad_norm": 758.6460571289062, - "learning_rate": 3.673062447505072e-05, - "loss": 104.1167, - "step": 51260 - }, - { - "epoch": 0.41427290136474926, - "grad_norm": 6140.54443359375, - "learning_rate": 3.672445896567592e-05, - "loss": 106.9115, - "step": 51270 - }, - { - "epoch": 0.41435370356903334, - "grad_norm": 736.86865234375, - "learning_rate": 3.6718292542003666e-05, - "loss": 89.2326, - "step": 51280 - }, - { - "epoch": 0.4144345057733175, - "grad_norm": 675.8764038085938, - "learning_rate": 3.671212520451484e-05, - "loss": 116.2615, - "step": 51290 - }, - { - "epoch": 0.4145153079776016, - "grad_norm": 743.4627075195312, - "learning_rate": 3.6705956953690364e-05, - "loss": 106.7714, - "step": 51300 - }, - { - "epoch": 0.41459611018188575, - "grad_norm": 879.1180419921875, - "learning_rate": 3.669978779001127e-05, - "loss": 105.1862, - "step": 51310 - }, - { - "epoch": 0.4146769123861699, - "grad_norm": 1089.2691650390625, - "learning_rate": 3.6693617713958634e-05, - "loss": 110.1805, - "step": 51320 - }, - { - "epoch": 0.414757714590454, - "grad_norm": 1009.084228515625, - "learning_rate": 3.668744672601361e-05, - "loss": 87.4429, - "step": 51330 - }, - { - "epoch": 0.41483851679473815, - "grad_norm": 573.4092407226562, - "learning_rate": 3.668127482665743e-05, - "loss": 129.1954, - "step": 51340 - }, - { - "epoch": 0.4149193189990223, - "grad_norm": 818.50537109375, - "learning_rate": 3.667510201637139e-05, - "loss": 111.7285, - "step": 51350 - }, - { - "epoch": 0.4150001212033064, - "grad_norm": 705.1763305664062, - "learning_rate": 3.6668928295636854e-05, - "loss": 94.6878, - "step": 51360 - }, - { - "epoch": 0.41508092340759056, - "grad_norm": 830.5982666015625, - "learning_rate": 3.666275366493526e-05, - "loss": 115.1375, - "step": 51370 - }, - { - "epoch": 0.4151617256118747, - "grad_norm": 1119.63671875, - "learning_rate": 3.665657812474812e-05, - "loss": 134.7961, - "step": 51380 - }, - { - "epoch": 0.41524252781615884, - "grad_norm": 592.1851196289062, - "learning_rate": 3.665040167555702e-05, - "loss": 89.114, - "step": 51390 - }, - { - "epoch": 0.41532333002044297, - "grad_norm": 670.46875, - "learning_rate": 3.664422431784361e-05, - "loss": 87.7141, - "step": 51400 - }, - { - "epoch": 0.4154041322247271, - "grad_norm": 1682.107421875, - "learning_rate": 3.6638046052089616e-05, - "loss": 108.7615, - "step": 51410 - }, - { - "epoch": 0.41548493442901124, - "grad_norm": 1103.8193359375, - "learning_rate": 3.663186687877682e-05, - "loss": 123.0206, - "step": 51420 - }, - { - "epoch": 0.4155657366332954, - "grad_norm": 790.7777099609375, - "learning_rate": 3.6625686798387106e-05, - "loss": 108.7138, - "step": 51430 - }, - { - "epoch": 0.4156465388375795, - "grad_norm": 853.0620727539062, - "learning_rate": 3.661950581140239e-05, - "loss": 101.1588, - "step": 51440 - }, - { - "epoch": 0.4157273410418636, - "grad_norm": 815.1986694335938, - "learning_rate": 3.66133239183047e-05, - "loss": 101.045, - "step": 51450 - }, - { - "epoch": 0.41580814324614773, - "grad_norm": 4498.54638671875, - "learning_rate": 3.6607141119576084e-05, - "loss": 127.4073, - "step": 51460 - }, - { - "epoch": 0.41588894545043187, - "grad_norm": 583.3117065429688, - "learning_rate": 3.660095741569871e-05, - "loss": 90.2438, - "step": 51470 - }, - { - "epoch": 0.415969747654716, - "grad_norm": 1291.91552734375, - "learning_rate": 3.659477280715479e-05, - "loss": 99.0459, - "step": 51480 - }, - { - "epoch": 0.41605054985900014, - "grad_norm": 1048.017822265625, - "learning_rate": 3.658858729442662e-05, - "loss": 105.2154, - "step": 51490 - }, - { - "epoch": 0.4161313520632843, - "grad_norm": 1121.027587890625, - "learning_rate": 3.6582400877996546e-05, - "loss": 112.8955, - "step": 51500 - }, - { - "epoch": 0.4162121542675684, - "grad_norm": 1002.3828125, - "learning_rate": 3.657621355834701e-05, - "loss": 103.9797, - "step": 51510 - }, - { - "epoch": 0.41629295647185255, - "grad_norm": 725.4869995117188, - "learning_rate": 3.657002533596049e-05, - "loss": 91.9598, - "step": 51520 - }, - { - "epoch": 0.4163737586761367, - "grad_norm": 801.9194946289062, - "learning_rate": 3.656383621131959e-05, - "loss": 119.9081, - "step": 51530 - }, - { - "epoch": 0.4164545608804208, - "grad_norm": 1042.4119873046875, - "learning_rate": 3.655764618490692e-05, - "loss": 121.2274, - "step": 51540 - }, - { - "epoch": 0.41653536308470496, - "grad_norm": 1344.8443603515625, - "learning_rate": 3.655145525720522e-05, - "loss": 119.8163, - "step": 51550 - }, - { - "epoch": 0.4166161652889891, - "grad_norm": 803.3026123046875, - "learning_rate": 3.654526342869724e-05, - "loss": 90.2914, - "step": 51560 - }, - { - "epoch": 0.41669696749327323, - "grad_norm": 788.026611328125, - "learning_rate": 3.6539070699865853e-05, - "loss": 92.7277, - "step": 51570 - }, - { - "epoch": 0.41677776969755737, - "grad_norm": 902.9691772460938, - "learning_rate": 3.6532877071193974e-05, - "loss": 110.9967, - "step": 51580 - }, - { - "epoch": 0.4168585719018415, - "grad_norm": 973.3849487304688, - "learning_rate": 3.6526682543164595e-05, - "loss": 98.2199, - "step": 51590 - }, - { - "epoch": 0.41693937410612564, - "grad_norm": 1505.4620361328125, - "learning_rate": 3.6520487116260776e-05, - "loss": 129.1263, - "step": 51600 - }, - { - "epoch": 0.4170201763104098, - "grad_norm": 1009.4824829101562, - "learning_rate": 3.651429079096566e-05, - "loss": 101.8687, - "step": 51610 - }, - { - "epoch": 0.41710097851469385, - "grad_norm": 1013.9920043945312, - "learning_rate": 3.650809356776242e-05, - "loss": 85.4852, - "step": 51620 - }, - { - "epoch": 0.417181780718978, - "grad_norm": 1571.6351318359375, - "learning_rate": 3.650189544713437e-05, - "loss": 110.0508, - "step": 51630 - }, - { - "epoch": 0.4172625829232621, - "grad_norm": 677.537353515625, - "learning_rate": 3.6495696429564823e-05, - "loss": 99.9232, - "step": 51640 - }, - { - "epoch": 0.41734338512754626, - "grad_norm": 1993.697509765625, - "learning_rate": 3.6489496515537204e-05, - "loss": 106.697, - "step": 51650 - }, - { - "epoch": 0.4174241873318304, - "grad_norm": 968.6683349609375, - "learning_rate": 3.648329570553498e-05, - "loss": 123.2069, - "step": 51660 - }, - { - "epoch": 0.41750498953611453, - "grad_norm": 907.696533203125, - "learning_rate": 3.647709400004172e-05, - "loss": 161.9849, - "step": 51670 - }, - { - "epoch": 0.41758579174039867, - "grad_norm": 1380.95361328125, - "learning_rate": 3.647089139954104e-05, - "loss": 142.7653, - "step": 51680 - }, - { - "epoch": 0.4176665939446828, - "grad_norm": 884.8692626953125, - "learning_rate": 3.646468790451663e-05, - "loss": 81.0925, - "step": 51690 - }, - { - "epoch": 0.41774739614896694, - "grad_norm": 519.856201171875, - "learning_rate": 3.645848351545225e-05, - "loss": 108.8349, - "step": 51700 - }, - { - "epoch": 0.4178281983532511, - "grad_norm": 1149.3927001953125, - "learning_rate": 3.6452278232831735e-05, - "loss": 114.8804, - "step": 51710 - }, - { - "epoch": 0.4179090005575352, - "grad_norm": 898.1983032226562, - "learning_rate": 3.644607205713898e-05, - "loss": 82.9898, - "step": 51720 - }, - { - "epoch": 0.41798980276181935, - "grad_norm": 1059.548095703125, - "learning_rate": 3.643986498885796e-05, - "loss": 101.0293, - "step": 51730 - }, - { - "epoch": 0.4180706049661035, - "grad_norm": 320.30023193359375, - "learning_rate": 3.643365702847272e-05, - "loss": 101.508, - "step": 51740 - }, - { - "epoch": 0.4181514071703876, - "grad_norm": 689.6282958984375, - "learning_rate": 3.642744817646736e-05, - "loss": 88.6491, - "step": 51750 - }, - { - "epoch": 0.41823220937467176, - "grad_norm": 945.8651733398438, - "learning_rate": 3.642123843332606e-05, - "loss": 102.8155, - "step": 51760 - }, - { - "epoch": 0.4183130115789559, - "grad_norm": 1199.8419189453125, - "learning_rate": 3.641502779953307e-05, - "loss": 94.7032, - "step": 51770 - }, - { - "epoch": 0.41839381378324003, - "grad_norm": 961.8651123046875, - "learning_rate": 3.640881627557271e-05, - "loss": 93.6731, - "step": 51780 - }, - { - "epoch": 0.4184746159875241, - "grad_norm": 822.8622436523438, - "learning_rate": 3.6402603861929374e-05, - "loss": 97.6819, - "step": 51790 - }, - { - "epoch": 0.41855541819180825, - "grad_norm": 628.6469116210938, - "learning_rate": 3.639639055908751e-05, - "loss": 99.6512, - "step": 51800 - }, - { - "epoch": 0.4186362203960924, - "grad_norm": 1113.08935546875, - "learning_rate": 3.639017636753163e-05, - "loss": 109.5721, - "step": 51810 - }, - { - "epoch": 0.4187170226003765, - "grad_norm": 846.2689208984375, - "learning_rate": 3.638396128774636e-05, - "loss": 120.049, - "step": 51820 - }, - { - "epoch": 0.41879782480466066, - "grad_norm": 556.595703125, - "learning_rate": 3.6377745320216346e-05, - "loss": 128.9337, - "step": 51830 - }, - { - "epoch": 0.4188786270089448, - "grad_norm": 944.9878540039062, - "learning_rate": 3.637152846542633e-05, - "loss": 91.2079, - "step": 51840 - }, - { - "epoch": 0.41895942921322893, - "grad_norm": 710.7396850585938, - "learning_rate": 3.63653107238611e-05, - "loss": 105.1159, - "step": 51850 - }, - { - "epoch": 0.41904023141751306, - "grad_norm": 1113.7232666015625, - "learning_rate": 3.635909209600555e-05, - "loss": 134.9205, - "step": 51860 - }, - { - "epoch": 0.4191210336217972, - "grad_norm": 907.1137084960938, - "learning_rate": 3.6352872582344596e-05, - "loss": 97.1154, - "step": 51870 - }, - { - "epoch": 0.41920183582608134, - "grad_norm": 957.1614990234375, - "learning_rate": 3.634665218336328e-05, - "loss": 74.3417, - "step": 51880 - }, - { - "epoch": 0.4192826380303655, - "grad_norm": 659.573974609375, - "learning_rate": 3.6340430899546656e-05, - "loss": 116.0436, - "step": 51890 - }, - { - "epoch": 0.4193634402346496, - "grad_norm": 1061.35498046875, - "learning_rate": 3.633420873137988e-05, - "loss": 119.1621, - "step": 51900 - }, - { - "epoch": 0.41944424243893375, - "grad_norm": 902.3751220703125, - "learning_rate": 3.632798567934817e-05, - "loss": 86.4995, - "step": 51910 - }, - { - "epoch": 0.4195250446432179, - "grad_norm": 1387.8521728515625, - "learning_rate": 3.632176174393682e-05, - "loss": 94.3441, - "step": 51920 - }, - { - "epoch": 0.419605846847502, - "grad_norm": 839.9918823242188, - "learning_rate": 3.6315536925631174e-05, - "loss": 92.2668, - "step": 51930 - }, - { - "epoch": 0.41968664905178615, - "grad_norm": 1319.91015625, - "learning_rate": 3.630931122491666e-05, - "loss": 104.3476, - "step": 51940 - }, - { - "epoch": 0.4197674512560703, - "grad_norm": 1186.2650146484375, - "learning_rate": 3.630308464227877e-05, - "loss": 114.109, - "step": 51950 - }, - { - "epoch": 0.4198482534603544, - "grad_norm": 609.2423095703125, - "learning_rate": 3.629685717820307e-05, - "loss": 98.316, - "step": 51960 - }, - { - "epoch": 0.4199290556646385, - "grad_norm": 1281.401611328125, - "learning_rate": 3.629062883317519e-05, - "loss": 99.1893, - "step": 51970 - }, - { - "epoch": 0.42000985786892264, - "grad_norm": 792.6577758789062, - "learning_rate": 3.628439960768082e-05, - "loss": 112.2365, - "step": 51980 - }, - { - "epoch": 0.4200906600732068, - "grad_norm": 1376.571044921875, - "learning_rate": 3.6278169502205736e-05, - "loss": 94.9764, - "step": 51990 - }, - { - "epoch": 0.4201714622774909, - "grad_norm": 989.2171630859375, - "learning_rate": 3.627193851723577e-05, - "loss": 103.2787, - "step": 52000 - }, - { - "epoch": 0.42025226448177505, - "grad_norm": 1175.96630859375, - "learning_rate": 3.626570665325684e-05, - "loss": 95.3951, - "step": 52010 - }, - { - "epoch": 0.4203330666860592, - "grad_norm": 830.3632202148438, - "learning_rate": 3.6259473910754904e-05, - "loss": 111.1317, - "step": 52020 - }, - { - "epoch": 0.4204138688903433, - "grad_norm": 1285.8551025390625, - "learning_rate": 3.6253240290216e-05, - "loss": 95.5119, - "step": 52030 - }, - { - "epoch": 0.42049467109462746, - "grad_norm": 800.1044311523438, - "learning_rate": 3.624700579212626e-05, - "loss": 106.627, - "step": 52040 - }, - { - "epoch": 0.4205754732989116, - "grad_norm": 440.0909423828125, - "learning_rate": 3.624077041697185e-05, - "loss": 74.4075, - "step": 52050 - }, - { - "epoch": 0.42065627550319573, - "grad_norm": 975.0591430664062, - "learning_rate": 3.623453416523902e-05, - "loss": 92.1035, - "step": 52060 - }, - { - "epoch": 0.42073707770747987, - "grad_norm": 469.0267639160156, - "learning_rate": 3.6228297037414074e-05, - "loss": 94.1764, - "step": 52070 - }, - { - "epoch": 0.420817879911764, - "grad_norm": 523.0760498046875, - "learning_rate": 3.622205903398342e-05, - "loss": 99.5884, - "step": 52080 - }, - { - "epoch": 0.42089868211604814, - "grad_norm": 1811.6181640625, - "learning_rate": 3.621582015543348e-05, - "loss": 119.2955, - "step": 52090 - }, - { - "epoch": 0.4209794843203323, - "grad_norm": 564.1705932617188, - "learning_rate": 3.6209580402250815e-05, - "loss": 75.1434, - "step": 52100 - }, - { - "epoch": 0.4210602865246164, - "grad_norm": 2058.806396484375, - "learning_rate": 3.6203339774921976e-05, - "loss": 106.0374, - "step": 52110 - }, - { - "epoch": 0.42114108872890055, - "grad_norm": 372.7056579589844, - "learning_rate": 3.6197098273933634e-05, - "loss": 102.4193, - "step": 52120 - }, - { - "epoch": 0.4212218909331847, - "grad_norm": 1239.1873779296875, - "learning_rate": 3.619085589977251e-05, - "loss": 102.6541, - "step": 52130 - }, - { - "epoch": 0.42130269313746876, - "grad_norm": 704.6087646484375, - "learning_rate": 3.618461265292541e-05, - "loss": 101.775, - "step": 52140 - }, - { - "epoch": 0.4213834953417529, - "grad_norm": 1075.79296875, - "learning_rate": 3.617836853387918e-05, - "loss": 91.506, - "step": 52150 - }, - { - "epoch": 0.42146429754603704, - "grad_norm": 830.5742797851562, - "learning_rate": 3.617212354312076e-05, - "loss": 96.7083, - "step": 52160 - }, - { - "epoch": 0.4215450997503212, - "grad_norm": 1369.042724609375, - "learning_rate": 3.6165877681137136e-05, - "loss": 93.3957, - "step": 52170 - }, - { - "epoch": 0.4216259019546053, - "grad_norm": 481.28387451171875, - "learning_rate": 3.61596309484154e-05, - "loss": 93.548, - "step": 52180 - }, - { - "epoch": 0.42170670415888944, - "grad_norm": 926.9627075195312, - "learning_rate": 3.615338334544265e-05, - "loss": 80.5242, - "step": 52190 - }, - { - "epoch": 0.4217875063631736, - "grad_norm": 1057.8992919921875, - "learning_rate": 3.614713487270611e-05, - "loss": 81.2164, - "step": 52200 - }, - { - "epoch": 0.4218683085674577, - "grad_norm": 1092.752685546875, - "learning_rate": 3.614088553069303e-05, - "loss": 126.1279, - "step": 52210 - }, - { - "epoch": 0.42194911077174185, - "grad_norm": 1460.92138671875, - "learning_rate": 3.613463531989076e-05, - "loss": 125.3828, - "step": 52220 - }, - { - "epoch": 0.422029912976026, - "grad_norm": 776.3665161132812, - "learning_rate": 3.612838424078671e-05, - "loss": 98.0366, - "step": 52230 - }, - { - "epoch": 0.4221107151803101, - "grad_norm": 964.3656005859375, - "learning_rate": 3.6122132293868335e-05, - "loss": 178.2937, - "step": 52240 - }, - { - "epoch": 0.42219151738459426, - "grad_norm": 658.375732421875, - "learning_rate": 3.611587947962319e-05, - "loss": 105.4046, - "step": 52250 - }, - { - "epoch": 0.4222723195888784, - "grad_norm": 1233.294189453125, - "learning_rate": 3.6109625798538873e-05, - "loss": 91.4773, - "step": 52260 - }, - { - "epoch": 0.42235312179316253, - "grad_norm": 987.4110107421875, - "learning_rate": 3.610337125110307e-05, - "loss": 83.0719, - "step": 52270 - }, - { - "epoch": 0.42243392399744667, - "grad_norm": 1366.55419921875, - "learning_rate": 3.6097115837803505e-05, - "loss": 109.4826, - "step": 52280 - }, - { - "epoch": 0.4225147262017308, - "grad_norm": 1075.3995361328125, - "learning_rate": 3.6090859559128e-05, - "loss": 85.4439, - "step": 52290 - }, - { - "epoch": 0.42259552840601494, - "grad_norm": 993.6791381835938, - "learning_rate": 3.608460241556443e-05, - "loss": 115.2158, - "step": 52300 - }, - { - "epoch": 0.422676330610299, - "grad_norm": 750.389892578125, - "learning_rate": 3.607834440760074e-05, - "loss": 106.4523, - "step": 52310 - }, - { - "epoch": 0.42275713281458316, - "grad_norm": 1256.6260986328125, - "learning_rate": 3.6072085535724956e-05, - "loss": 108.5132, - "step": 52320 - }, - { - "epoch": 0.4228379350188673, - "grad_norm": 4178.677734375, - "learning_rate": 3.606582580042513e-05, - "loss": 100.8893, - "step": 52330 - }, - { - "epoch": 0.42291873722315143, - "grad_norm": 1386.273193359375, - "learning_rate": 3.6059565202189435e-05, - "loss": 119.1366, - "step": 52340 - }, - { - "epoch": 0.42299953942743557, - "grad_norm": 1174.271728515625, - "learning_rate": 3.605330374150607e-05, - "loss": 96.0526, - "step": 52350 - }, - { - "epoch": 0.4230803416317197, - "grad_norm": 903.0919799804688, - "learning_rate": 3.604704141886332e-05, - "loss": 110.9519, - "step": 52360 - }, - { - "epoch": 0.42316114383600384, - "grad_norm": 1444.7679443359375, - "learning_rate": 3.604077823474954e-05, - "loss": 130.3459, - "step": 52370 - }, - { - "epoch": 0.423241946040288, - "grad_norm": 710.629150390625, - "learning_rate": 3.603451418965313e-05, - "loss": 105.1461, - "step": 52380 - }, - { - "epoch": 0.4233227482445721, - "grad_norm": 1019.9679565429688, - "learning_rate": 3.602824928406259e-05, - "loss": 148.467, - "step": 52390 - }, - { - "epoch": 0.42340355044885625, - "grad_norm": 1575.0010986328125, - "learning_rate": 3.602198351846647e-05, - "loss": 155.3993, - "step": 52400 - }, - { - "epoch": 0.4234843526531404, - "grad_norm": 712.8841552734375, - "learning_rate": 3.6015716893353376e-05, - "loss": 112.5797, - "step": 52410 - }, - { - "epoch": 0.4235651548574245, - "grad_norm": 466.0777282714844, - "learning_rate": 3.600944940921199e-05, - "loss": 117.8744, - "step": 52420 - }, - { - "epoch": 0.42364595706170866, - "grad_norm": 1775.660400390625, - "learning_rate": 3.600318106653108e-05, - "loss": 109.2298, - "step": 52430 - }, - { - "epoch": 0.4237267592659928, - "grad_norm": 544.9641723632812, - "learning_rate": 3.5996911865799454e-05, - "loss": 88.6949, - "step": 52440 - }, - { - "epoch": 0.42380756147027693, - "grad_norm": 756.9719848632812, - "learning_rate": 3.5990641807506e-05, - "loss": 101.8291, - "step": 52450 - }, - { - "epoch": 0.42388836367456106, - "grad_norm": 1146.97216796875, - "learning_rate": 3.5984370892139666e-05, - "loss": 103.221, - "step": 52460 - }, - { - "epoch": 0.4239691658788452, - "grad_norm": 865.7724609375, - "learning_rate": 3.597809912018947e-05, - "loss": 90.3662, - "step": 52470 - }, - { - "epoch": 0.4240499680831293, - "grad_norm": 319.5663146972656, - "learning_rate": 3.5971826492144504e-05, - "loss": 118.0345, - "step": 52480 - }, - { - "epoch": 0.4241307702874134, - "grad_norm": 880.225830078125, - "learning_rate": 3.596555300849392e-05, - "loss": 116.1025, - "step": 52490 - }, - { - "epoch": 0.42421157249169755, - "grad_norm": 1057.83740234375, - "learning_rate": 3.5959278669726935e-05, - "loss": 100.3643, - "step": 52500 - }, - { - "epoch": 0.4242923746959817, - "grad_norm": 1166.5439453125, - "learning_rate": 3.5953003476332835e-05, - "loss": 125.3584, - "step": 52510 - }, - { - "epoch": 0.4243731769002658, - "grad_norm": 1357.318359375, - "learning_rate": 3.594672742880097e-05, - "loss": 102.7229, - "step": 52520 - }, - { - "epoch": 0.42445397910454996, - "grad_norm": 1215.2672119140625, - "learning_rate": 3.594045052762076e-05, - "loss": 97.8317, - "step": 52530 - }, - { - "epoch": 0.4245347813088341, - "grad_norm": 641.0037841796875, - "learning_rate": 3.5934172773281696e-05, - "loss": 72.563, - "step": 52540 - }, - { - "epoch": 0.42461558351311823, - "grad_norm": 768.0195922851562, - "learning_rate": 3.592789416627332e-05, - "loss": 96.4681, - "step": 52550 - }, - { - "epoch": 0.42469638571740237, - "grad_norm": 1321.3060302734375, - "learning_rate": 3.592161470708526e-05, - "loss": 125.2756, - "step": 52560 - }, - { - "epoch": 0.4247771879216865, - "grad_norm": 1013.5741577148438, - "learning_rate": 3.59153343962072e-05, - "loss": 93.0219, - "step": 52570 - }, - { - "epoch": 0.42485799012597064, - "grad_norm": 866.7152099609375, - "learning_rate": 3.5909053234128895e-05, - "loss": 90.8444, - "step": 52580 - }, - { - "epoch": 0.4249387923302548, - "grad_norm": 1009.0472412109375, - "learning_rate": 3.590277122134015e-05, - "loss": 109.6744, - "step": 52590 - }, - { - "epoch": 0.4250195945345389, - "grad_norm": 420.8393249511719, - "learning_rate": 3.5896488358330856e-05, - "loss": 89.444, - "step": 52600 - }, - { - "epoch": 0.42510039673882305, - "grad_norm": 779.4595336914062, - "learning_rate": 3.5890204645590964e-05, - "loss": 171.8326, - "step": 52610 - }, - { - "epoch": 0.4251811989431072, - "grad_norm": 1126.5540771484375, - "learning_rate": 3.588392008361049e-05, - "loss": 103.0303, - "step": 52620 - }, - { - "epoch": 0.4252620011473913, - "grad_norm": 1083.139892578125, - "learning_rate": 3.587763467287953e-05, - "loss": 95.7945, - "step": 52630 - }, - { - "epoch": 0.42534280335167546, - "grad_norm": 1276.6583251953125, - "learning_rate": 3.5871348413888204e-05, - "loss": 101.9571, - "step": 52640 - }, - { - "epoch": 0.4254236055559596, - "grad_norm": 916.5853271484375, - "learning_rate": 3.586506130712676e-05, - "loss": 97.2014, - "step": 52650 - }, - { - "epoch": 0.4255044077602437, - "grad_norm": 998.537109375, - "learning_rate": 3.585877335308546e-05, - "loss": 108.7726, - "step": 52660 - }, - { - "epoch": 0.4255852099645278, - "grad_norm": 734.4119873046875, - "learning_rate": 3.585248455225466e-05, - "loss": 124.7199, - "step": 52670 - }, - { - "epoch": 0.42566601216881195, - "grad_norm": 773.6392211914062, - "learning_rate": 3.5846194905124757e-05, - "loss": 137.7873, - "step": 52680 - }, - { - "epoch": 0.4257468143730961, - "grad_norm": 1135.0164794921875, - "learning_rate": 3.5839904412186256e-05, - "loss": 119.2154, - "step": 52690 - }, - { - "epoch": 0.4258276165773802, - "grad_norm": 756.4196166992188, - "learning_rate": 3.5833613073929684e-05, - "loss": 85.9956, - "step": 52700 - }, - { - "epoch": 0.42590841878166436, - "grad_norm": 857.3016967773438, - "learning_rate": 3.582732089084566e-05, - "loss": 90.0257, - "step": 52710 - }, - { - "epoch": 0.4259892209859485, - "grad_norm": 771.9523315429688, - "learning_rate": 3.582102786342485e-05, - "loss": 107.2872, - "step": 52720 - }, - { - "epoch": 0.4260700231902326, - "grad_norm": 893.7564086914062, - "learning_rate": 3.581473399215802e-05, - "loss": 103.5436, - "step": 52730 - }, - { - "epoch": 0.42615082539451676, - "grad_norm": 765.8409423828125, - "learning_rate": 3.5808439277535964e-05, - "loss": 77.9583, - "step": 52740 - }, - { - "epoch": 0.4262316275988009, - "grad_norm": 1282.4066162109375, - "learning_rate": 3.580214372004956e-05, - "loss": 97.5404, - "step": 52750 - }, - { - "epoch": 0.42631242980308504, - "grad_norm": 950.70947265625, - "learning_rate": 3.5795847320189746e-05, - "loss": 79.9648, - "step": 52760 - }, - { - "epoch": 0.42639323200736917, - "grad_norm": 894.989501953125, - "learning_rate": 3.5789550078447526e-05, - "loss": 125.4587, - "step": 52770 - }, - { - "epoch": 0.4264740342116533, - "grad_norm": 581.9622802734375, - "learning_rate": 3.5783251995313985e-05, - "loss": 107.2445, - "step": 52780 - }, - { - "epoch": 0.42655483641593744, - "grad_norm": 871.0712280273438, - "learning_rate": 3.577695307128024e-05, - "loss": 104.1567, - "step": 52790 - }, - { - "epoch": 0.4266356386202216, - "grad_norm": 1061.6864013671875, - "learning_rate": 3.577065330683751e-05, - "loss": 121.5894, - "step": 52800 - }, - { - "epoch": 0.4267164408245057, - "grad_norm": 684.8545532226562, - "learning_rate": 3.576435270247706e-05, - "loss": 94.7415, - "step": 52810 - }, - { - "epoch": 0.42679724302878985, - "grad_norm": 760.6361694335938, - "learning_rate": 3.575805125869022e-05, - "loss": 95.5143, - "step": 52820 - }, - { - "epoch": 0.42687804523307393, - "grad_norm": 1069.88818359375, - "learning_rate": 3.5751748975968394e-05, - "loss": 131.3254, - "step": 52830 - }, - { - "epoch": 0.42695884743735807, - "grad_norm": 957.1797485351562, - "learning_rate": 3.574544585480305e-05, - "loss": 106.3905, - "step": 52840 - }, - { - "epoch": 0.4270396496416422, - "grad_norm": 1508.6466064453125, - "learning_rate": 3.573914189568571e-05, - "loss": 124.409, - "step": 52850 - }, - { - "epoch": 0.42712045184592634, - "grad_norm": 2168.17236328125, - "learning_rate": 3.573283709910798e-05, - "loss": 110.2865, - "step": 52860 - }, - { - "epoch": 0.4272012540502105, - "grad_norm": 656.3319702148438, - "learning_rate": 3.5726531465561504e-05, - "loss": 108.2774, - "step": 52870 - }, - { - "epoch": 0.4272820562544946, - "grad_norm": 1138.474609375, - "learning_rate": 3.572022499553802e-05, - "loss": 120.5321, - "step": 52880 - }, - { - "epoch": 0.42736285845877875, - "grad_norm": 1081.6744384765625, - "learning_rate": 3.571391768952932e-05, - "loss": 89.9379, - "step": 52890 - }, - { - "epoch": 0.4274436606630629, - "grad_norm": 1235.8037109375, - "learning_rate": 3.570760954802726e-05, - "loss": 132.014, - "step": 52900 - }, - { - "epoch": 0.427524462867347, - "grad_norm": 830.6211547851562, - "learning_rate": 3.5701300571523755e-05, - "loss": 120.3861, - "step": 52910 - }, - { - "epoch": 0.42760526507163116, - "grad_norm": 768.0026245117188, - "learning_rate": 3.569499076051081e-05, - "loss": 105.0827, - "step": 52920 - }, - { - "epoch": 0.4276860672759153, - "grad_norm": 1009.3343505859375, - "learning_rate": 3.5688680115480455e-05, - "loss": 103.2886, - "step": 52930 - }, - { - "epoch": 0.42776686948019943, - "grad_norm": 755.9207763671875, - "learning_rate": 3.568236863692482e-05, - "loss": 106.8309, - "step": 52940 - }, - { - "epoch": 0.42784767168448357, - "grad_norm": 575.2706909179688, - "learning_rate": 3.567605632533608e-05, - "loss": 89.6383, - "step": 52950 - }, - { - "epoch": 0.4279284738887677, - "grad_norm": 634.4887084960938, - "learning_rate": 3.56697431812065e-05, - "loss": 89.9975, - "step": 52960 - }, - { - "epoch": 0.42800927609305184, - "grad_norm": 964.8114624023438, - "learning_rate": 3.566342920502837e-05, - "loss": 94.1664, - "step": 52970 - }, - { - "epoch": 0.428090078297336, - "grad_norm": 598.5281982421875, - "learning_rate": 3.565711439729408e-05, - "loss": 111.6871, - "step": 52980 - }, - { - "epoch": 0.4281708805016201, - "grad_norm": 1215.67578125, - "learning_rate": 3.565079875849605e-05, - "loss": 97.9546, - "step": 52990 - }, - { - "epoch": 0.4282516827059042, - "grad_norm": 1326.29736328125, - "learning_rate": 3.564448228912682e-05, - "loss": 91.9959, - "step": 53000 - }, - { - "epoch": 0.4283324849101883, - "grad_norm": 827.5070190429688, - "learning_rate": 3.5638164989678935e-05, - "loss": 90.0973, - "step": 53010 - }, - { - "epoch": 0.42841328711447246, - "grad_norm": 989.9232177734375, - "learning_rate": 3.5631846860645044e-05, - "loss": 104.5394, - "step": 53020 - }, - { - "epoch": 0.4284940893187566, - "grad_norm": 1027.08203125, - "learning_rate": 3.562552790251785e-05, - "loss": 146.6958, - "step": 53030 - }, - { - "epoch": 0.42857489152304074, - "grad_norm": 1084.989013671875, - "learning_rate": 3.56192081157901e-05, - "loss": 122.7506, - "step": 53040 - }, - { - "epoch": 0.42865569372732487, - "grad_norm": 993.0697631835938, - "learning_rate": 3.561288750095465e-05, - "loss": 91.4283, - "step": 53050 - }, - { - "epoch": 0.428736495931609, - "grad_norm": 1259.4981689453125, - "learning_rate": 3.5606566058504375e-05, - "loss": 110.8139, - "step": 53060 - }, - { - "epoch": 0.42881729813589314, - "grad_norm": 1766.361572265625, - "learning_rate": 3.560024378893224e-05, - "loss": 97.641, - "step": 53070 - }, - { - "epoch": 0.4288981003401773, - "grad_norm": 971.803466796875, - "learning_rate": 3.559392069273127e-05, - "loss": 145.6928, - "step": 53080 - }, - { - "epoch": 0.4289789025444614, - "grad_norm": 538.7084350585938, - "learning_rate": 3.558759677039455e-05, - "loss": 121.1986, - "step": 53090 - }, - { - "epoch": 0.42905970474874555, - "grad_norm": 668.2804565429688, - "learning_rate": 3.5581272022415244e-05, - "loss": 115.6762, - "step": 53100 - }, - { - "epoch": 0.4291405069530297, - "grad_norm": 790.904296875, - "learning_rate": 3.557494644928654e-05, - "loss": 125.4438, - "step": 53110 - }, - { - "epoch": 0.4292213091573138, - "grad_norm": 1031.3563232421875, - "learning_rate": 3.5568620051501756e-05, - "loss": 107.4744, - "step": 53120 - }, - { - "epoch": 0.42930211136159796, - "grad_norm": 865.033447265625, - "learning_rate": 3.556229282955421e-05, - "loss": 72.1575, - "step": 53130 - }, - { - "epoch": 0.4293829135658821, - "grad_norm": 620.6161499023438, - "learning_rate": 3.555596478393733e-05, - "loss": 102.0142, - "step": 53140 - }, - { - "epoch": 0.42946371577016623, - "grad_norm": 1347.4356689453125, - "learning_rate": 3.554963591514457e-05, - "loss": 134.2964, - "step": 53150 - }, - { - "epoch": 0.42954451797445037, - "grad_norm": 1216.6492919921875, - "learning_rate": 3.554330622366949e-05, - "loss": 116.5282, - "step": 53160 - }, - { - "epoch": 0.42962532017873445, - "grad_norm": 1310.10693359375, - "learning_rate": 3.5536975710005677e-05, - "loss": 126.5732, - "step": 53170 - }, - { - "epoch": 0.4297061223830186, - "grad_norm": 1413.683349609375, - "learning_rate": 3.5530644374646815e-05, - "loss": 102.0488, - "step": 53180 - }, - { - "epoch": 0.4297869245873027, - "grad_norm": 757.0252075195312, - "learning_rate": 3.552431221808661e-05, - "loss": 94.13, - "step": 53190 - }, - { - "epoch": 0.42986772679158686, - "grad_norm": 1112.575927734375, - "learning_rate": 3.551797924081887e-05, - "loss": 103.2866, - "step": 53200 - }, - { - "epoch": 0.429948528995871, - "grad_norm": 1163.7847900390625, - "learning_rate": 3.551164544333745e-05, - "loss": 117.5729, - "step": 53210 - }, - { - "epoch": 0.43002933120015513, - "grad_norm": 734.8391723632812, - "learning_rate": 3.5505310826136286e-05, - "loss": 108.3876, - "step": 53220 - }, - { - "epoch": 0.43011013340443927, - "grad_norm": 780.405517578125, - "learning_rate": 3.549897538970934e-05, - "loss": 104.4934, - "step": 53230 - }, - { - "epoch": 0.4301909356087234, - "grad_norm": 1604.9815673828125, - "learning_rate": 3.5492639134550695e-05, - "loss": 121.8409, - "step": 53240 - }, - { - "epoch": 0.43027173781300754, - "grad_norm": 1050.0513916015625, - "learning_rate": 3.548630206115443e-05, - "loss": 108.9174, - "step": 53250 - }, - { - "epoch": 0.4303525400172917, - "grad_norm": 1055.3604736328125, - "learning_rate": 3.5479964170014746e-05, - "loss": 135.5137, - "step": 53260 - }, - { - "epoch": 0.4304333422215758, - "grad_norm": 628.5155639648438, - "learning_rate": 3.547362546162588e-05, - "loss": 88.647, - "step": 53270 - }, - { - "epoch": 0.43051414442585995, - "grad_norm": 1206.5001220703125, - "learning_rate": 3.546728593648213e-05, - "loss": 102.5352, - "step": 53280 - }, - { - "epoch": 0.4305949466301441, - "grad_norm": 689.5252075195312, - "learning_rate": 3.546094559507787e-05, - "loss": 91.7416, - "step": 53290 - }, - { - "epoch": 0.4306757488344282, - "grad_norm": 865.0218505859375, - "learning_rate": 3.545460443790753e-05, - "loss": 151.2288, - "step": 53300 - }, - { - "epoch": 0.43075655103871235, - "grad_norm": 1074.1102294921875, - "learning_rate": 3.544826246546563e-05, - "loss": 120.0512, - "step": 53310 - }, - { - "epoch": 0.4308373532429965, - "grad_norm": 1206.14111328125, - "learning_rate": 3.544191967824669e-05, - "loss": 94.8466, - "step": 53320 - }, - { - "epoch": 0.4309181554472806, - "grad_norm": 870.14501953125, - "learning_rate": 3.543557607674537e-05, - "loss": 103.3588, - "step": 53330 - }, - { - "epoch": 0.4309989576515647, - "grad_norm": 998.771484375, - "learning_rate": 3.542923166145633e-05, - "loss": 78.6056, - "step": 53340 - }, - { - "epoch": 0.43107975985584884, - "grad_norm": 1044.3873291015625, - "learning_rate": 3.542288643287434e-05, - "loss": 96.562, - "step": 53350 - }, - { - "epoch": 0.431160562060133, - "grad_norm": 1033.466064453125, - "learning_rate": 3.54165403914942e-05, - "loss": 107.2544, - "step": 53360 - }, - { - "epoch": 0.4312413642644171, - "grad_norm": 596.2382202148438, - "learning_rate": 3.541019353781079e-05, - "loss": 94.7843, - "step": 53370 - }, - { - "epoch": 0.43132216646870125, - "grad_norm": 1056.2789306640625, - "learning_rate": 3.540384587231906e-05, - "loss": 111.1058, - "step": 53380 - }, - { - "epoch": 0.4314029686729854, - "grad_norm": 1953.5147705078125, - "learning_rate": 3.5397497395514004e-05, - "loss": 103.7415, - "step": 53390 - }, - { - "epoch": 0.4314837708772695, - "grad_norm": 1631.471435546875, - "learning_rate": 3.53911481078907e-05, - "loss": 98.644, - "step": 53400 - }, - { - "epoch": 0.43156457308155366, - "grad_norm": 675.1654052734375, - "learning_rate": 3.538479800994426e-05, - "loss": 87.8158, - "step": 53410 - }, - { - "epoch": 0.4316453752858378, - "grad_norm": 2158.873291015625, - "learning_rate": 3.5378447102169895e-05, - "loss": 99.6781, - "step": 53420 - }, - { - "epoch": 0.43172617749012193, - "grad_norm": 893.4830322265625, - "learning_rate": 3.537209538506286e-05, - "loss": 116.2382, - "step": 53430 - }, - { - "epoch": 0.43180697969440607, - "grad_norm": 785.0932006835938, - "learning_rate": 3.536574285911847e-05, - "loss": 105.484, - "step": 53440 - }, - { - "epoch": 0.4318877818986902, - "grad_norm": 1152.9134521484375, - "learning_rate": 3.535938952483211e-05, - "loss": 87.4028, - "step": 53450 - }, - { - "epoch": 0.43196858410297434, - "grad_norm": 1089.3548583984375, - "learning_rate": 3.535303538269922e-05, - "loss": 93.0411, - "step": 53460 - }, - { - "epoch": 0.4320493863072585, - "grad_norm": 1677.3712158203125, - "learning_rate": 3.5346680433215316e-05, - "loss": 115.5275, - "step": 53470 - }, - { - "epoch": 0.4321301885115426, - "grad_norm": 1183.71435546875, - "learning_rate": 3.534032467687597e-05, - "loss": 91.5449, - "step": 53480 - }, - { - "epoch": 0.43221099071582675, - "grad_norm": 678.9725341796875, - "learning_rate": 3.533396811417682e-05, - "loss": 79.5156, - "step": 53490 - }, - { - "epoch": 0.4322917929201109, - "grad_norm": 1038.9942626953125, - "learning_rate": 3.532761074561355e-05, - "loss": 92.3728, - "step": 53500 - }, - { - "epoch": 0.432372595124395, - "grad_norm": 1048.84912109375, - "learning_rate": 3.532125257168193e-05, - "loss": 83.4261, - "step": 53510 - }, - { - "epoch": 0.4324533973286791, - "grad_norm": 874.8933715820312, - "learning_rate": 3.531489359287779e-05, - "loss": 122.8855, - "step": 53520 - }, - { - "epoch": 0.43253419953296324, - "grad_norm": 1571.4178466796875, - "learning_rate": 3.530853380969701e-05, - "loss": 107.6969, - "step": 53530 - }, - { - "epoch": 0.4326150017372474, - "grad_norm": 1101.8731689453125, - "learning_rate": 3.5302173222635524e-05, - "loss": 92.9595, - "step": 53540 - }, - { - "epoch": 0.4326958039415315, - "grad_norm": 838.156005859375, - "learning_rate": 3.529581183218937e-05, - "loss": 84.3149, - "step": 53550 - }, - { - "epoch": 0.43277660614581565, - "grad_norm": 688.3433837890625, - "learning_rate": 3.528944963885461e-05, - "loss": 92.1553, - "step": 53560 - }, - { - "epoch": 0.4328574083500998, - "grad_norm": 812.886474609375, - "learning_rate": 3.528308664312739e-05, - "loss": 105.7649, - "step": 53570 - }, - { - "epoch": 0.4329382105543839, - "grad_norm": 1860.865966796875, - "learning_rate": 3.527672284550389e-05, - "loss": 119.0018, - "step": 53580 - }, - { - "epoch": 0.43301901275866805, - "grad_norm": 939.641845703125, - "learning_rate": 3.5270358246480386e-05, - "loss": 124.0257, - "step": 53590 - }, - { - "epoch": 0.4330998149629522, - "grad_norm": 767.065673828125, - "learning_rate": 3.52639928465532e-05, - "loss": 96.6075, - "step": 53600 - }, - { - "epoch": 0.4331806171672363, - "grad_norm": 589.7621459960938, - "learning_rate": 3.525762664621872e-05, - "loss": 92.9088, - "step": 53610 - }, - { - "epoch": 0.43326141937152046, - "grad_norm": 1278.2021484375, - "learning_rate": 3.5251259645973394e-05, - "loss": 126.5657, - "step": 53620 - }, - { - "epoch": 0.4333422215758046, - "grad_norm": 803.1061401367188, - "learning_rate": 3.5244891846313736e-05, - "loss": 91.6572, - "step": 53630 - }, - { - "epoch": 0.43342302378008873, - "grad_norm": 1491.0948486328125, - "learning_rate": 3.523852324773631e-05, - "loss": 129.1915, - "step": 53640 - }, - { - "epoch": 0.43350382598437287, - "grad_norm": 1292.84912109375, - "learning_rate": 3.523215385073777e-05, - "loss": 128.6682, - "step": 53650 - }, - { - "epoch": 0.433584628188657, - "grad_norm": 770.8333740234375, - "learning_rate": 3.5225783655814796e-05, - "loss": 83.3723, - "step": 53660 - }, - { - "epoch": 0.43366543039294114, - "grad_norm": 1228.6915283203125, - "learning_rate": 3.5219412663464167e-05, - "loss": 143.0409, - "step": 53670 - }, - { - "epoch": 0.4337462325972253, - "grad_norm": 1719.6175537109375, - "learning_rate": 3.521304087418269e-05, - "loss": 120.807, - "step": 53680 - }, - { - "epoch": 0.43382703480150936, - "grad_norm": 695.94482421875, - "learning_rate": 3.520666828846726e-05, - "loss": 73.4487, - "step": 53690 - }, - { - "epoch": 0.4339078370057935, - "grad_norm": 1078.605224609375, - "learning_rate": 3.5200294906814824e-05, - "loss": 152.7214, - "step": 53700 - }, - { - "epoch": 0.43398863921007763, - "grad_norm": 835.3974609375, - "learning_rate": 3.5193920729722384e-05, - "loss": 100.8444, - "step": 53710 - }, - { - "epoch": 0.43406944141436177, - "grad_norm": 791.3154296875, - "learning_rate": 3.5187545757687015e-05, - "loss": 82.3049, - "step": 53720 - }, - { - "epoch": 0.4341502436186459, - "grad_norm": 631.8236083984375, - "learning_rate": 3.5181169991205866e-05, - "loss": 99.8519, - "step": 53730 - }, - { - "epoch": 0.43423104582293004, - "grad_norm": 635.5999145507812, - "learning_rate": 3.517479343077611e-05, - "loss": 121.0106, - "step": 53740 - }, - { - "epoch": 0.4343118480272142, - "grad_norm": 1538.1953125, - "learning_rate": 3.516841607689501e-05, - "loss": 104.1127, - "step": 53750 - }, - { - "epoch": 0.4343926502314983, - "grad_norm": 786.080322265625, - "learning_rate": 3.516203793005989e-05, - "loss": 116.9553, - "step": 53760 - }, - { - "epoch": 0.43447345243578245, - "grad_norm": 1163.4224853515625, - "learning_rate": 3.515565899076813e-05, - "loss": 108.2932, - "step": 53770 - }, - { - "epoch": 0.4345542546400666, - "grad_norm": 1153.8079833984375, - "learning_rate": 3.514927925951717e-05, - "loss": 107.7102, - "step": 53780 - }, - { - "epoch": 0.4346350568443507, - "grad_norm": 801.5924072265625, - "learning_rate": 3.514289873680451e-05, - "loss": 83.0471, - "step": 53790 - }, - { - "epoch": 0.43471585904863486, - "grad_norm": 778.8651733398438, - "learning_rate": 3.513651742312774e-05, - "loss": 79.3414, - "step": 53800 - }, - { - "epoch": 0.434796661252919, - "grad_norm": 923.1555786132812, - "learning_rate": 3.5130135318984456e-05, - "loss": 146.8048, - "step": 53810 - }, - { - "epoch": 0.43487746345720313, - "grad_norm": 852.566650390625, - "learning_rate": 3.512375242487236e-05, - "loss": 93.0826, - "step": 53820 - }, - { - "epoch": 0.43495826566148726, - "grad_norm": 868.22216796875, - "learning_rate": 3.511736874128922e-05, - "loss": 120.5144, - "step": 53830 - }, - { - "epoch": 0.4350390678657714, - "grad_norm": 1766.9429931640625, - "learning_rate": 3.511098426873283e-05, - "loss": 98.7306, - "step": 53840 - }, - { - "epoch": 0.43511987007005554, - "grad_norm": 952.6795654296875, - "learning_rate": 3.5104599007701054e-05, - "loss": 85.5349, - "step": 53850 - }, - { - "epoch": 0.4352006722743396, - "grad_norm": 963.0565795898438, - "learning_rate": 3.5098212958691854e-05, - "loss": 133.2661, - "step": 53860 - }, - { - "epoch": 0.43528147447862375, - "grad_norm": 1223.63623046875, - "learning_rate": 3.509182612220322e-05, - "loss": 154.7412, - "step": 53870 - }, - { - "epoch": 0.4353622766829079, - "grad_norm": 875.8056030273438, - "learning_rate": 3.50854384987332e-05, - "loss": 139.7883, - "step": 53880 - }, - { - "epoch": 0.435443078887192, - "grad_norm": 736.044677734375, - "learning_rate": 3.5079050088779926e-05, - "loss": 80.8797, - "step": 53890 - }, - { - "epoch": 0.43552388109147616, - "grad_norm": 506.26104736328125, - "learning_rate": 3.507266089284157e-05, - "loss": 84.248, - "step": 53900 - }, - { - "epoch": 0.4356046832957603, - "grad_norm": 922.2618408203125, - "learning_rate": 3.5066270911416373e-05, - "loss": 115.3159, - "step": 53910 - }, - { - "epoch": 0.43568548550004443, - "grad_norm": 1350.2366943359375, - "learning_rate": 3.5059880145002654e-05, - "loss": 88.9406, - "step": 53920 - }, - { - "epoch": 0.43576628770432857, - "grad_norm": 1098.39501953125, - "learning_rate": 3.505348859409876e-05, - "loss": 82.4904, - "step": 53930 - }, - { - "epoch": 0.4358470899086127, - "grad_norm": 807.7853393554688, - "learning_rate": 3.5047096259203135e-05, - "loss": 94.6615, - "step": 53940 - }, - { - "epoch": 0.43592789211289684, - "grad_norm": 1596.8291015625, - "learning_rate": 3.504070314081425e-05, - "loss": 118.4171, - "step": 53950 - }, - { - "epoch": 0.436008694317181, - "grad_norm": 1369.6641845703125, - "learning_rate": 3.503430923943066e-05, - "loss": 105.3305, - "step": 53960 - }, - { - "epoch": 0.4360894965214651, - "grad_norm": 8795.5234375, - "learning_rate": 3.5027914555550976e-05, - "loss": 96.2118, - "step": 53970 - }, - { - "epoch": 0.43617029872574925, - "grad_norm": 1102.4058837890625, - "learning_rate": 3.5021519089673876e-05, - "loss": 80.3191, - "step": 53980 - }, - { - "epoch": 0.4362511009300334, - "grad_norm": 2237.216796875, - "learning_rate": 3.501512284229807e-05, - "loss": 119.1992, - "step": 53990 - }, - { - "epoch": 0.4363319031343175, - "grad_norm": 1358.170654296875, - "learning_rate": 3.5008725813922386e-05, - "loss": 84.6494, - "step": 54000 - }, - { - "epoch": 0.43641270533860166, - "grad_norm": 1261.294677734375, - "learning_rate": 3.500232800504563e-05, - "loss": 106.1775, - "step": 54010 - }, - { - "epoch": 0.4364935075428858, - "grad_norm": 1167.9873046875, - "learning_rate": 3.4995929416166756e-05, - "loss": 135.7313, - "step": 54020 - }, - { - "epoch": 0.4365743097471699, - "grad_norm": 1792.2176513671875, - "learning_rate": 3.4989530047784716e-05, - "loss": 100.0604, - "step": 54030 - }, - { - "epoch": 0.436655111951454, - "grad_norm": 1185.2357177734375, - "learning_rate": 3.498312990039856e-05, - "loss": 92.0171, - "step": 54040 - }, - { - "epoch": 0.43673591415573815, - "grad_norm": 684.9873657226562, - "learning_rate": 3.4976728974507384e-05, - "loss": 103.4003, - "step": 54050 - }, - { - "epoch": 0.4368167163600223, - "grad_norm": 1142.3291015625, - "learning_rate": 3.497032727061034e-05, - "loss": 104.779, - "step": 54060 - }, - { - "epoch": 0.4368975185643064, - "grad_norm": 600.034423828125, - "learning_rate": 3.4963924789206636e-05, - "loss": 122.4895, - "step": 54070 - }, - { - "epoch": 0.43697832076859056, - "grad_norm": 702.1489868164062, - "learning_rate": 3.495752153079557e-05, - "loss": 103.3102, - "step": 54080 - }, - { - "epoch": 0.4370591229728747, - "grad_norm": 655.8955688476562, - "learning_rate": 3.495111749587647e-05, - "loss": 92.9466, - "step": 54090 - }, - { - "epoch": 0.43713992517715883, - "grad_norm": 580.0801391601562, - "learning_rate": 3.494471268494875e-05, - "loss": 104.6388, - "step": 54100 - }, - { - "epoch": 0.43722072738144296, - "grad_norm": 808.445556640625, - "learning_rate": 3.4938307098511846e-05, - "loss": 90.6743, - "step": 54110 - }, - { - "epoch": 0.4373015295857271, - "grad_norm": 1305.1773681640625, - "learning_rate": 3.493190073706529e-05, - "loss": 76.8265, - "step": 54120 - }, - { - "epoch": 0.43738233179001124, - "grad_norm": 801.8179931640625, - "learning_rate": 3.492549360110868e-05, - "loss": 140.9319, - "step": 54130 - }, - { - "epoch": 0.4374631339942954, - "grad_norm": 2203.01171875, - "learning_rate": 3.491908569114164e-05, - "loss": 119.1102, - "step": 54140 - }, - { - "epoch": 0.4375439361985795, - "grad_norm": 1139.506103515625, - "learning_rate": 3.491267700766386e-05, - "loss": 129.6709, - "step": 54150 - }, - { - "epoch": 0.43762473840286364, - "grad_norm": 853.3720703125, - "learning_rate": 3.4906267551175124e-05, - "loss": 108.6392, - "step": 54160 - }, - { - "epoch": 0.4377055406071478, - "grad_norm": 779.7216186523438, - "learning_rate": 3.489985732217525e-05, - "loss": 101.385, - "step": 54170 - }, - { - "epoch": 0.4377863428114319, - "grad_norm": 1023.5377807617188, - "learning_rate": 3.489344632116412e-05, - "loss": 118.1631, - "step": 54180 - }, - { - "epoch": 0.43786714501571605, - "grad_norm": 1276.5364990234375, - "learning_rate": 3.488703454864167e-05, - "loss": 115.0822, - "step": 54190 - }, - { - "epoch": 0.4379479472200002, - "grad_norm": 690.0712280273438, - "learning_rate": 3.488062200510791e-05, - "loss": 110.5314, - "step": 54200 - }, - { - "epoch": 0.43802874942428427, - "grad_norm": 838.189697265625, - "learning_rate": 3.48742086910629e-05, - "loss": 111.8782, - "step": 54210 - }, - { - "epoch": 0.4381095516285684, - "grad_norm": 1070.1236572265625, - "learning_rate": 3.4867794607006784e-05, - "loss": 77.157, - "step": 54220 - }, - { - "epoch": 0.43819035383285254, - "grad_norm": 596.0957641601562, - "learning_rate": 3.486137975343971e-05, - "loss": 107.8391, - "step": 54230 - }, - { - "epoch": 0.4382711560371367, - "grad_norm": 1297.022705078125, - "learning_rate": 3.485496413086195e-05, - "loss": 138.3767, - "step": 54240 - }, - { - "epoch": 0.4383519582414208, - "grad_norm": 1065.7069091796875, - "learning_rate": 3.484854773977378e-05, - "loss": 94.8719, - "step": 54250 - }, - { - "epoch": 0.43843276044570495, - "grad_norm": 1461.8717041015625, - "learning_rate": 3.484213058067559e-05, - "loss": 112.1025, - "step": 54260 - }, - { - "epoch": 0.4385135626499891, - "grad_norm": 978.3469848632812, - "learning_rate": 3.4835712654067785e-05, - "loss": 128.9673, - "step": 54270 - }, - { - "epoch": 0.4385943648542732, - "grad_norm": 725.2369995117188, - "learning_rate": 3.482929396045087e-05, - "loss": 99.5264, - "step": 54280 - }, - { - "epoch": 0.43867516705855736, - "grad_norm": 813.05859375, - "learning_rate": 3.482287450032536e-05, - "loss": 109.4832, - "step": 54290 - }, - { - "epoch": 0.4387559692628415, - "grad_norm": 933.5191650390625, - "learning_rate": 3.481645427419188e-05, - "loss": 94.788, - "step": 54300 - }, - { - "epoch": 0.43883677146712563, - "grad_norm": 1071.7193603515625, - "learning_rate": 3.481003328255108e-05, - "loss": 88.3042, - "step": 54310 - }, - { - "epoch": 0.43891757367140977, - "grad_norm": 866.9697265625, - "learning_rate": 3.4803611525903685e-05, - "loss": 93.5913, - "step": 54320 - }, - { - "epoch": 0.4389983758756939, - "grad_norm": 1327.5521240234375, - "learning_rate": 3.479718900475049e-05, - "loss": 115.5358, - "step": 54330 - }, - { - "epoch": 0.43907917807997804, - "grad_norm": 996.3737182617188, - "learning_rate": 3.479076571959231e-05, - "loss": 164.3083, - "step": 54340 - }, - { - "epoch": 0.4391599802842622, - "grad_norm": 901.630615234375, - "learning_rate": 3.4784341670930065e-05, - "loss": 94.0621, - "step": 54350 - }, - { - "epoch": 0.4392407824885463, - "grad_norm": 775.2239379882812, - "learning_rate": 3.477791685926471e-05, - "loss": 86.5708, - "step": 54360 - }, - { - "epoch": 0.43932158469283045, - "grad_norm": 992.7667846679688, - "learning_rate": 3.477149128509727e-05, - "loss": 106.6585, - "step": 54370 - }, - { - "epoch": 0.4394023868971145, - "grad_norm": 1045.9317626953125, - "learning_rate": 3.4765064948928814e-05, - "loss": 98.0067, - "step": 54380 - }, - { - "epoch": 0.43948318910139866, - "grad_norm": 786.0052490234375, - "learning_rate": 3.475863785126049e-05, - "loss": 115.1629, - "step": 54390 - }, - { - "epoch": 0.4395639913056828, - "grad_norm": 503.0232849121094, - "learning_rate": 3.475220999259349e-05, - "loss": 139.5835, - "step": 54400 - }, - { - "epoch": 0.43964479350996694, - "grad_norm": 898.33984375, - "learning_rate": 3.474578137342909e-05, - "loss": 104.1695, - "step": 54410 - }, - { - "epoch": 0.43972559571425107, - "grad_norm": 2436.810791015625, - "learning_rate": 3.473935199426858e-05, - "loss": 131.1208, - "step": 54420 - }, - { - "epoch": 0.4398063979185352, - "grad_norm": 555.6993408203125, - "learning_rate": 3.4732921855613355e-05, - "loss": 90.429, - "step": 54430 - }, - { - "epoch": 0.43988720012281934, - "grad_norm": 1309.881591796875, - "learning_rate": 3.4726490957964834e-05, - "loss": 116.6967, - "step": 54440 - }, - { - "epoch": 0.4399680023271035, - "grad_norm": 394.55023193359375, - "learning_rate": 3.4720059301824525e-05, - "loss": 110.7494, - "step": 54450 - }, - { - "epoch": 0.4400488045313876, - "grad_norm": 805.7823486328125, - "learning_rate": 3.471362688769398e-05, - "loss": 101.1482, - "step": 54460 - }, - { - "epoch": 0.44012960673567175, - "grad_norm": 892.6566162109375, - "learning_rate": 3.4707193716074816e-05, - "loss": 89.6897, - "step": 54470 - }, - { - "epoch": 0.4402104089399559, - "grad_norm": 1127.4554443359375, - "learning_rate": 3.4700759787468695e-05, - "loss": 112.66, - "step": 54480 - }, - { - "epoch": 0.44029121114424, - "grad_norm": 2058.16455078125, - "learning_rate": 3.4694325102377355e-05, - "loss": 131.1624, - "step": 54490 - }, - { - "epoch": 0.44037201334852416, - "grad_norm": 948.6096801757812, - "learning_rate": 3.4687889661302576e-05, - "loss": 93.3497, - "step": 54500 - }, - { - "epoch": 0.4404528155528083, - "grad_norm": 1032.0595703125, - "learning_rate": 3.468145346474622e-05, - "loss": 102.0234, - "step": 54510 - }, - { - "epoch": 0.44053361775709243, - "grad_norm": 888.0936889648438, - "learning_rate": 3.467501651321019e-05, - "loss": 117.6697, - "step": 54520 - }, - { - "epoch": 0.44061441996137657, - "grad_norm": 1172.1923828125, - "learning_rate": 3.466857880719645e-05, - "loss": 111.5572, - "step": 54530 - }, - { - "epoch": 0.4406952221656607, - "grad_norm": 866.07861328125, - "learning_rate": 3.466214034720702e-05, - "loss": 108.2314, - "step": 54540 - }, - { - "epoch": 0.4407760243699448, - "grad_norm": 1167.5728759765625, - "learning_rate": 3.4655701133744e-05, - "loss": 99.7825, - "step": 54550 - }, - { - "epoch": 0.4408568265742289, - "grad_norm": 1713.0015869140625, - "learning_rate": 3.4649261167309526e-05, - "loss": 126.3239, - "step": 54560 - }, - { - "epoch": 0.44093762877851306, - "grad_norm": 739.7220458984375, - "learning_rate": 3.46428204484058e-05, - "loss": 110.7294, - "step": 54570 - }, - { - "epoch": 0.4410184309827972, - "grad_norm": 824.5387573242188, - "learning_rate": 3.4636378977535075e-05, - "loss": 95.3479, - "step": 54580 - }, - { - "epoch": 0.44109923318708133, - "grad_norm": 1037.643310546875, - "learning_rate": 3.462993675519968e-05, - "loss": 101.4099, - "step": 54590 - }, - { - "epoch": 0.44118003539136547, - "grad_norm": 1080.090576171875, - "learning_rate": 3.462349378190199e-05, - "loss": 94.7762, - "step": 54600 - }, - { - "epoch": 0.4412608375956496, - "grad_norm": 799.8740234375, - "learning_rate": 3.461705005814444e-05, - "loss": 86.9065, - "step": 54610 - }, - { - "epoch": 0.44134163979993374, - "grad_norm": 824.4632568359375, - "learning_rate": 3.461060558442952e-05, - "loss": 146.81, - "step": 54620 - }, - { - "epoch": 0.4414224420042179, - "grad_norm": 1098.3951416015625, - "learning_rate": 3.4604160361259796e-05, - "loss": 89.7208, - "step": 54630 - }, - { - "epoch": 0.441503244208502, - "grad_norm": 544.5059814453125, - "learning_rate": 3.459771438913787e-05, - "loss": 99.5069, - "step": 54640 - }, - { - "epoch": 0.44158404641278615, - "grad_norm": 480.9939270019531, - "learning_rate": 3.459126766856641e-05, - "loss": 97.0066, - "step": 54650 - }, - { - "epoch": 0.4416648486170703, - "grad_norm": 1367.4161376953125, - "learning_rate": 3.458482020004815e-05, - "loss": 92.9867, - "step": 54660 - }, - { - "epoch": 0.4417456508213544, - "grad_norm": 775.5013427734375, - "learning_rate": 3.457837198408588e-05, - "loss": 125.9974, - "step": 54670 - }, - { - "epoch": 0.44182645302563855, - "grad_norm": 422.24639892578125, - "learning_rate": 3.457192302118244e-05, - "loss": 113.1714, - "step": 54680 - }, - { - "epoch": 0.4419072552299227, - "grad_norm": 781.7955932617188, - "learning_rate": 3.4565473311840735e-05, - "loss": 113.1297, - "step": 54690 - }, - { - "epoch": 0.4419880574342068, - "grad_norm": 885.1204223632812, - "learning_rate": 3.455902285656373e-05, - "loss": 121.7524, - "step": 54700 - }, - { - "epoch": 0.44206885963849096, - "grad_norm": 925.4169311523438, - "learning_rate": 3.455257165585444e-05, - "loss": 113.4226, - "step": 54710 - }, - { - "epoch": 0.44214966184277504, - "grad_norm": 1047.358642578125, - "learning_rate": 3.454611971021593e-05, - "loss": 127.4025, - "step": 54720 - }, - { - "epoch": 0.4422304640470592, - "grad_norm": 741.985595703125, - "learning_rate": 3.453966702015137e-05, - "loss": 95.042, - "step": 54730 - }, - { - "epoch": 0.4423112662513433, - "grad_norm": 477.40509033203125, - "learning_rate": 3.453321358616393e-05, - "loss": 77.9872, - "step": 54740 - }, - { - "epoch": 0.44239206845562745, - "grad_norm": 1453.6295166015625, - "learning_rate": 3.452675940875686e-05, - "loss": 111.1772, - "step": 54750 - }, - { - "epoch": 0.4424728706599116, - "grad_norm": 920.3239135742188, - "learning_rate": 3.452030448843347e-05, - "loss": 136.883, - "step": 54760 - }, - { - "epoch": 0.4425536728641957, - "grad_norm": 1079.620361328125, - "learning_rate": 3.451384882569714e-05, - "loss": 100.5099, - "step": 54770 - }, - { - "epoch": 0.44263447506847986, - "grad_norm": 975.9778442382812, - "learning_rate": 3.45073924210513e-05, - "loss": 129.199, - "step": 54780 - }, - { - "epoch": 0.442715277272764, - "grad_norm": 847.71337890625, - "learning_rate": 3.4500935274999413e-05, - "loss": 145.1074, - "step": 54790 - }, - { - "epoch": 0.44279607947704813, - "grad_norm": 1253.744140625, - "learning_rate": 3.4494477388045035e-05, - "loss": 96.9493, - "step": 54800 - }, - { - "epoch": 0.44287688168133227, - "grad_norm": 1273.61181640625, - "learning_rate": 3.448801876069176e-05, - "loss": 120.0897, - "step": 54810 - }, - { - "epoch": 0.4429576838856164, - "grad_norm": 1555.8997802734375, - "learning_rate": 3.448155939344324e-05, - "loss": 112.1916, - "step": 54820 - }, - { - "epoch": 0.44303848608990054, - "grad_norm": 1842.7396240234375, - "learning_rate": 3.44750992868032e-05, - "loss": 122.5, - "step": 54830 - }, - { - "epoch": 0.4431192882941847, - "grad_norm": 835.58203125, - "learning_rate": 3.4468638441275415e-05, - "loss": 82.2662, - "step": 54840 - }, - { - "epoch": 0.4432000904984688, - "grad_norm": 1233.536376953125, - "learning_rate": 3.4462176857363704e-05, - "loss": 86.3533, - "step": 54850 - }, - { - "epoch": 0.44328089270275295, - "grad_norm": 640.84228515625, - "learning_rate": 3.445571453557196e-05, - "loss": 151.5007, - "step": 54860 - }, - { - "epoch": 0.4433616949070371, - "grad_norm": 645.3198852539062, - "learning_rate": 3.4449251476404135e-05, - "loss": 92.998, - "step": 54870 - }, - { - "epoch": 0.4434424971113212, - "grad_norm": 1311.8433837890625, - "learning_rate": 3.444278768036421e-05, - "loss": 98.6998, - "step": 54880 - }, - { - "epoch": 0.44352329931560536, - "grad_norm": 1376.2677001953125, - "learning_rate": 3.443632314795627e-05, - "loss": 99.9757, - "step": 54890 - }, - { - "epoch": 0.44360410151988944, - "grad_norm": 1036.857666015625, - "learning_rate": 3.442985787968442e-05, - "loss": 127.4637, - "step": 54900 - }, - { - "epoch": 0.4436849037241736, - "grad_norm": 826.0955810546875, - "learning_rate": 3.442339187605283e-05, - "loss": 94.8067, - "step": 54910 - }, - { - "epoch": 0.4437657059284577, - "grad_norm": 595.4618530273438, - "learning_rate": 3.4416925137565754e-05, - "loss": 127.8379, - "step": 54920 - }, - { - "epoch": 0.44384650813274185, - "grad_norm": 1041.47900390625, - "learning_rate": 3.441045766472745e-05, - "loss": 104.8486, - "step": 54930 - }, - { - "epoch": 0.443927310337026, - "grad_norm": 1513.0726318359375, - "learning_rate": 3.440398945804229e-05, - "loss": 116.8285, - "step": 54940 - }, - { - "epoch": 0.4440081125413101, - "grad_norm": 521.875, - "learning_rate": 3.439752051801467e-05, - "loss": 84.596, - "step": 54950 - }, - { - "epoch": 0.44408891474559425, - "grad_norm": 1719.56201171875, - "learning_rate": 3.439105084514905e-05, - "loss": 334.509, - "step": 54960 - }, - { - "epoch": 0.4441697169498784, - "grad_norm": 1043.5592041015625, - "learning_rate": 3.438458043994995e-05, - "loss": 108.8561, - "step": 54970 - }, - { - "epoch": 0.4442505191541625, - "grad_norm": 999.97119140625, - "learning_rate": 3.437810930292195e-05, - "loss": 104.3715, - "step": 54980 - }, - { - "epoch": 0.44433132135844666, - "grad_norm": 789.031982421875, - "learning_rate": 3.437163743456967e-05, - "loss": 124.3942, - "step": 54990 - }, - { - "epoch": 0.4444121235627308, - "grad_norm": 2851.12158203125, - "learning_rate": 3.436516483539781e-05, - "loss": 115.5443, - "step": 55000 - }, - { - "epoch": 0.44449292576701493, - "grad_norm": 1518.363525390625, - "learning_rate": 3.4358691505911104e-05, - "loss": 108.8778, - "step": 55010 - }, - { - "epoch": 0.44457372797129907, - "grad_norm": 974.4264526367188, - "learning_rate": 3.435221744661438e-05, - "loss": 108.4476, - "step": 55020 - }, - { - "epoch": 0.4446545301755832, - "grad_norm": 1133.935546875, - "learning_rate": 3.434574265801247e-05, - "loss": 82.8257, - "step": 55030 - }, - { - "epoch": 0.44473533237986734, - "grad_norm": 852.2101440429688, - "learning_rate": 3.433926714061032e-05, - "loss": 116.7943, - "step": 55040 - }, - { - "epoch": 0.4448161345841515, - "grad_norm": 933.7111206054688, - "learning_rate": 3.433279089491288e-05, - "loss": 95.6058, - "step": 55050 - }, - { - "epoch": 0.4448969367884356, - "grad_norm": 918.9985961914062, - "learning_rate": 3.432631392142519e-05, - "loss": 120.7652, - "step": 55060 - }, - { - "epoch": 0.4449777389927197, - "grad_norm": 856.4068603515625, - "learning_rate": 3.4319836220652335e-05, - "loss": 70.9872, - "step": 55070 - }, - { - "epoch": 0.44505854119700383, - "grad_norm": 882.0916137695312, - "learning_rate": 3.431335779309947e-05, - "loss": 103.2964, - "step": 55080 - }, - { - "epoch": 0.44513934340128797, - "grad_norm": 911.1038818359375, - "learning_rate": 3.430687863927178e-05, - "loss": 127.5514, - "step": 55090 - }, - { - "epoch": 0.4452201456055721, - "grad_norm": 1076.07421875, - "learning_rate": 3.430039875967454e-05, - "loss": 116.2657, - "step": 55100 - }, - { - "epoch": 0.44530094780985624, - "grad_norm": 671.8142700195312, - "learning_rate": 3.429391815481305e-05, - "loss": 106.82, - "step": 55110 - }, - { - "epoch": 0.4453817500141404, - "grad_norm": 1284.429931640625, - "learning_rate": 3.428743682519269e-05, - "loss": 141.1629, - "step": 55120 - }, - { - "epoch": 0.4454625522184245, - "grad_norm": 946.4417724609375, - "learning_rate": 3.428095477131888e-05, - "loss": 104.9157, - "step": 55130 - }, - { - "epoch": 0.44554335442270865, - "grad_norm": 1418.9573974609375, - "learning_rate": 3.427447199369711e-05, - "loss": 90.5976, - "step": 55140 - }, - { - "epoch": 0.4456241566269928, - "grad_norm": 1282.1669921875, - "learning_rate": 3.426798849283291e-05, - "loss": 109.9933, - "step": 55150 - }, - { - "epoch": 0.4457049588312769, - "grad_norm": 690.4255981445312, - "learning_rate": 3.4261504269231904e-05, - "loss": 122.1959, - "step": 55160 - }, - { - "epoch": 0.44578576103556106, - "grad_norm": 1454.087158203125, - "learning_rate": 3.425501932339971e-05, - "loss": 165.4744, - "step": 55170 - }, - { - "epoch": 0.4458665632398452, - "grad_norm": 646.5927734375, - "learning_rate": 3.4248533655842066e-05, - "loss": 112.0175, - "step": 55180 - }, - { - "epoch": 0.44594736544412933, - "grad_norm": 1006.7059936523438, - "learning_rate": 3.4242047267064715e-05, - "loss": 85.0201, - "step": 55190 - }, - { - "epoch": 0.44602816764841346, - "grad_norm": 936.4608764648438, - "learning_rate": 3.423556015757349e-05, - "loss": 99.7377, - "step": 55200 - }, - { - "epoch": 0.4461089698526976, - "grad_norm": 1159.5823974609375, - "learning_rate": 3.4229072327874274e-05, - "loss": 111.9427, - "step": 55210 - }, - { - "epoch": 0.44618977205698174, - "grad_norm": 974.6563720703125, - "learning_rate": 3.4222583778472996e-05, - "loss": 82.0316, - "step": 55220 - }, - { - "epoch": 0.4462705742612659, - "grad_norm": 700.6890869140625, - "learning_rate": 3.421609450987563e-05, - "loss": 111.695, - "step": 55230 - }, - { - "epoch": 0.44635137646554995, - "grad_norm": 1090.9473876953125, - "learning_rate": 3.4209604522588255e-05, - "loss": 78.592, - "step": 55240 - }, - { - "epoch": 0.4464321786698341, - "grad_norm": 973.021240234375, - "learning_rate": 3.4203113817116957e-05, - "loss": 80.9439, - "step": 55250 - }, - { - "epoch": 0.4465129808741182, - "grad_norm": 928.7651977539062, - "learning_rate": 3.419662239396789e-05, - "loss": 114.6702, - "step": 55260 - }, - { - "epoch": 0.44659378307840236, - "grad_norm": 617.3494262695312, - "learning_rate": 3.419013025364727e-05, - "loss": 118.6977, - "step": 55270 - }, - { - "epoch": 0.4466745852826865, - "grad_norm": 628.002197265625, - "learning_rate": 3.418363739666137e-05, - "loss": 139.217, - "step": 55280 - }, - { - "epoch": 0.44675538748697063, - "grad_norm": 1211.73193359375, - "learning_rate": 3.417714382351652e-05, - "loss": 83.0112, - "step": 55290 - }, - { - "epoch": 0.44683618969125477, - "grad_norm": 863.4985961914062, - "learning_rate": 3.417064953471911e-05, - "loss": 117.3713, - "step": 55300 - }, - { - "epoch": 0.4469169918955389, - "grad_norm": 688.23193359375, - "learning_rate": 3.416415453077555e-05, - "loss": 124.7274, - "step": 55310 - }, - { - "epoch": 0.44699779409982304, - "grad_norm": 940.3441772460938, - "learning_rate": 3.415765881219236e-05, - "loss": 146.5833, - "step": 55320 - }, - { - "epoch": 0.4470785963041072, - "grad_norm": 1715.5086669921875, - "learning_rate": 3.4151162379476075e-05, - "loss": 166.437, - "step": 55330 - }, - { - "epoch": 0.4471593985083913, - "grad_norm": 652.9030151367188, - "learning_rate": 3.414466523313332e-05, - "loss": 87.9318, - "step": 55340 - }, - { - "epoch": 0.44724020071267545, - "grad_norm": 1491.0399169921875, - "learning_rate": 3.413816737367073e-05, - "loss": 108.5132, - "step": 55350 - }, - { - "epoch": 0.4473210029169596, - "grad_norm": 1970.426025390625, - "learning_rate": 3.4131668801595027e-05, - "loss": 107.9846, - "step": 55360 - }, - { - "epoch": 0.4474018051212437, - "grad_norm": 958.4073486328125, - "learning_rate": 3.4125169517413e-05, - "loss": 123.8806, - "step": 55370 - }, - { - "epoch": 0.44748260732552786, - "grad_norm": 867.6197509765625, - "learning_rate": 3.411866952163146e-05, - "loss": 89.6519, - "step": 55380 - }, - { - "epoch": 0.447563409529812, - "grad_norm": 1201.0921630859375, - "learning_rate": 3.4112168814757307e-05, - "loss": 119.9681, - "step": 55390 - }, - { - "epoch": 0.44764421173409613, - "grad_norm": 777.0783081054688, - "learning_rate": 3.410566739729746e-05, - "loss": 96.5677, - "step": 55400 - }, - { - "epoch": 0.4477250139383802, - "grad_norm": 878.3605346679688, - "learning_rate": 3.409916526975892e-05, - "loss": 88.3176, - "step": 55410 - }, - { - "epoch": 0.44780581614266435, - "grad_norm": 927.0245361328125, - "learning_rate": 3.409266243264874e-05, - "loss": 103.203, - "step": 55420 - }, - { - "epoch": 0.4478866183469485, - "grad_norm": 1339.468994140625, - "learning_rate": 3.408615888647402e-05, - "loss": 100.4267, - "step": 55430 - }, - { - "epoch": 0.4479674205512326, - "grad_norm": 756.3515014648438, - "learning_rate": 3.407965463174192e-05, - "loss": 104.8354, - "step": 55440 - }, - { - "epoch": 0.44804822275551676, - "grad_norm": 1440.641845703125, - "learning_rate": 3.407314966895966e-05, - "loss": 98.579, - "step": 55450 - }, - { - "epoch": 0.4481290249598009, - "grad_norm": 635.2562255859375, - "learning_rate": 3.4066643998634505e-05, - "loss": 119.871, - "step": 55460 - }, - { - "epoch": 0.44820982716408503, - "grad_norm": 895.081787109375, - "learning_rate": 3.406013762127379e-05, - "loss": 146.1503, - "step": 55470 - }, - { - "epoch": 0.44829062936836916, - "grad_norm": 1211.1097412109375, - "learning_rate": 3.4053630537384885e-05, - "loss": 104.4044, - "step": 55480 - }, - { - "epoch": 0.4483714315726533, - "grad_norm": 918.4685668945312, - "learning_rate": 3.4047122747475224e-05, - "loss": 82.5837, - "step": 55490 - }, - { - "epoch": 0.44845223377693744, - "grad_norm": 611.8151245117188, - "learning_rate": 3.4040614252052305e-05, - "loss": 125.1051, - "step": 55500 - }, - { - "epoch": 0.4485330359812216, - "grad_norm": 760.5692749023438, - "learning_rate": 3.403410505162369e-05, - "loss": 136.7812, - "step": 55510 - }, - { - "epoch": 0.4486138381855057, - "grad_norm": 678.6265258789062, - "learning_rate": 3.402759514669694e-05, - "loss": 102.5119, - "step": 55520 - }, - { - "epoch": 0.44869464038978984, - "grad_norm": 399.2571105957031, - "learning_rate": 3.402108453777974e-05, - "loss": 96.2267, - "step": 55530 - }, - { - "epoch": 0.448775442594074, - "grad_norm": 719.0748901367188, - "learning_rate": 3.401457322537979e-05, - "loss": 121.0691, - "step": 55540 - }, - { - "epoch": 0.4488562447983581, - "grad_norm": 1012.3196411132812, - "learning_rate": 3.400806121000487e-05, - "loss": 94.6327, - "step": 55550 - }, - { - "epoch": 0.44893704700264225, - "grad_norm": 882.3594970703125, - "learning_rate": 3.400154849216278e-05, - "loss": 93.5721, - "step": 55560 - }, - { - "epoch": 0.4490178492069264, - "grad_norm": 660.6729736328125, - "learning_rate": 3.399503507236141e-05, - "loss": 96.6749, - "step": 55570 - }, - { - "epoch": 0.4490986514112105, - "grad_norm": 895.5341796875, - "learning_rate": 3.398852095110868e-05, - "loss": 84.8421, - "step": 55580 - }, - { - "epoch": 0.4491794536154946, - "grad_norm": 779.3908081054688, - "learning_rate": 3.3982006128912584e-05, - "loss": 84.597, - "step": 55590 - }, - { - "epoch": 0.44926025581977874, - "grad_norm": 1128.20947265625, - "learning_rate": 3.397549060628116e-05, - "loss": 118.2778, - "step": 55600 - }, - { - "epoch": 0.4493410580240629, - "grad_norm": 688.1251220703125, - "learning_rate": 3.3968974383722495e-05, - "loss": 81.4213, - "step": 55610 - }, - { - "epoch": 0.449421860228347, - "grad_norm": 906.6687622070312, - "learning_rate": 3.396245746174473e-05, - "loss": 129.4966, - "step": 55620 - }, - { - "epoch": 0.44950266243263115, - "grad_norm": 475.6767883300781, - "learning_rate": 3.3955939840856096e-05, - "loss": 95.1173, - "step": 55630 - }, - { - "epoch": 0.4495834646369153, - "grad_norm": 786.6535034179688, - "learning_rate": 3.394942152156482e-05, - "loss": 80.7174, - "step": 55640 - }, - { - "epoch": 0.4496642668411994, - "grad_norm": 884.5067749023438, - "learning_rate": 3.3942902504379235e-05, - "loss": 97.7728, - "step": 55650 - }, - { - "epoch": 0.44974506904548356, - "grad_norm": 1186.8828125, - "learning_rate": 3.39363827898077e-05, - "loss": 170.5884, - "step": 55660 - }, - { - "epoch": 0.4498258712497677, - "grad_norm": 617.2779541015625, - "learning_rate": 3.392986237835863e-05, - "loss": 107.7082, - "step": 55670 - }, - { - "epoch": 0.44990667345405183, - "grad_norm": 524.74365234375, - "learning_rate": 3.392334127054051e-05, - "loss": 97.5946, - "step": 55680 - }, - { - "epoch": 0.44998747565833597, - "grad_norm": 821.1349487304688, - "learning_rate": 3.391681946686186e-05, - "loss": 123.4892, - "step": 55690 - }, - { - "epoch": 0.4500682778626201, - "grad_norm": 1358.6807861328125, - "learning_rate": 3.3910296967831266e-05, - "loss": 100.7652, - "step": 55700 - }, - { - "epoch": 0.45014908006690424, - "grad_norm": 793.5254516601562, - "learning_rate": 3.390377377395738e-05, - "loss": 103.1433, - "step": 55710 - }, - { - "epoch": 0.4502298822711884, - "grad_norm": 813.9572143554688, - "learning_rate": 3.389724988574887e-05, - "loss": 116.5025, - "step": 55720 - }, - { - "epoch": 0.4503106844754725, - "grad_norm": 1072.8924560546875, - "learning_rate": 3.389072530371451e-05, - "loss": 75.8144, - "step": 55730 - }, - { - "epoch": 0.45039148667975665, - "grad_norm": 692.0950317382812, - "learning_rate": 3.388420002836307e-05, - "loss": 100.4977, - "step": 55740 - }, - { - "epoch": 0.4504722888840408, - "grad_norm": 595.9341430664062, - "learning_rate": 3.387767406020343e-05, - "loss": 99.5261, - "step": 55750 - }, - { - "epoch": 0.45055309108832486, - "grad_norm": 967.0464477539062, - "learning_rate": 3.387114739974448e-05, - "loss": 110.7147, - "step": 55760 - }, - { - "epoch": 0.450633893292609, - "grad_norm": 1555.326416015625, - "learning_rate": 3.38646200474952e-05, - "loss": 112.2924, - "step": 55770 - }, - { - "epoch": 0.45071469549689314, - "grad_norm": 1116.3736572265625, - "learning_rate": 3.3858092003964594e-05, - "loss": 93.3444, - "step": 55780 - }, - { - "epoch": 0.4507954977011773, - "grad_norm": 737.1619873046875, - "learning_rate": 3.3851563269661726e-05, - "loss": 121.1976, - "step": 55790 - }, - { - "epoch": 0.4508762999054614, - "grad_norm": 656.2864990234375, - "learning_rate": 3.384503384509574e-05, - "loss": 70.9897, - "step": 55800 - }, - { - "epoch": 0.45095710210974554, - "grad_norm": 1176.42138671875, - "learning_rate": 3.38385037307758e-05, - "loss": 88.7279, - "step": 55810 - }, - { - "epoch": 0.4510379043140297, - "grad_norm": 919.260009765625, - "learning_rate": 3.3831972927211135e-05, - "loss": 107.8653, - "step": 55820 - }, - { - "epoch": 0.4511187065183138, - "grad_norm": 1271.9913330078125, - "learning_rate": 3.382544143491104e-05, - "loss": 128.2169, - "step": 55830 - }, - { - "epoch": 0.45119950872259795, - "grad_norm": 940.2571411132812, - "learning_rate": 3.381890925438486e-05, - "loss": 60.8618, - "step": 55840 - }, - { - "epoch": 0.4512803109268821, - "grad_norm": 1402.5384521484375, - "learning_rate": 3.381237638614196e-05, - "loss": 98.5799, - "step": 55850 - }, - { - "epoch": 0.4513611131311662, - "grad_norm": 555.073486328125, - "learning_rate": 3.380584283069183e-05, - "loss": 83.9401, - "step": 55860 - }, - { - "epoch": 0.45144191533545036, - "grad_norm": 609.7122802734375, - "learning_rate": 3.379930858854392e-05, - "loss": 126.8957, - "step": 55870 - }, - { - "epoch": 0.4515227175397345, - "grad_norm": 1621.0721435546875, - "learning_rate": 3.379277366020782e-05, - "loss": 85.7596, - "step": 55880 - }, - { - "epoch": 0.45160351974401863, - "grad_norm": 871.9271850585938, - "learning_rate": 3.378623804619313e-05, - "loss": 69.1888, - "step": 55890 - }, - { - "epoch": 0.45168432194830277, - "grad_norm": 937.989990234375, - "learning_rate": 3.3779701747009504e-05, - "loss": 98.4574, - "step": 55900 - }, - { - "epoch": 0.4517651241525869, - "grad_norm": 889.8865966796875, - "learning_rate": 3.3773164763166655e-05, - "loss": 104.1919, - "step": 55910 - }, - { - "epoch": 0.45184592635687104, - "grad_norm": 682.0979614257812, - "learning_rate": 3.376662709517435e-05, - "loss": 93.4242, - "step": 55920 - }, - { - "epoch": 0.4519267285611551, - "grad_norm": 557.0958862304688, - "learning_rate": 3.3760088743542424e-05, - "loss": 137.8811, - "step": 55930 - }, - { - "epoch": 0.45200753076543926, - "grad_norm": 655.5415649414062, - "learning_rate": 3.375354970878073e-05, - "loss": 95.8518, - "step": 55940 - }, - { - "epoch": 0.4520883329697234, - "grad_norm": 1005.4135131835938, - "learning_rate": 3.374700999139923e-05, - "loss": 112.2898, - "step": 55950 - }, - { - "epoch": 0.45216913517400753, - "grad_norm": 1093.60205078125, - "learning_rate": 3.374046959190786e-05, - "loss": 127.9379, - "step": 55960 - }, - { - "epoch": 0.45224993737829167, - "grad_norm": 1608.089599609375, - "learning_rate": 3.373392851081668e-05, - "loss": 104.504, - "step": 55970 - }, - { - "epoch": 0.4523307395825758, - "grad_norm": 924.5111694335938, - "learning_rate": 3.372738674863577e-05, - "loss": 94.9859, - "step": 55980 - }, - { - "epoch": 0.45241154178685994, - "grad_norm": 765.6442260742188, - "learning_rate": 3.372084430587528e-05, - "loss": 74.4946, - "step": 55990 - }, - { - "epoch": 0.4524923439911441, - "grad_norm": 481.0462341308594, - "learning_rate": 3.3714301183045385e-05, - "loss": 96.2298, - "step": 56000 - }, - { - "epoch": 0.4525731461954282, - "grad_norm": 1174.8433837890625, - "learning_rate": 3.370775738065634e-05, - "loss": 95.9572, - "step": 56010 - }, - { - "epoch": 0.45265394839971235, - "grad_norm": 4381.15966796875, - "learning_rate": 3.370121289921845e-05, - "loss": 167.1694, - "step": 56020 - }, - { - "epoch": 0.4527347506039965, - "grad_norm": 1117.05029296875, - "learning_rate": 3.3694667739242066e-05, - "loss": 88.9415, - "step": 56030 - }, - { - "epoch": 0.4528155528082806, - "grad_norm": 2334.68701171875, - "learning_rate": 3.368812190123759e-05, - "loss": 104.6402, - "step": 56040 - }, - { - "epoch": 0.45289635501256476, - "grad_norm": 1391.3603515625, - "learning_rate": 3.368157538571548e-05, - "loss": 101.4631, - "step": 56050 - }, - { - "epoch": 0.4529771572168489, - "grad_norm": 915.3136596679688, - "learning_rate": 3.367502819318624e-05, - "loss": 82.3991, - "step": 56060 - }, - { - "epoch": 0.453057959421133, - "grad_norm": 481.421142578125, - "learning_rate": 3.366848032416045e-05, - "loss": 102.8317, - "step": 56070 - }, - { - "epoch": 0.45313876162541716, - "grad_norm": 1096.5465087890625, - "learning_rate": 3.3661931779148707e-05, - "loss": 118.1833, - "step": 56080 - }, - { - "epoch": 0.4532195638297013, - "grad_norm": 436.0419006347656, - "learning_rate": 3.3655382558661685e-05, - "loss": 91.0674, - "step": 56090 - }, - { - "epoch": 0.4533003660339854, - "grad_norm": 1014.8424072265625, - "learning_rate": 3.3648832663210124e-05, - "loss": 123.1172, - "step": 56100 - }, - { - "epoch": 0.4533811682382695, - "grad_norm": 1169.797119140625, - "learning_rate": 3.364228209330477e-05, - "loss": 90.8611, - "step": 56110 - }, - { - "epoch": 0.45346197044255365, - "grad_norm": 1212.8084716796875, - "learning_rate": 3.363573084945648e-05, - "loss": 96.1828, - "step": 56120 - }, - { - "epoch": 0.4535427726468378, - "grad_norm": 656.957275390625, - "learning_rate": 3.362917893217611e-05, - "loss": 119.4915, - "step": 56130 - }, - { - "epoch": 0.4536235748511219, - "grad_norm": 1094.2177734375, - "learning_rate": 3.3622626341974594e-05, - "loss": 110.1044, - "step": 56140 - }, - { - "epoch": 0.45370437705540606, - "grad_norm": 537.4096069335938, - "learning_rate": 3.3616073079362926e-05, - "loss": 96.7, - "step": 56150 - }, - { - "epoch": 0.4537851792596902, - "grad_norm": 864.9129028320312, - "learning_rate": 3.360951914485215e-05, - "loss": 99.3269, - "step": 56160 - }, - { - "epoch": 0.45386598146397433, - "grad_norm": 707.6769409179688, - "learning_rate": 3.360296453895333e-05, - "loss": 91.4267, - "step": 56170 - }, - { - "epoch": 0.45394678366825847, - "grad_norm": 504.4292297363281, - "learning_rate": 3.359640926217763e-05, - "loss": 79.2444, - "step": 56180 - }, - { - "epoch": 0.4540275858725426, - "grad_norm": 1074.1849365234375, - "learning_rate": 3.3589853315036225e-05, - "loss": 86.1575, - "step": 56190 - }, - { - "epoch": 0.45410838807682674, - "grad_norm": 1197.366943359375, - "learning_rate": 3.3583296698040384e-05, - "loss": 107.4782, - "step": 56200 - }, - { - "epoch": 0.4541891902811109, - "grad_norm": 962.258544921875, - "learning_rate": 3.3576739411701394e-05, - "loss": 111.6501, - "step": 56210 - }, - { - "epoch": 0.454269992485395, - "grad_norm": 1319.8289794921875, - "learning_rate": 3.35701814565306e-05, - "loss": 102.2861, - "step": 56220 - }, - { - "epoch": 0.45435079468967915, - "grad_norm": 936.8291015625, - "learning_rate": 3.356362283303941e-05, - "loss": 73.8159, - "step": 56230 - }, - { - "epoch": 0.4544315968939633, - "grad_norm": 2018.6744384765625, - "learning_rate": 3.355706354173928e-05, - "loss": 112.5514, - "step": 56240 - }, - { - "epoch": 0.4545123990982474, - "grad_norm": 672.4506225585938, - "learning_rate": 3.355050358314172e-05, - "loss": 101.8951, - "step": 56250 - }, - { - "epoch": 0.45459320130253156, - "grad_norm": 748.7069702148438, - "learning_rate": 3.354394295775829e-05, - "loss": 80.3955, - "step": 56260 - }, - { - "epoch": 0.45467400350681564, - "grad_norm": 1302.580810546875, - "learning_rate": 3.353738166610058e-05, - "loss": 104.7833, - "step": 56270 - }, - { - "epoch": 0.4547548057110998, - "grad_norm": 770.7080078125, - "learning_rate": 3.3530819708680286e-05, - "loss": 122.5262, - "step": 56280 - }, - { - "epoch": 0.4548356079153839, - "grad_norm": 779.2553100585938, - "learning_rate": 3.3524257086009104e-05, - "loss": 64.8892, - "step": 56290 - }, - { - "epoch": 0.45491641011966805, - "grad_norm": 923.4341430664062, - "learning_rate": 3.35176937985988e-05, - "loss": 99.6245, - "step": 56300 - }, - { - "epoch": 0.4549972123239522, - "grad_norm": 1138.59423828125, - "learning_rate": 3.3511129846961184e-05, - "loss": 110.9873, - "step": 56310 - }, - { - "epoch": 0.4550780145282363, - "grad_norm": 1343.1478271484375, - "learning_rate": 3.350456523160815e-05, - "loss": 108.9869, - "step": 56320 - }, - { - "epoch": 0.45515881673252045, - "grad_norm": 1196.330078125, - "learning_rate": 3.349799995305162e-05, - "loss": 105.1507, - "step": 56330 - }, - { - "epoch": 0.4552396189368046, - "grad_norm": 1436.5767822265625, - "learning_rate": 3.349143401180354e-05, - "loss": 97.4441, - "step": 56340 - }, - { - "epoch": 0.4553204211410887, - "grad_norm": 1145.8896484375, - "learning_rate": 3.3484867408375954e-05, - "loss": 96.1582, - "step": 56350 - }, - { - "epoch": 0.45540122334537286, - "grad_norm": 866.2322998046875, - "learning_rate": 3.347830014328094e-05, - "loss": 106.9375, - "step": 56360 - }, - { - "epoch": 0.455482025549657, - "grad_norm": 1068.439208984375, - "learning_rate": 3.3471732217030625e-05, - "loss": 93.3527, - "step": 56370 - }, - { - "epoch": 0.45556282775394114, - "grad_norm": 936.2102661132812, - "learning_rate": 3.346516363013719e-05, - "loss": 127.8496, - "step": 56380 - }, - { - "epoch": 0.45564362995822527, - "grad_norm": 740.4263305664062, - "learning_rate": 3.345859438311287e-05, - "loss": 82.4283, - "step": 56390 - }, - { - "epoch": 0.4557244321625094, - "grad_norm": 899.1220703125, - "learning_rate": 3.3452024476469934e-05, - "loss": 101.2224, - "step": 56400 - }, - { - "epoch": 0.45580523436679354, - "grad_norm": 525.888916015625, - "learning_rate": 3.344545391072073e-05, - "loss": 93.3358, - "step": 56410 - }, - { - "epoch": 0.4558860365710777, - "grad_norm": 1165.54638671875, - "learning_rate": 3.343888268637765e-05, - "loss": 105.6394, - "step": 56420 - }, - { - "epoch": 0.4559668387753618, - "grad_norm": 1060.2603759765625, - "learning_rate": 3.343231080395312e-05, - "loss": 121.0597, - "step": 56430 - }, - { - "epoch": 0.45604764097964595, - "grad_norm": 914.628173828125, - "learning_rate": 3.3425738263959615e-05, - "loss": 70.8887, - "step": 56440 - }, - { - "epoch": 0.45612844318393003, - "grad_norm": 703.721435546875, - "learning_rate": 3.3419165066909705e-05, - "loss": 102.8441, - "step": 56450 - }, - { - "epoch": 0.45620924538821417, - "grad_norm": 778.9115600585938, - "learning_rate": 3.341259121331597e-05, - "loss": 91.6393, - "step": 56460 - }, - { - "epoch": 0.4562900475924983, - "grad_norm": 902.790283203125, - "learning_rate": 3.3406016703691055e-05, - "loss": 102.3402, - "step": 56470 - }, - { - "epoch": 0.45637084979678244, - "grad_norm": 820.8761596679688, - "learning_rate": 3.339944153854764e-05, - "loss": 123.4187, - "step": 56480 - }, - { - "epoch": 0.4564516520010666, - "grad_norm": 3015.48095703125, - "learning_rate": 3.339286571839848e-05, - "loss": 126.7324, - "step": 56490 - }, - { - "epoch": 0.4565324542053507, - "grad_norm": 1237.8597412109375, - "learning_rate": 3.338628924375638e-05, - "loss": 112.8112, - "step": 56500 - }, - { - "epoch": 0.45661325640963485, - "grad_norm": 1115.94140625, - "learning_rate": 3.337971211513417e-05, - "loss": 121.3506, - "step": 56510 - }, - { - "epoch": 0.456694058613919, - "grad_norm": 473.3012390136719, - "learning_rate": 3.3373134333044756e-05, - "loss": 77.8062, - "step": 56520 - }, - { - "epoch": 0.4567748608182031, - "grad_norm": 812.8482055664062, - "learning_rate": 3.336655589800109e-05, - "loss": 87.7469, - "step": 56530 - }, - { - "epoch": 0.45685566302248726, - "grad_norm": 792.9755859375, - "learning_rate": 3.3359976810516164e-05, - "loss": 92.8981, - "step": 56540 - }, - { - "epoch": 0.4569364652267714, - "grad_norm": 870.7730712890625, - "learning_rate": 3.3353397071103046e-05, - "loss": 144.3688, - "step": 56550 - }, - { - "epoch": 0.45701726743105553, - "grad_norm": 1068.0155029296875, - "learning_rate": 3.334681668027481e-05, - "loss": 139.2411, - "step": 56560 - }, - { - "epoch": 0.45709806963533967, - "grad_norm": 1667.3941650390625, - "learning_rate": 3.334023563854463e-05, - "loss": 96.4864, - "step": 56570 - }, - { - "epoch": 0.4571788718396238, - "grad_norm": 652.8388671875, - "learning_rate": 3.33336539464257e-05, - "loss": 84.7737, - "step": 56580 - }, - { - "epoch": 0.45725967404390794, - "grad_norm": 741.3201904296875, - "learning_rate": 3.3327071604431275e-05, - "loss": 87.6663, - "step": 56590 - }, - { - "epoch": 0.4573404762481921, - "grad_norm": 260.24835205078125, - "learning_rate": 3.332048861307467e-05, - "loss": 87.6609, - "step": 56600 - }, - { - "epoch": 0.4574212784524762, - "grad_norm": 1081.678955078125, - "learning_rate": 3.331390497286922e-05, - "loss": 125.2377, - "step": 56610 - }, - { - "epoch": 0.4575020806567603, - "grad_norm": 1178.3035888671875, - "learning_rate": 3.3307320684328354e-05, - "loss": 113.7592, - "step": 56620 - }, - { - "epoch": 0.4575828828610444, - "grad_norm": 1128.451171875, - "learning_rate": 3.3300735747965505e-05, - "loss": 90.6729, - "step": 56630 - }, - { - "epoch": 0.45766368506532856, - "grad_norm": 498.7513427734375, - "learning_rate": 3.3294150164294204e-05, - "loss": 106.1388, - "step": 56640 - }, - { - "epoch": 0.4577444872696127, - "grad_norm": 724.35546875, - "learning_rate": 3.3287563933827995e-05, - "loss": 90.0715, - "step": 56650 - }, - { - "epoch": 0.45782528947389683, - "grad_norm": 456.78033447265625, - "learning_rate": 3.328097705708047e-05, - "loss": 96.2324, - "step": 56660 - }, - { - "epoch": 0.45790609167818097, - "grad_norm": 582.1641235351562, - "learning_rate": 3.327438953456532e-05, - "loss": 100.8045, - "step": 56670 - }, - { - "epoch": 0.4579868938824651, - "grad_norm": 1068.67822265625, - "learning_rate": 3.326780136679623e-05, - "loss": 100.6599, - "step": 56680 - }, - { - "epoch": 0.45806769608674924, - "grad_norm": 1061.540283203125, - "learning_rate": 3.3261212554286975e-05, - "loss": 92.4695, - "step": 56690 - }, - { - "epoch": 0.4581484982910334, - "grad_norm": 671.1739501953125, - "learning_rate": 3.325462309755134e-05, - "loss": 104.9912, - "step": 56700 - }, - { - "epoch": 0.4582293004953175, - "grad_norm": 1352.1739501953125, - "learning_rate": 3.324803299710321e-05, - "loss": 119.7549, - "step": 56710 - }, - { - "epoch": 0.45831010269960165, - "grad_norm": 1245.5567626953125, - "learning_rate": 3.324144225345649e-05, - "loss": 115.5293, - "step": 56720 - }, - { - "epoch": 0.4583909049038858, - "grad_norm": 1363.4453125, - "learning_rate": 3.323485086712513e-05, - "loss": 116.8819, - "step": 56730 - }, - { - "epoch": 0.4584717071081699, - "grad_norm": 1196.8370361328125, - "learning_rate": 3.322825883862314e-05, - "loss": 100.6262, - "step": 56740 - }, - { - "epoch": 0.45855250931245406, - "grad_norm": 445.4294128417969, - "learning_rate": 3.322166616846458e-05, - "loss": 79.4838, - "step": 56750 - }, - { - "epoch": 0.4586333115167382, - "grad_norm": 1192.1490478515625, - "learning_rate": 3.321507285716357e-05, - "loss": 101.9952, - "step": 56760 - }, - { - "epoch": 0.45871411372102233, - "grad_norm": 808.8568725585938, - "learning_rate": 3.3208478905234274e-05, - "loss": 93.0783, - "step": 56770 - }, - { - "epoch": 0.45879491592530647, - "grad_norm": 1017.7055053710938, - "learning_rate": 3.320188431319088e-05, - "loss": 80.9454, - "step": 56780 - }, - { - "epoch": 0.45887571812959055, - "grad_norm": 884.1676025390625, - "learning_rate": 3.319528908154766e-05, - "loss": 110.7695, - "step": 56790 - }, - { - "epoch": 0.4589565203338747, - "grad_norm": 1028.6517333984375, - "learning_rate": 3.318869321081892e-05, - "loss": 97.2693, - "step": 56800 - }, - { - "epoch": 0.4590373225381588, - "grad_norm": 948.8199462890625, - "learning_rate": 3.318209670151904e-05, - "loss": 100.8977, - "step": 56810 - }, - { - "epoch": 0.45911812474244296, - "grad_norm": 916.5050048828125, - "learning_rate": 3.31754995541624e-05, - "loss": 98.5942, - "step": 56820 - }, - { - "epoch": 0.4591989269467271, - "grad_norm": 1691.0257568359375, - "learning_rate": 3.3168901769263474e-05, - "loss": 168.1444, - "step": 56830 - }, - { - "epoch": 0.45927972915101123, - "grad_norm": 572.3355102539062, - "learning_rate": 3.3162303347336764e-05, - "loss": 130.9961, - "step": 56840 - }, - { - "epoch": 0.45936053135529537, - "grad_norm": 766.59765625, - "learning_rate": 3.315570428889684e-05, - "loss": 73.0469, - "step": 56850 - }, - { - "epoch": 0.4594413335595795, - "grad_norm": 1008.1466064453125, - "learning_rate": 3.31491045944583e-05, - "loss": 86.8971, - "step": 56860 - }, - { - "epoch": 0.45952213576386364, - "grad_norm": 755.7885131835938, - "learning_rate": 3.3142504264535804e-05, - "loss": 101.5364, - "step": 56870 - }, - { - "epoch": 0.4596029379681478, - "grad_norm": 2321.716552734375, - "learning_rate": 3.313590329964406e-05, - "loss": 100.4451, - "step": 56880 - }, - { - "epoch": 0.4596837401724319, - "grad_norm": 660.9086303710938, - "learning_rate": 3.312930170029783e-05, - "loss": 117.6842, - "step": 56890 - }, - { - "epoch": 0.45976454237671605, - "grad_norm": 826.6620483398438, - "learning_rate": 3.312269946701191e-05, - "loss": 78.0758, - "step": 56900 - }, - { - "epoch": 0.4598453445810002, - "grad_norm": 713.7135009765625, - "learning_rate": 3.311609660030117e-05, - "loss": 99.7536, - "step": 56910 - }, - { - "epoch": 0.4599261467852843, - "grad_norm": 553.6259765625, - "learning_rate": 3.31094931006805e-05, - "loss": 126.8934, - "step": 56920 - }, - { - "epoch": 0.46000694898956845, - "grad_norm": 951.6075439453125, - "learning_rate": 3.310288896866486e-05, - "loss": 84.8257, - "step": 56930 - }, - { - "epoch": 0.4600877511938526, - "grad_norm": 1161.0311279296875, - "learning_rate": 3.309628420476926e-05, - "loss": 130.2858, - "step": 56940 - }, - { - "epoch": 0.4601685533981367, - "grad_norm": 1533.3114013671875, - "learning_rate": 3.308967880950874e-05, - "loss": 108.5542, - "step": 56950 - }, - { - "epoch": 0.4602493556024208, - "grad_norm": 989.2337646484375, - "learning_rate": 3.3083072783398416e-05, - "loss": 189.8215, - "step": 56960 - }, - { - "epoch": 0.46033015780670494, - "grad_norm": 1115.4666748046875, - "learning_rate": 3.307646612695343e-05, - "loss": 103.562, - "step": 56970 - }, - { - "epoch": 0.4604109600109891, - "grad_norm": 1246.2509765625, - "learning_rate": 3.3069858840688994e-05, - "loss": 114.7842, - "step": 56980 - }, - { - "epoch": 0.4604917622152732, - "grad_norm": 1287.987548828125, - "learning_rate": 3.3063250925120334e-05, - "loss": 83.5232, - "step": 56990 - }, - { - "epoch": 0.46057256441955735, - "grad_norm": 1891.885986328125, - "learning_rate": 3.305664238076278e-05, - "loss": 88.9205, - "step": 57000 - }, - { - "epoch": 0.4606533666238415, - "grad_norm": 462.0375061035156, - "learning_rate": 3.3050033208131656e-05, - "loss": 90.0197, - "step": 57010 - }, - { - "epoch": 0.4607341688281256, - "grad_norm": 847.8806762695312, - "learning_rate": 3.3043423407742375e-05, - "loss": 113.3436, - "step": 57020 - }, - { - "epoch": 0.46081497103240976, - "grad_norm": 989.9613037109375, - "learning_rate": 3.303681298011037e-05, - "loss": 162.6573, - "step": 57030 - }, - { - "epoch": 0.4608957732366939, - "grad_norm": 766.4435424804688, - "learning_rate": 3.3030201925751145e-05, - "loss": 73.6941, - "step": 57040 - }, - { - "epoch": 0.46097657544097803, - "grad_norm": 747.1614379882812, - "learning_rate": 3.302359024518024e-05, - "loss": 72.137, - "step": 57050 - }, - { - "epoch": 0.46105737764526217, - "grad_norm": 564.53955078125, - "learning_rate": 3.301697793891324e-05, - "loss": 100.495, - "step": 57060 - }, - { - "epoch": 0.4611381798495463, - "grad_norm": 833.794921875, - "learning_rate": 3.3010365007465805e-05, - "loss": 114.0099, - "step": 57070 - }, - { - "epoch": 0.46121898205383044, - "grad_norm": 672.6408081054688, - "learning_rate": 3.300375145135361e-05, - "loss": 91.6967, - "step": 57080 - }, - { - "epoch": 0.4612997842581146, - "grad_norm": 635.527587890625, - "learning_rate": 3.299713727109239e-05, - "loss": 116.567, - "step": 57090 - }, - { - "epoch": 0.4613805864623987, - "grad_norm": 1776.06005859375, - "learning_rate": 3.299052246719795e-05, - "loss": 151.9863, - "step": 57100 - }, - { - "epoch": 0.46146138866668285, - "grad_norm": 1509.708251953125, - "learning_rate": 3.298390704018611e-05, - "loss": 94.782, - "step": 57110 - }, - { - "epoch": 0.461542190870967, - "grad_norm": 678.9341430664062, - "learning_rate": 3.297729099057277e-05, - "loss": 107.1302, - "step": 57120 - }, - { - "epoch": 0.4616229930752511, - "grad_norm": 1264.8927001953125, - "learning_rate": 3.297067431887384e-05, - "loss": 105.553, - "step": 57130 - }, - { - "epoch": 0.4617037952795352, - "grad_norm": 1086.1097412109375, - "learning_rate": 3.296405702560532e-05, - "loss": 140.6444, - "step": 57140 - }, - { - "epoch": 0.46178459748381934, - "grad_norm": 701.7677612304688, - "learning_rate": 3.295743911128324e-05, - "loss": 123.4673, - "step": 57150 - }, - { - "epoch": 0.4618653996881035, - "grad_norm": 1054.185546875, - "learning_rate": 3.295082057642367e-05, - "loss": 109.0559, - "step": 57160 - }, - { - "epoch": 0.4619462018923876, - "grad_norm": 955.3489379882812, - "learning_rate": 3.294420142154274e-05, - "loss": 117.471, - "step": 57170 - }, - { - "epoch": 0.46202700409667175, - "grad_norm": 1257.5474853515625, - "learning_rate": 3.293758164715663e-05, - "loss": 126.9206, - "step": 57180 - }, - { - "epoch": 0.4621078063009559, - "grad_norm": 1198.5538330078125, - "learning_rate": 3.2930961253781554e-05, - "loss": 112.9252, - "step": 57190 - }, - { - "epoch": 0.46218860850524, - "grad_norm": 633.9056396484375, - "learning_rate": 3.29243402419338e-05, - "loss": 96.0779, - "step": 57200 - }, - { - "epoch": 0.46226941070952415, - "grad_norm": 691.6924438476562, - "learning_rate": 3.2917718612129665e-05, - "loss": 93.3989, - "step": 57210 - }, - { - "epoch": 0.4623502129138083, - "grad_norm": 1050.9312744140625, - "learning_rate": 3.2911096364885544e-05, - "loss": 120.7004, - "step": 57220 - }, - { - "epoch": 0.4624310151180924, - "grad_norm": 1082.2530517578125, - "learning_rate": 3.2904473500717824e-05, - "loss": 100.4312, - "step": 57230 - }, - { - "epoch": 0.46251181732237656, - "grad_norm": 868.1658325195312, - "learning_rate": 3.2897850020143005e-05, - "loss": 95.8304, - "step": 57240 - }, - { - "epoch": 0.4625926195266607, - "grad_norm": 582.0343017578125, - "learning_rate": 3.289122592367757e-05, - "loss": 107.3932, - "step": 57250 - }, - { - "epoch": 0.46267342173094483, - "grad_norm": 932.794189453125, - "learning_rate": 3.2884601211838085e-05, - "loss": 102.7364, - "step": 57260 - }, - { - "epoch": 0.46275422393522897, - "grad_norm": 1241.356201171875, - "learning_rate": 3.287797588514117e-05, - "loss": 94.3089, - "step": 57270 - }, - { - "epoch": 0.4628350261395131, - "grad_norm": 805.4827880859375, - "learning_rate": 3.287134994410347e-05, - "loss": 90.9399, - "step": 57280 - }, - { - "epoch": 0.46291582834379724, - "grad_norm": 951.2842407226562, - "learning_rate": 3.28647233892417e-05, - "loss": 94.8644, - "step": 57290 - }, - { - "epoch": 0.4629966305480814, - "grad_norm": 747.2940673828125, - "learning_rate": 3.28580962210726e-05, - "loss": 93.3947, - "step": 57300 - }, - { - "epoch": 0.46307743275236546, - "grad_norm": 498.01226806640625, - "learning_rate": 3.285146844011298e-05, - "loss": 97.4008, - "step": 57310 - }, - { - "epoch": 0.4631582349566496, - "grad_norm": 1033.824951171875, - "learning_rate": 3.2844840046879686e-05, - "loss": 84.3206, - "step": 57320 - }, - { - "epoch": 0.46323903716093373, - "grad_norm": 1906.4432373046875, - "learning_rate": 3.2838211041889625e-05, - "loss": 82.2766, - "step": 57330 - }, - { - "epoch": 0.46331983936521787, - "grad_norm": 1675.2425537109375, - "learning_rate": 3.283158142565971e-05, - "loss": 118.3647, - "step": 57340 - }, - { - "epoch": 0.463400641569502, - "grad_norm": 898.859130859375, - "learning_rate": 3.2824951198706954e-05, - "loss": 97.679, - "step": 57350 - }, - { - "epoch": 0.46348144377378614, - "grad_norm": 833.0927124023438, - "learning_rate": 3.28183203615484e-05, - "loss": 110.486, - "step": 57360 - }, - { - "epoch": 0.4635622459780703, - "grad_norm": 1613.375244140625, - "learning_rate": 3.281168891470112e-05, - "loss": 113.1182, - "step": 57370 - }, - { - "epoch": 0.4636430481823544, - "grad_norm": 1013.6747436523438, - "learning_rate": 3.280505685868226e-05, - "loss": 75.7911, - "step": 57380 - }, - { - "epoch": 0.46372385038663855, - "grad_norm": 1444.9769287109375, - "learning_rate": 3.279842419400899e-05, - "loss": 114.6812, - "step": 57390 - }, - { - "epoch": 0.4638046525909227, - "grad_norm": 1627.5826416015625, - "learning_rate": 3.279179092119855e-05, - "loss": 104.5297, - "step": 57400 - }, - { - "epoch": 0.4638854547952068, - "grad_norm": 559.1404418945312, - "learning_rate": 3.278515704076821e-05, - "loss": 128.8307, - "step": 57410 - }, - { - "epoch": 0.46396625699949096, - "grad_norm": 622.7420654296875, - "learning_rate": 3.277852255323529e-05, - "loss": 78.8194, - "step": 57420 - }, - { - "epoch": 0.4640470592037751, - "grad_norm": 1442.7891845703125, - "learning_rate": 3.277188745911717e-05, - "loss": 113.4144, - "step": 57430 - }, - { - "epoch": 0.46412786140805923, - "grad_norm": 671.3884887695312, - "learning_rate": 3.276525175893126e-05, - "loss": 121.2021, - "step": 57440 - }, - { - "epoch": 0.46420866361234336, - "grad_norm": 1309.0482177734375, - "learning_rate": 3.2758615453195034e-05, - "loss": 92.9694, - "step": 57450 - }, - { - "epoch": 0.4642894658166275, - "grad_norm": 884.1349487304688, - "learning_rate": 3.2751978542425995e-05, - "loss": 110.7425, - "step": 57460 - }, - { - "epoch": 0.46437026802091164, - "grad_norm": 670.400390625, - "learning_rate": 3.274534102714172e-05, - "loss": 79.4906, - "step": 57470 - }, - { - "epoch": 0.4644510702251957, - "grad_norm": 603.43798828125, - "learning_rate": 3.273870290785979e-05, - "loss": 103.5333, - "step": 57480 - }, - { - "epoch": 0.46453187242947985, - "grad_norm": 602.8924560546875, - "learning_rate": 3.273206418509788e-05, - "loss": 85.6954, - "step": 57490 - }, - { - "epoch": 0.464612674633764, - "grad_norm": 1302.9310302734375, - "learning_rate": 3.272542485937369e-05, - "loss": 120.6508, - "step": 57500 - }, - { - "epoch": 0.4646934768380481, - "grad_norm": 1471.5174560546875, - "learning_rate": 3.271878493120496e-05, - "loss": 135.5886, - "step": 57510 - }, - { - "epoch": 0.46477427904233226, - "grad_norm": 826.2156372070312, - "learning_rate": 3.271214440110948e-05, - "loss": 102.4583, - "step": 57520 - }, - { - "epoch": 0.4648550812466164, - "grad_norm": 792.457275390625, - "learning_rate": 3.270550326960511e-05, - "loss": 94.5905, - "step": 57530 - }, - { - "epoch": 0.46493588345090053, - "grad_norm": 1075.4874267578125, - "learning_rate": 3.269886153720972e-05, - "loss": 125.5046, - "step": 57540 - }, - { - "epoch": 0.46501668565518467, - "grad_norm": 505.3604431152344, - "learning_rate": 3.269221920444127e-05, - "loss": 96.3872, - "step": 57550 - }, - { - "epoch": 0.4650974878594688, - "grad_norm": 1079.845703125, - "learning_rate": 3.2685576271817716e-05, - "loss": 92.692, - "step": 57560 - }, - { - "epoch": 0.46517829006375294, - "grad_norm": 2167.5048828125, - "learning_rate": 3.267893273985711e-05, - "loss": 120.3047, - "step": 57570 - }, - { - "epoch": 0.4652590922680371, - "grad_norm": 793.6063842773438, - "learning_rate": 3.267228860907751e-05, - "loss": 83.4853, - "step": 57580 - }, - { - "epoch": 0.4653398944723212, - "grad_norm": 704.1026611328125, - "learning_rate": 3.2665643879997056e-05, - "loss": 117.5697, - "step": 57590 - }, - { - "epoch": 0.46542069667660535, - "grad_norm": 985.5035400390625, - "learning_rate": 3.2658998553133895e-05, - "loss": 71.9135, - "step": 57600 - }, - { - "epoch": 0.4655014988808895, - "grad_norm": 701.177490234375, - "learning_rate": 3.2652352629006275e-05, - "loss": 102.2039, - "step": 57610 - }, - { - "epoch": 0.4655823010851736, - "grad_norm": 668.28759765625, - "learning_rate": 3.2645706108132424e-05, - "loss": 89.1009, - "step": 57620 - }, - { - "epoch": 0.46566310328945776, - "grad_norm": 1036.6947021484375, - "learning_rate": 3.263905899103068e-05, - "loss": 77.3765, - "step": 57630 - }, - { - "epoch": 0.4657439054937419, - "grad_norm": 1061.061767578125, - "learning_rate": 3.263241127821938e-05, - "loss": 97.3841, - "step": 57640 - }, - { - "epoch": 0.465824707698026, - "grad_norm": 734.5075073242188, - "learning_rate": 3.262576297021695e-05, - "loss": 77.2737, - "step": 57650 - }, - { - "epoch": 0.4659055099023101, - "grad_norm": 733.6683349609375, - "learning_rate": 3.2619114067541796e-05, - "loss": 120.7216, - "step": 57660 - }, - { - "epoch": 0.46598631210659425, - "grad_norm": 847.5562133789062, - "learning_rate": 3.261246457071245e-05, - "loss": 94.6434, - "step": 57670 - }, - { - "epoch": 0.4660671143108784, - "grad_norm": 945.1595458984375, - "learning_rate": 3.260581448024745e-05, - "loss": 108.5249, - "step": 57680 - }, - { - "epoch": 0.4661479165151625, - "grad_norm": 952.0477294921875, - "learning_rate": 3.2599163796665376e-05, - "loss": 91.4986, - "step": 57690 - }, - { - "epoch": 0.46622871871944666, - "grad_norm": 784.075439453125, - "learning_rate": 3.2592512520484856e-05, - "loss": 97.2511, - "step": 57700 - }, - { - "epoch": 0.4663095209237308, - "grad_norm": 694.65283203125, - "learning_rate": 3.2585860652224585e-05, - "loss": 93.6368, - "step": 57710 - }, - { - "epoch": 0.4663903231280149, - "grad_norm": 1273.0028076171875, - "learning_rate": 3.257920819240328e-05, - "loss": 91.0541, - "step": 57720 - }, - { - "epoch": 0.46647112533229906, - "grad_norm": 721.4236450195312, - "learning_rate": 3.257255514153971e-05, - "loss": 103.7508, - "step": 57730 - }, - { - "epoch": 0.4665519275365832, - "grad_norm": 1380.3624267578125, - "learning_rate": 3.25659015001527e-05, - "loss": 145.1021, - "step": 57740 - }, - { - "epoch": 0.46663272974086734, - "grad_norm": 717.0982666015625, - "learning_rate": 3.2559247268761115e-05, - "loss": 90.404, - "step": 57750 - }, - { - "epoch": 0.46671353194515147, - "grad_norm": 1308.2506103515625, - "learning_rate": 3.2552592447883865e-05, - "loss": 114.8901, - "step": 57760 - }, - { - "epoch": 0.4667943341494356, - "grad_norm": 885.66015625, - "learning_rate": 3.25459370380399e-05, - "loss": 106.8264, - "step": 57770 - }, - { - "epoch": 0.46687513635371974, - "grad_norm": 1066.3802490234375, - "learning_rate": 3.253928103974823e-05, - "loss": 118.0046, - "step": 57780 - }, - { - "epoch": 0.4669559385580039, - "grad_norm": 2478.63623046875, - "learning_rate": 3.253262445352791e-05, - "loss": 96.5745, - "step": 57790 - }, - { - "epoch": 0.467036740762288, - "grad_norm": 1116.6505126953125, - "learning_rate": 3.2525967279898015e-05, - "loss": 111.5416, - "step": 57800 - }, - { - "epoch": 0.46711754296657215, - "grad_norm": 849.6885375976562, - "learning_rate": 3.25193095193777e-05, - "loss": 80.4875, - "step": 57810 - }, - { - "epoch": 0.4671983451708563, - "grad_norm": 756.7638549804688, - "learning_rate": 3.251265117248614e-05, - "loss": 113.3234, - "step": 57820 - }, - { - "epoch": 0.46727914737514037, - "grad_norm": 1729.8343505859375, - "learning_rate": 3.250599223974258e-05, - "loss": 120.4109, - "step": 57830 - }, - { - "epoch": 0.4673599495794245, - "grad_norm": 1687.8067626953125, - "learning_rate": 3.249933272166629e-05, - "loss": 128.4396, - "step": 57840 - }, - { - "epoch": 0.46744075178370864, - "grad_norm": 674.70263671875, - "learning_rate": 3.24926726187766e-05, - "loss": 96.1781, - "step": 57850 - }, - { - "epoch": 0.4675215539879928, - "grad_norm": 811.1097412109375, - "learning_rate": 3.248601193159287e-05, - "loss": 122.4934, - "step": 57860 - }, - { - "epoch": 0.4676023561922769, - "grad_norm": 964.5817260742188, - "learning_rate": 3.247935066063451e-05, - "loss": 94.6904, - "step": 57870 - }, - { - "epoch": 0.46768315839656105, - "grad_norm": 640.372314453125, - "learning_rate": 3.247268880642098e-05, - "loss": 82.9971, - "step": 57880 - }, - { - "epoch": 0.4677639606008452, - "grad_norm": 921.1566162109375, - "learning_rate": 3.24660263694718e-05, - "loss": 126.6147, - "step": 57890 - }, - { - "epoch": 0.4678447628051293, - "grad_norm": 2424.14697265625, - "learning_rate": 3.245936335030651e-05, - "loss": 106.9907, - "step": 57900 - }, - { - "epoch": 0.46792556500941346, - "grad_norm": 923.6797485351562, - "learning_rate": 3.245269974944471e-05, - "loss": 102.8632, - "step": 57910 - }, - { - "epoch": 0.4680063672136976, - "grad_norm": 2133.920166015625, - "learning_rate": 3.244603556740603e-05, - "loss": 109.9338, - "step": 57920 - }, - { - "epoch": 0.46808716941798173, - "grad_norm": 509.9008483886719, - "learning_rate": 3.243937080471017e-05, - "loss": 98.4513, - "step": 57930 - }, - { - "epoch": 0.46816797162226587, - "grad_norm": 988.5900268554688, - "learning_rate": 3.243270546187687e-05, - "loss": 80.2409, - "step": 57940 - }, - { - "epoch": 0.46824877382655, - "grad_norm": 960.398193359375, - "learning_rate": 3.2426039539425876e-05, - "loss": 102.6017, - "step": 57950 - }, - { - "epoch": 0.46832957603083414, - "grad_norm": 917.8516845703125, - "learning_rate": 3.241937303787703e-05, - "loss": 94.7822, - "step": 57960 - }, - { - "epoch": 0.4684103782351183, - "grad_norm": 1820.247314453125, - "learning_rate": 3.241270595775021e-05, - "loss": 109.4503, - "step": 57970 - }, - { - "epoch": 0.4684911804394024, - "grad_norm": 1758.2685546875, - "learning_rate": 3.240603829956531e-05, - "loss": 111.3401, - "step": 57980 - }, - { - "epoch": 0.46857198264368655, - "grad_norm": 1212.7603759765625, - "learning_rate": 3.2399370063842294e-05, - "loss": 118.0239, - "step": 57990 - }, - { - "epoch": 0.4686527848479706, - "grad_norm": 1095.7799072265625, - "learning_rate": 3.239270125110117e-05, - "loss": 99.4997, - "step": 58000 - }, - { - "epoch": 0.46873358705225476, - "grad_norm": 1054.893798828125, - "learning_rate": 3.2386031861861976e-05, - "loss": 72.9148, - "step": 58010 - }, - { - "epoch": 0.4688143892565389, - "grad_norm": 884.32421875, - "learning_rate": 3.2379361896644816e-05, - "loss": 124.0116, - "step": 58020 - }, - { - "epoch": 0.46889519146082304, - "grad_norm": 889.4166259765625, - "learning_rate": 3.2372691355969816e-05, - "loss": 102.5547, - "step": 58030 - }, - { - "epoch": 0.46897599366510717, - "grad_norm": 432.0292663574219, - "learning_rate": 3.236602024035716e-05, - "loss": 81.4001, - "step": 58040 - }, - { - "epoch": 0.4690567958693913, - "grad_norm": 734.2676391601562, - "learning_rate": 3.235934855032709e-05, - "loss": 103.1359, - "step": 58050 - }, - { - "epoch": 0.46913759807367544, - "grad_norm": 968.5971069335938, - "learning_rate": 3.235267628639987e-05, - "loss": 121.2424, - "step": 58060 - }, - { - "epoch": 0.4692184002779596, - "grad_norm": 846.4686889648438, - "learning_rate": 3.2346003449095805e-05, - "loss": 88.2614, - "step": 58070 - }, - { - "epoch": 0.4692992024822437, - "grad_norm": 410.9154357910156, - "learning_rate": 3.2339330038935265e-05, - "loss": 108.0753, - "step": 58080 - }, - { - "epoch": 0.46938000468652785, - "grad_norm": 522.4291381835938, - "learning_rate": 3.233265605643866e-05, - "loss": 108.3384, - "step": 58090 - }, - { - "epoch": 0.469460806890812, - "grad_norm": 1142.4039306640625, - "learning_rate": 3.2325981502126433e-05, - "loss": 98.0097, - "step": 58100 - }, - { - "epoch": 0.4695416090950961, - "grad_norm": 652.0031127929688, - "learning_rate": 3.231930637651909e-05, - "loss": 90.157, - "step": 58110 - }, - { - "epoch": 0.46962241129938026, - "grad_norm": 963.2822875976562, - "learning_rate": 3.2312630680137175e-05, - "loss": 88.7647, - "step": 58120 - }, - { - "epoch": 0.4697032135036644, - "grad_norm": 747.8682250976562, - "learning_rate": 3.230595441350125e-05, - "loss": 117.516, - "step": 58130 - }, - { - "epoch": 0.46978401570794853, - "grad_norm": 664.3839721679688, - "learning_rate": 3.229927757713196e-05, - "loss": 96.944, - "step": 58140 - }, - { - "epoch": 0.46986481791223267, - "grad_norm": 1770.969970703125, - "learning_rate": 3.229260017154997e-05, - "loss": 130.2008, - "step": 58150 - }, - { - "epoch": 0.4699456201165168, - "grad_norm": 1176.301513671875, - "learning_rate": 3.228592219727602e-05, - "loss": 125.049, - "step": 58160 - }, - { - "epoch": 0.4700264223208009, - "grad_norm": 726.5639038085938, - "learning_rate": 3.2279243654830836e-05, - "loss": 103.9181, - "step": 58170 - }, - { - "epoch": 0.470107224525085, - "grad_norm": 849.9468994140625, - "learning_rate": 3.227256454473526e-05, - "loss": 78.5453, - "step": 58180 - }, - { - "epoch": 0.47018802672936916, - "grad_norm": 1280.2154541015625, - "learning_rate": 3.226588486751012e-05, - "loss": 98.3025, - "step": 58190 - }, - { - "epoch": 0.4702688289336533, - "grad_norm": 910.3240966796875, - "learning_rate": 3.225920462367632e-05, - "loss": 92.3926, - "step": 58200 - }, - { - "epoch": 0.47034963113793743, - "grad_norm": 1310.4786376953125, - "learning_rate": 3.225252381375479e-05, - "loss": 85.4375, - "step": 58210 - }, - { - "epoch": 0.47043043334222157, - "grad_norm": 1054.052734375, - "learning_rate": 3.2245842438266526e-05, - "loss": 122.2845, - "step": 58220 - }, - { - "epoch": 0.4705112355465057, - "grad_norm": 636.2752685546875, - "learning_rate": 3.223916049773256e-05, - "loss": 85.0813, - "step": 58230 - }, - { - "epoch": 0.47059203775078984, - "grad_norm": 876.0001220703125, - "learning_rate": 3.223247799267394e-05, - "loss": 129.5137, - "step": 58240 - }, - { - "epoch": 0.470672839955074, - "grad_norm": 856.2767333984375, - "learning_rate": 3.222579492361179e-05, - "loss": 85.6041, - "step": 58250 - }, - { - "epoch": 0.4707536421593581, - "grad_norm": 1067.6402587890625, - "learning_rate": 3.221911129106728e-05, - "loss": 96.5397, - "step": 58260 - }, - { - "epoch": 0.47083444436364225, - "grad_norm": 1053.01123046875, - "learning_rate": 3.221242709556161e-05, - "loss": 94.2288, - "step": 58270 - }, - { - "epoch": 0.4709152465679264, - "grad_norm": 769.0462646484375, - "learning_rate": 3.220574233761603e-05, - "loss": 101.4168, - "step": 58280 - }, - { - "epoch": 0.4709960487722105, - "grad_norm": 649.397216796875, - "learning_rate": 3.219905701775182e-05, - "loss": 129.0152, - "step": 58290 - }, - { - "epoch": 0.47107685097649465, - "grad_norm": 1128.5592041015625, - "learning_rate": 3.219237113649032e-05, - "loss": 104.2195, - "step": 58300 - }, - { - "epoch": 0.4711576531807788, - "grad_norm": 557.17724609375, - "learning_rate": 3.2185684694352916e-05, - "loss": 119.2173, - "step": 58310 - }, - { - "epoch": 0.4712384553850629, - "grad_norm": 998.49609375, - "learning_rate": 3.2178997691861014e-05, - "loss": 89.4016, - "step": 58320 - }, - { - "epoch": 0.47131925758934706, - "grad_norm": 1012.6923217773438, - "learning_rate": 3.2172310129536096e-05, - "loss": 80.2471, - "step": 58330 - }, - { - "epoch": 0.47140005979363114, - "grad_norm": 858.87890625, - "learning_rate": 3.2165622007899676e-05, - "loss": 118.3096, - "step": 58340 - }, - { - "epoch": 0.4714808619979153, - "grad_norm": 1073.2791748046875, - "learning_rate": 3.215893332747328e-05, - "loss": 93.0368, - "step": 58350 - }, - { - "epoch": 0.4715616642021994, - "grad_norm": 691.7823486328125, - "learning_rate": 3.215224408877854e-05, - "loss": 82.6391, - "step": 58360 - }, - { - "epoch": 0.47164246640648355, - "grad_norm": 934.9600830078125, - "learning_rate": 3.214555429233707e-05, - "loss": 77.1286, - "step": 58370 - }, - { - "epoch": 0.4717232686107677, - "grad_norm": 646.1305541992188, - "learning_rate": 3.213886393867057e-05, - "loss": 113.5603, - "step": 58380 - }, - { - "epoch": 0.4718040708150518, - "grad_norm": 722.4954223632812, - "learning_rate": 3.2132173028300756e-05, - "loss": 99.9792, - "step": 58390 - }, - { - "epoch": 0.47188487301933596, - "grad_norm": 985.608154296875, - "learning_rate": 3.21254815617494e-05, - "loss": 95.6503, - "step": 58400 - }, - { - "epoch": 0.4719656752236201, - "grad_norm": 649.9782104492188, - "learning_rate": 3.2118789539538335e-05, - "loss": 125.4758, - "step": 58410 - }, - { - "epoch": 0.47204647742790423, - "grad_norm": 479.0445861816406, - "learning_rate": 3.21120969621894e-05, - "loss": 108.6725, - "step": 58420 - }, - { - "epoch": 0.47212727963218837, - "grad_norm": 1052.915771484375, - "learning_rate": 3.210540383022449e-05, - "loss": 101.2907, - "step": 58430 - }, - { - "epoch": 0.4722080818364725, - "grad_norm": 773.9623413085938, - "learning_rate": 3.209871014416557e-05, - "loss": 111.5068, - "step": 58440 - }, - { - "epoch": 0.47228888404075664, - "grad_norm": 824.235107421875, - "learning_rate": 3.209201590453461e-05, - "loss": 126.5944, - "step": 58450 - }, - { - "epoch": 0.4723696862450408, - "grad_norm": 780.6783447265625, - "learning_rate": 3.208532111185365e-05, - "loss": 105.4085, - "step": 58460 - }, - { - "epoch": 0.4724504884493249, - "grad_norm": 1268.6893310546875, - "learning_rate": 3.207862576664477e-05, - "loss": 135.0629, - "step": 58470 - }, - { - "epoch": 0.47253129065360905, - "grad_norm": 1165.115966796875, - "learning_rate": 3.207192986943006e-05, - "loss": 121.0744, - "step": 58480 - }, - { - "epoch": 0.4726120928578932, - "grad_norm": 603.7785034179688, - "learning_rate": 3.206523342073172e-05, - "loss": 108.0878, - "step": 58490 - }, - { - "epoch": 0.4726928950621773, - "grad_norm": 800.9425048828125, - "learning_rate": 3.205853642107192e-05, - "loss": 100.4045, - "step": 58500 - }, - { - "epoch": 0.47277369726646146, - "grad_norm": 876.8309326171875, - "learning_rate": 3.205183887097291e-05, - "loss": 128.3182, - "step": 58510 - }, - { - "epoch": 0.47285449947074554, - "grad_norm": 810.20947265625, - "learning_rate": 3.204514077095699e-05, - "loss": 104.0912, - "step": 58520 - }, - { - "epoch": 0.4729353016750297, - "grad_norm": 568.296630859375, - "learning_rate": 3.2038442121546487e-05, - "loss": 112.8761, - "step": 58530 - }, - { - "epoch": 0.4730161038793138, - "grad_norm": 1239.09033203125, - "learning_rate": 3.203174292326378e-05, - "loss": 112.9604, - "step": 58540 - }, - { - "epoch": 0.47309690608359795, - "grad_norm": 624.9968872070312, - "learning_rate": 3.202504317663128e-05, - "loss": 79.25, - "step": 58550 - }, - { - "epoch": 0.4731777082878821, - "grad_norm": 1313.496826171875, - "learning_rate": 3.2018342882171445e-05, - "loss": 92.6158, - "step": 58560 - }, - { - "epoch": 0.4732585104921662, - "grad_norm": 923.2136840820312, - "learning_rate": 3.2011642040406784e-05, - "loss": 83.504, - "step": 58570 - }, - { - "epoch": 0.47333931269645035, - "grad_norm": 972.1646728515625, - "learning_rate": 3.2004940651859844e-05, - "loss": 124.9935, - "step": 58580 - }, - { - "epoch": 0.4734201149007345, - "grad_norm": 1288.2474365234375, - "learning_rate": 3.1998238717053206e-05, - "loss": 109.7616, - "step": 58590 - }, - { - "epoch": 0.4735009171050186, - "grad_norm": 556.7060546875, - "learning_rate": 3.19915362365095e-05, - "loss": 97.102, - "step": 58600 - }, - { - "epoch": 0.47358171930930276, - "grad_norm": 701.5609130859375, - "learning_rate": 3.198483321075141e-05, - "loss": 94.6518, - "step": 58610 - }, - { - "epoch": 0.4736625215135869, - "grad_norm": 717.743896484375, - "learning_rate": 3.197812964030164e-05, - "loss": 87.4509, - "step": 58620 - }, - { - "epoch": 0.47374332371787103, - "grad_norm": 769.5567016601562, - "learning_rate": 3.197142552568295e-05, - "loss": 90.3785, - "step": 58630 - }, - { - "epoch": 0.47382412592215517, - "grad_norm": 1441.760009765625, - "learning_rate": 3.196472086741815e-05, - "loss": 122.7155, - "step": 58640 - }, - { - "epoch": 0.4739049281264393, - "grad_norm": 897.425537109375, - "learning_rate": 3.195801566603007e-05, - "loss": 89.9327, - "step": 58650 - }, - { - "epoch": 0.47398573033072344, - "grad_norm": 988.5167236328125, - "learning_rate": 3.195130992204161e-05, - "loss": 123.6318, - "step": 58660 - }, - { - "epoch": 0.4740665325350076, - "grad_norm": 678.8578491210938, - "learning_rate": 3.194460363597569e-05, - "loss": 107.4381, - "step": 58670 - }, - { - "epoch": 0.4741473347392917, - "grad_norm": 1366.6861572265625, - "learning_rate": 3.193789680835527e-05, - "loss": 97.8036, - "step": 58680 - }, - { - "epoch": 0.4742281369435758, - "grad_norm": 714.5557861328125, - "learning_rate": 3.193118943970338e-05, - "loss": 128.3359, - "step": 58690 - }, - { - "epoch": 0.47430893914785993, - "grad_norm": 1254.711181640625, - "learning_rate": 3.192448153054306e-05, - "loss": 122.3371, - "step": 58700 - }, - { - "epoch": 0.47438974135214407, - "grad_norm": 811.3286743164062, - "learning_rate": 3.191777308139742e-05, - "loss": 105.6813, - "step": 58710 - }, - { - "epoch": 0.4744705435564282, - "grad_norm": 1074.335693359375, - "learning_rate": 3.191106409278959e-05, - "loss": 110.9824, - "step": 58720 - }, - { - "epoch": 0.47455134576071234, - "grad_norm": 860.7822875976562, - "learning_rate": 3.190435456524275e-05, - "loss": 82.3038, - "step": 58730 - }, - { - "epoch": 0.4746321479649965, - "grad_norm": 1507.017333984375, - "learning_rate": 3.189764449928012e-05, - "loss": 105.2848, - "step": 58740 - }, - { - "epoch": 0.4747129501692806, - "grad_norm": 805.6829223632812, - "learning_rate": 3.1890933895424976e-05, - "loss": 99.0614, - "step": 58750 - }, - { - "epoch": 0.47479375237356475, - "grad_norm": 687.3268432617188, - "learning_rate": 3.1884222754200625e-05, - "loss": 81.6906, - "step": 58760 - }, - { - "epoch": 0.4748745545778489, - "grad_norm": 1014.6216430664062, - "learning_rate": 3.1877511076130404e-05, - "loss": 100.4825, - "step": 58770 - }, - { - "epoch": 0.474955356782133, - "grad_norm": 913.6085815429688, - "learning_rate": 3.1870798861737705e-05, - "loss": 104.8872, - "step": 58780 - }, - { - "epoch": 0.47503615898641716, - "grad_norm": 1361.265625, - "learning_rate": 3.186408611154597e-05, - "loss": 81.0609, - "step": 58790 - }, - { - "epoch": 0.4751169611907013, - "grad_norm": 994.3406982421875, - "learning_rate": 3.185737282607867e-05, - "loss": 114.9165, - "step": 58800 - }, - { - "epoch": 0.47519776339498543, - "grad_norm": 1521.5931396484375, - "learning_rate": 3.185065900585931e-05, - "loss": 121.4001, - "step": 58810 - }, - { - "epoch": 0.47527856559926956, - "grad_norm": 1093.007080078125, - "learning_rate": 3.1843944651411456e-05, - "loss": 92.6791, - "step": 58820 - }, - { - "epoch": 0.4753593678035537, - "grad_norm": 2034.453125, - "learning_rate": 3.1837229763258705e-05, - "loss": 131.7108, - "step": 58830 - }, - { - "epoch": 0.47544017000783784, - "grad_norm": 991.9862060546875, - "learning_rate": 3.183051434192471e-05, - "loss": 98.0535, - "step": 58840 - }, - { - "epoch": 0.475520972212122, - "grad_norm": 593.083740234375, - "learning_rate": 3.1823798387933134e-05, - "loss": 104.3583, - "step": 58850 - }, - { - "epoch": 0.47560177441640605, - "grad_norm": 925.4994506835938, - "learning_rate": 3.181708190180771e-05, - "loss": 92.8692, - "step": 58860 - }, - { - "epoch": 0.4756825766206902, - "grad_norm": 1329.87255859375, - "learning_rate": 3.1810364884072205e-05, - "loss": 98.4736, - "step": 58870 - }, - { - "epoch": 0.4757633788249743, - "grad_norm": 704.4393920898438, - "learning_rate": 3.180364733525043e-05, - "loss": 93.1203, - "step": 58880 - }, - { - "epoch": 0.47584418102925846, - "grad_norm": 1495.446533203125, - "learning_rate": 3.179692925586622e-05, - "loss": 105.0044, - "step": 58890 - }, - { - "epoch": 0.4759249832335426, - "grad_norm": 1185.6829833984375, - "learning_rate": 3.179021064644347e-05, - "loss": 111.7292, - "step": 58900 - }, - { - "epoch": 0.47600578543782673, - "grad_norm": 979.1586303710938, - "learning_rate": 3.178349150750612e-05, - "loss": 96.0568, - "step": 58910 - }, - { - "epoch": 0.47608658764211087, - "grad_norm": 886.9452514648438, - "learning_rate": 3.177677183957813e-05, - "loss": 98.2371, - "step": 58920 - }, - { - "epoch": 0.476167389846395, - "grad_norm": 721.514892578125, - "learning_rate": 3.177005164318353e-05, - "loss": 91.367, - "step": 58930 - }, - { - "epoch": 0.47624819205067914, - "grad_norm": 1384.469970703125, - "learning_rate": 3.176333091884635e-05, - "loss": 106.8865, - "step": 58940 - }, - { - "epoch": 0.4763289942549633, - "grad_norm": 1170.453125, - "learning_rate": 3.1756609667090696e-05, - "loss": 103.9065, - "step": 58950 - }, - { - "epoch": 0.4764097964592474, - "grad_norm": 875.2481689453125, - "learning_rate": 3.174988788844072e-05, - "loss": 143.4019, - "step": 58960 - }, - { - "epoch": 0.47649059866353155, - "grad_norm": 524.7057495117188, - "learning_rate": 3.174316558342059e-05, - "loss": 101.1321, - "step": 58970 - }, - { - "epoch": 0.4765714008678157, - "grad_norm": 809.9110717773438, - "learning_rate": 3.173644275255451e-05, - "loss": 114.4977, - "step": 58980 - }, - { - "epoch": 0.4766522030720998, - "grad_norm": 849.5028686523438, - "learning_rate": 3.1729719396366765e-05, - "loss": 73.1764, - "step": 58990 - }, - { - "epoch": 0.47673300527638396, - "grad_norm": 1663.684326171875, - "learning_rate": 3.172299551538164e-05, - "loss": 99.6599, - "step": 59000 - }, - { - "epoch": 0.4768138074806681, - "grad_norm": 765.8724365234375, - "learning_rate": 3.171627111012349e-05, - "loss": 88.0449, - "step": 59010 - }, - { - "epoch": 0.47689460968495223, - "grad_norm": 698.2634887695312, - "learning_rate": 3.170954618111669e-05, - "loss": 120.0432, - "step": 59020 - }, - { - "epoch": 0.4769754118892363, - "grad_norm": 690.5016479492188, - "learning_rate": 3.170282072888566e-05, - "loss": 91.566, - "step": 59030 - }, - { - "epoch": 0.47705621409352045, - "grad_norm": 1586.9439697265625, - "learning_rate": 3.169609475395486e-05, - "loss": 106.3707, - "step": 59040 - }, - { - "epoch": 0.4771370162978046, - "grad_norm": 856.8453979492188, - "learning_rate": 3.168936825684882e-05, - "loss": 90.2925, - "step": 59050 - }, - { - "epoch": 0.4772178185020887, - "grad_norm": 696.286376953125, - "learning_rate": 3.1682641238092064e-05, - "loss": 82.8917, - "step": 59060 - }, - { - "epoch": 0.47729862070637286, - "grad_norm": 932.0253295898438, - "learning_rate": 3.167591369820918e-05, - "loss": 130.3971, - "step": 59070 - }, - { - "epoch": 0.477379422910657, - "grad_norm": 717.440673828125, - "learning_rate": 3.166918563772481e-05, - "loss": 92.5327, - "step": 59080 - }, - { - "epoch": 0.47746022511494113, - "grad_norm": 1038.5445556640625, - "learning_rate": 3.1662457057163604e-05, - "loss": 86.9968, - "step": 59090 - }, - { - "epoch": 0.47754102731922526, - "grad_norm": 498.76617431640625, - "learning_rate": 3.1655727957050285e-05, - "loss": 107.8832, - "step": 59100 - }, - { - "epoch": 0.4776218295235094, - "grad_norm": 935.7940673828125, - "learning_rate": 3.1648998337909594e-05, - "loss": 96.4381, - "step": 59110 - }, - { - "epoch": 0.47770263172779354, - "grad_norm": 747.111083984375, - "learning_rate": 3.1642268200266317e-05, - "loss": 82.0378, - "step": 59120 - }, - { - "epoch": 0.4777834339320777, - "grad_norm": 747.8549194335938, - "learning_rate": 3.1635537544645296e-05, - "loss": 104.9988, - "step": 59130 - }, - { - "epoch": 0.4778642361363618, - "grad_norm": 3223.177490234375, - "learning_rate": 3.162880637157139e-05, - "loss": 103.5026, - "step": 59140 - }, - { - "epoch": 0.47794503834064594, - "grad_norm": 507.07635498046875, - "learning_rate": 3.162207468156952e-05, - "loss": 110.3765, - "step": 59150 - }, - { - "epoch": 0.4780258405449301, - "grad_norm": 1431.1328125, - "learning_rate": 3.1615342475164636e-05, - "loss": 101.0132, - "step": 59160 - }, - { - "epoch": 0.4781066427492142, - "grad_norm": 1058.451904296875, - "learning_rate": 3.160860975288171e-05, - "loss": 85.4014, - "step": 59170 - }, - { - "epoch": 0.47818744495349835, - "grad_norm": 825.1602172851562, - "learning_rate": 3.16018765152458e-05, - "loss": 83.6388, - "step": 59180 - }, - { - "epoch": 0.4782682471577825, - "grad_norm": 623.8566284179688, - "learning_rate": 3.159514276278197e-05, - "loss": 106.2872, - "step": 59190 - }, - { - "epoch": 0.47834904936206657, - "grad_norm": 7339.82958984375, - "learning_rate": 3.158840849601532e-05, - "loss": 105.9948, - "step": 59200 - }, - { - "epoch": 0.4784298515663507, - "grad_norm": 1579.6492919921875, - "learning_rate": 3.1581673715471006e-05, - "loss": 91.3266, - "step": 59210 - }, - { - "epoch": 0.47851065377063484, - "grad_norm": 1075.34765625, - "learning_rate": 3.157493842167423e-05, - "loss": 102.1096, - "step": 59220 - }, - { - "epoch": 0.478591455974919, - "grad_norm": 1023.5353393554688, - "learning_rate": 3.156820261515022e-05, - "loss": 119.3776, - "step": 59230 - }, - { - "epoch": 0.4786722581792031, - "grad_norm": 843.8431396484375, - "learning_rate": 3.156146629642425e-05, - "loss": 84.1022, - "step": 59240 - }, - { - "epoch": 0.47875306038348725, - "grad_norm": 831.8274536132812, - "learning_rate": 3.155472946602162e-05, - "loss": 131.5271, - "step": 59250 - }, - { - "epoch": 0.4788338625877714, - "grad_norm": 1006.3485717773438, - "learning_rate": 3.15479921244677e-05, - "loss": 89.7562, - "step": 59260 - }, - { - "epoch": 0.4789146647920555, - "grad_norm": 1144.1214599609375, - "learning_rate": 3.1541254272287865e-05, - "loss": 100.28, - "step": 59270 - }, - { - "epoch": 0.47899546699633966, - "grad_norm": 971.5165405273438, - "learning_rate": 3.153451591000756e-05, - "loss": 146.5191, - "step": 59280 - }, - { - "epoch": 0.4790762692006238, - "grad_norm": 886.0711669921875, - "learning_rate": 3.152777703815223e-05, - "loss": 122.3819, - "step": 59290 - }, - { - "epoch": 0.47915707140490793, - "grad_norm": 887.5047607421875, - "learning_rate": 3.152103765724743e-05, - "loss": 105.9066, - "step": 59300 - }, - { - "epoch": 0.47923787360919207, - "grad_norm": 1289.8714599609375, - "learning_rate": 3.151429776781868e-05, - "loss": 90.3328, - "step": 59310 - }, - { - "epoch": 0.4793186758134762, - "grad_norm": 1534.733642578125, - "learning_rate": 3.150755737039157e-05, - "loss": 121.8971, - "step": 59320 - }, - { - "epoch": 0.47939947801776034, - "grad_norm": 882.5303955078125, - "learning_rate": 3.150081646549174e-05, - "loss": 109.2284, - "step": 59330 - }, - { - "epoch": 0.4794802802220445, - "grad_norm": 640.6094970703125, - "learning_rate": 3.149407505364486e-05, - "loss": 103.6609, - "step": 59340 - }, - { - "epoch": 0.4795610824263286, - "grad_norm": 881.5503540039062, - "learning_rate": 3.148733313537664e-05, - "loss": 84.82, - "step": 59350 - }, - { - "epoch": 0.47964188463061275, - "grad_norm": 791.4007568359375, - "learning_rate": 3.148059071121282e-05, - "loss": 122.9908, - "step": 59360 - }, - { - "epoch": 0.4797226868348969, - "grad_norm": 618.663818359375, - "learning_rate": 3.14738477816792e-05, - "loss": 97.6629, - "step": 59370 - }, - { - "epoch": 0.47980348903918096, - "grad_norm": 1005.4253540039062, - "learning_rate": 3.146710434730159e-05, - "loss": 77.9889, - "step": 59380 - }, - { - "epoch": 0.4798842912434651, - "grad_norm": 801.0524291992188, - "learning_rate": 3.1460360408605866e-05, - "loss": 84.4541, - "step": 59390 - }, - { - "epoch": 0.47996509344774924, - "grad_norm": 909.794921875, - "learning_rate": 3.145361596611795e-05, - "loss": 108.2647, - "step": 59400 - }, - { - "epoch": 0.48004589565203337, - "grad_norm": 879.032958984375, - "learning_rate": 3.144687102036376e-05, - "loss": 93.3489, - "step": 59410 - }, - { - "epoch": 0.4801266978563175, - "grad_norm": 940.010498046875, - "learning_rate": 3.1440125571869306e-05, - "loss": 88.4953, - "step": 59420 - }, - { - "epoch": 0.48020750006060164, - "grad_norm": 1568.209228515625, - "learning_rate": 3.1433379621160586e-05, - "loss": 87.6884, - "step": 59430 - }, - { - "epoch": 0.4802883022648858, - "grad_norm": 841.1142578125, - "learning_rate": 3.142663316876368e-05, - "loss": 99.9515, - "step": 59440 - }, - { - "epoch": 0.4803691044691699, - "grad_norm": 652.7152099609375, - "learning_rate": 3.1419886215204694e-05, - "loss": 89.1127, - "step": 59450 - }, - { - "epoch": 0.48044990667345405, - "grad_norm": 758.3694458007812, - "learning_rate": 3.141313876100976e-05, - "loss": 110.9414, - "step": 59460 - }, - { - "epoch": 0.4805307088777382, - "grad_norm": 668.2158813476562, - "learning_rate": 3.140639080670507e-05, - "loss": 114.1169, - "step": 59470 - }, - { - "epoch": 0.4806115110820223, - "grad_norm": 669.8775634765625, - "learning_rate": 3.139964235281682e-05, - "loss": 96.589, - "step": 59480 - }, - { - "epoch": 0.48069231328630646, - "grad_norm": 617.7365112304688, - "learning_rate": 3.1392893399871295e-05, - "loss": 110.699, - "step": 59490 - }, - { - "epoch": 0.4807731154905906, - "grad_norm": 998.271240234375, - "learning_rate": 3.138614394839476e-05, - "loss": 94.4083, - "step": 59500 - }, - { - "epoch": 0.48085391769487473, - "grad_norm": 756.2373657226562, - "learning_rate": 3.137939399891359e-05, - "loss": 178.919, - "step": 59510 - }, - { - "epoch": 0.48093471989915887, - "grad_norm": 944.0829467773438, - "learning_rate": 3.137264355195413e-05, - "loss": 110.9646, - "step": 59520 - }, - { - "epoch": 0.481015522103443, - "grad_norm": 1094.1724853515625, - "learning_rate": 3.136589260804282e-05, - "loss": 98.9162, - "step": 59530 - }, - { - "epoch": 0.48109632430772714, - "grad_norm": 1111.6876220703125, - "learning_rate": 3.135914116770609e-05, - "loss": 101.1254, - "step": 59540 - }, - { - "epoch": 0.4811771265120112, - "grad_norm": 957.7742919921875, - "learning_rate": 3.135238923147043e-05, - "loss": 120.4345, - "step": 59550 - }, - { - "epoch": 0.48125792871629536, - "grad_norm": 1138.4683837890625, - "learning_rate": 3.134563679986238e-05, - "loss": 68.9614, - "step": 59560 - }, - { - "epoch": 0.4813387309205795, - "grad_norm": 1039.4593505859375, - "learning_rate": 3.1338883873408516e-05, - "loss": 92.0563, - "step": 59570 - }, - { - "epoch": 0.48141953312486363, - "grad_norm": 468.5234069824219, - "learning_rate": 3.133213045263543e-05, - "loss": 98.3264, - "step": 59580 - }, - { - "epoch": 0.48150033532914777, - "grad_norm": 1000.0983276367188, - "learning_rate": 3.1325376538069776e-05, - "loss": 144.6992, - "step": 59590 - }, - { - "epoch": 0.4815811375334319, - "grad_norm": 1160.0662841796875, - "learning_rate": 3.1318622130238236e-05, - "loss": 112.0439, - "step": 59600 - }, - { - "epoch": 0.48166193973771604, - "grad_norm": 911.6088256835938, - "learning_rate": 3.131186722966753e-05, - "loss": 89.6205, - "step": 59610 - }, - { - "epoch": 0.4817427419420002, - "grad_norm": 722.9677734375, - "learning_rate": 3.1305111836884425e-05, - "loss": 87.3508, - "step": 59620 - }, - { - "epoch": 0.4818235441462843, - "grad_norm": 983.3289794921875, - "learning_rate": 3.129835595241571e-05, - "loss": 116.1893, - "step": 59630 - }, - { - "epoch": 0.48190434635056845, - "grad_norm": 1213.1534423828125, - "learning_rate": 3.129159957678824e-05, - "loss": 91.9284, - "step": 59640 - }, - { - "epoch": 0.4819851485548526, - "grad_norm": 1225.816162109375, - "learning_rate": 3.1284842710528876e-05, - "loss": 98.2112, - "step": 59650 - }, - { - "epoch": 0.4820659507591367, - "grad_norm": 1024.888427734375, - "learning_rate": 3.127808535416454e-05, - "loss": 111.0748, - "step": 59660 - }, - { - "epoch": 0.48214675296342085, - "grad_norm": 635.3829345703125, - "learning_rate": 3.1271327508222174e-05, - "loss": 145.5628, - "step": 59670 - }, - { - "epoch": 0.482227555167705, - "grad_norm": 686.2293701171875, - "learning_rate": 3.126456917322878e-05, - "loss": 114.7817, - "step": 59680 - }, - { - "epoch": 0.4823083573719891, - "grad_norm": 1201.61962890625, - "learning_rate": 3.125781034971139e-05, - "loss": 104.3948, - "step": 59690 - }, - { - "epoch": 0.48238915957627326, - "grad_norm": 539.2265014648438, - "learning_rate": 3.1251051038197055e-05, - "loss": 101.9212, - "step": 59700 - }, - { - "epoch": 0.4824699617805574, - "grad_norm": 1384.9298095703125, - "learning_rate": 3.1244291239212896e-05, - "loss": 98.5375, - "step": 59710 - }, - { - "epoch": 0.4825507639848415, - "grad_norm": 1162.1209716796875, - "learning_rate": 3.123753095328604e-05, - "loss": 122.0196, - "step": 59720 - }, - { - "epoch": 0.4826315661891256, - "grad_norm": 774.910400390625, - "learning_rate": 3.123077018094369e-05, - "loss": 79.7318, - "step": 59730 - }, - { - "epoch": 0.48271236839340975, - "grad_norm": 965.8831176757812, - "learning_rate": 3.1224008922713044e-05, - "loss": 121.5367, - "step": 59740 - }, - { - "epoch": 0.4827931705976939, - "grad_norm": 820.662353515625, - "learning_rate": 3.121724717912138e-05, - "loss": 82.3887, - "step": 59750 - }, - { - "epoch": 0.482873972801978, - "grad_norm": 1469.3553466796875, - "learning_rate": 3.121048495069596e-05, - "loss": 101.9264, - "step": 59760 - }, - { - "epoch": 0.48295477500626216, - "grad_norm": 1699.1929931640625, - "learning_rate": 3.120372223796415e-05, - "loss": 129.0573, - "step": 59770 - }, - { - "epoch": 0.4830355772105463, - "grad_norm": 1139.0489501953125, - "learning_rate": 3.11969590414533e-05, - "loss": 112.7681, - "step": 59780 - }, - { - "epoch": 0.48311637941483043, - "grad_norm": 550.82958984375, - "learning_rate": 3.119019536169083e-05, - "loss": 80.5146, - "step": 59790 - }, - { - "epoch": 0.48319718161911457, - "grad_norm": 1049.458740234375, - "learning_rate": 3.118343119920418e-05, - "loss": 95.4374, - "step": 59800 - }, - { - "epoch": 0.4832779838233987, - "grad_norm": 859.3832397460938, - "learning_rate": 3.117666655452083e-05, - "loss": 88.6226, - "step": 59810 - }, - { - "epoch": 0.48335878602768284, - "grad_norm": 942.1868896484375, - "learning_rate": 3.11699014281683e-05, - "loss": 101.5068, - "step": 59820 - }, - { - "epoch": 0.483439588231967, - "grad_norm": 858.2274780273438, - "learning_rate": 3.116313582067416e-05, - "loss": 98.059, - "step": 59830 - }, - { - "epoch": 0.4835203904362511, - "grad_norm": 816.7584228515625, - "learning_rate": 3.1156369732566006e-05, - "loss": 68.2786, - "step": 59840 - }, - { - "epoch": 0.48360119264053525, - "grad_norm": 733.7037963867188, - "learning_rate": 3.114960316437145e-05, - "loss": 117.8468, - "step": 59850 - }, - { - "epoch": 0.4836819948448194, - "grad_norm": 1159.0211181640625, - "learning_rate": 3.114283611661818e-05, - "loss": 119.6192, - "step": 59860 - }, - { - "epoch": 0.4837627970491035, - "grad_norm": 672.6622924804688, - "learning_rate": 3.1136068589833914e-05, - "loss": 104.3813, - "step": 59870 - }, - { - "epoch": 0.48384359925338766, - "grad_norm": 824.0282592773438, - "learning_rate": 3.1129300584546375e-05, - "loss": 86.5423, - "step": 59880 - }, - { - "epoch": 0.48392440145767174, - "grad_norm": 1150.6656494140625, - "learning_rate": 3.112253210128336e-05, - "loss": 98.0679, - "step": 59890 - }, - { - "epoch": 0.4840052036619559, - "grad_norm": 703.7778930664062, - "learning_rate": 3.111576314057268e-05, - "loss": 84.4992, - "step": 59900 - }, - { - "epoch": 0.48408600586624, - "grad_norm": 1012.8980712890625, - "learning_rate": 3.1108993702942205e-05, - "loss": 110.2319, - "step": 59910 - }, - { - "epoch": 0.48416680807052415, - "grad_norm": 798.6395874023438, - "learning_rate": 3.1102223788919824e-05, - "loss": 85.7478, - "step": 59920 - }, - { - "epoch": 0.4842476102748083, - "grad_norm": 769.3587646484375, - "learning_rate": 3.1095453399033466e-05, - "loss": 91.0564, - "step": 59930 - }, - { - "epoch": 0.4843284124790924, - "grad_norm": 1363.0001220703125, - "learning_rate": 3.10886825338111e-05, - "loss": 104.5787, - "step": 59940 - }, - { - "epoch": 0.48440921468337655, - "grad_norm": 1342.95849609375, - "learning_rate": 3.108191119378073e-05, - "loss": 89.6273, - "step": 59950 - }, - { - "epoch": 0.4844900168876607, - "grad_norm": 972.3782958984375, - "learning_rate": 3.107513937947041e-05, - "loss": 120.0807, - "step": 59960 - }, - { - "epoch": 0.4845708190919448, - "grad_norm": 865.6650390625, - "learning_rate": 3.106836709140821e-05, - "loss": 86.6694, - "step": 59970 - }, - { - "epoch": 0.48465162129622896, - "grad_norm": 984.1576538085938, - "learning_rate": 3.1061594330122246e-05, - "loss": 89.5221, - "step": 59980 - }, - { - "epoch": 0.4847324235005131, - "grad_norm": 1149.4617919921875, - "learning_rate": 3.1054821096140676e-05, - "loss": 97.7978, - "step": 59990 - }, - { - "epoch": 0.48481322570479723, - "grad_norm": 592.638916015625, - "learning_rate": 3.104804738999169e-05, - "loss": 104.4585, - "step": 60000 - }, - { - "epoch": 0.48489402790908137, - "grad_norm": 1775.9285888671875, - "learning_rate": 3.104127321220353e-05, - "loss": 94.7904, - "step": 60010 - }, - { - "epoch": 0.4849748301133655, - "grad_norm": 2295.8642578125, - "learning_rate": 3.103449856330443e-05, - "loss": 112.4231, - "step": 60020 - }, - { - "epoch": 0.48505563231764964, - "grad_norm": 1315.8931884765625, - "learning_rate": 3.102772344382271e-05, - "loss": 114.7798, - "step": 60030 - }, - { - "epoch": 0.4851364345219338, - "grad_norm": 980.0896606445312, - "learning_rate": 3.102094785428671e-05, - "loss": 111.8056, - "step": 60040 - }, - { - "epoch": 0.4852172367262179, - "grad_norm": 834.1741333007812, - "learning_rate": 3.101417179522479e-05, - "loss": 83.3201, - "step": 60050 - }, - { - "epoch": 0.48529803893050205, - "grad_norm": 1191.6806640625, - "learning_rate": 3.100739526716538e-05, - "loss": 100.5132, - "step": 60060 - }, - { - "epoch": 0.48537884113478613, - "grad_norm": 1658.875, - "learning_rate": 3.100061827063692e-05, - "loss": 103.364, - "step": 60070 - }, - { - "epoch": 0.48545964333907027, - "grad_norm": 1259.3106689453125, - "learning_rate": 3.099384080616789e-05, - "loss": 130.4786, - "step": 60080 - }, - { - "epoch": 0.4855404455433544, - "grad_norm": 620.7562866210938, - "learning_rate": 3.0987062874286804e-05, - "loss": 77.8296, - "step": 60090 - }, - { - "epoch": 0.48562124774763854, - "grad_norm": 609.9969482421875, - "learning_rate": 3.098028447552224e-05, - "loss": 101.6062, - "step": 60100 - }, - { - "epoch": 0.4857020499519227, - "grad_norm": 595.8947143554688, - "learning_rate": 3.0973505610402765e-05, - "loss": 89.2852, - "step": 60110 - }, - { - "epoch": 0.4857828521562068, - "grad_norm": 1015.4224243164062, - "learning_rate": 3.0966726279457034e-05, - "loss": 95.6552, - "step": 60120 - }, - { - "epoch": 0.48586365436049095, - "grad_norm": 2668.068115234375, - "learning_rate": 3.09599464832137e-05, - "loss": 111.774, - "step": 60130 - }, - { - "epoch": 0.4859444565647751, - "grad_norm": 677.4678344726562, - "learning_rate": 3.0953166222201476e-05, - "loss": 91.261, - "step": 60140 - }, - { - "epoch": 0.4860252587690592, - "grad_norm": 1510.345703125, - "learning_rate": 3.094638549694908e-05, - "loss": 108.0132, - "step": 60150 - }, - { - "epoch": 0.48610606097334336, - "grad_norm": 1440.59716796875, - "learning_rate": 3.09396043079853e-05, - "loss": 124.7984, - "step": 60160 - }, - { - "epoch": 0.4861868631776275, - "grad_norm": 743.6878051757812, - "learning_rate": 3.093282265583895e-05, - "loss": 103.9458, - "step": 60170 - }, - { - "epoch": 0.48626766538191163, - "grad_norm": 799.70703125, - "learning_rate": 3.092604054103888e-05, - "loss": 92.9165, - "step": 60180 - }, - { - "epoch": 0.48634846758619577, - "grad_norm": 921.1470947265625, - "learning_rate": 3.0919257964113964e-05, - "loss": 134.4742, - "step": 60190 - }, - { - "epoch": 0.4864292697904799, - "grad_norm": 691.5244140625, - "learning_rate": 3.091247492559312e-05, - "loss": 103.7256, - "step": 60200 - }, - { - "epoch": 0.48651007199476404, - "grad_norm": 1174.453125, - "learning_rate": 3.090569142600531e-05, - "loss": 98.6646, - "step": 60210 - }, - { - "epoch": 0.4865908741990482, - "grad_norm": 1069.83447265625, - "learning_rate": 3.089890746587953e-05, - "loss": 88.6477, - "step": 60220 - }, - { - "epoch": 0.4866716764033323, - "grad_norm": 519.0724487304688, - "learning_rate": 3.0892123045744785e-05, - "loss": 82.0224, - "step": 60230 - }, - { - "epoch": 0.4867524786076164, - "grad_norm": 986.7581176757812, - "learning_rate": 3.088533816613017e-05, - "loss": 113.1994, - "step": 60240 - }, - { - "epoch": 0.4868332808119005, - "grad_norm": 1006.0873413085938, - "learning_rate": 3.087855282756475e-05, - "loss": 102.0562, - "step": 60250 - }, - { - "epoch": 0.48691408301618466, - "grad_norm": 1294.3375244140625, - "learning_rate": 3.087176703057769e-05, - "loss": 106.5908, - "step": 60260 - }, - { - "epoch": 0.4869948852204688, - "grad_norm": 501.10223388671875, - "learning_rate": 3.0864980775698145e-05, - "loss": 130.151, - "step": 60270 - }, - { - "epoch": 0.48707568742475293, - "grad_norm": 844.74951171875, - "learning_rate": 3.085819406345532e-05, - "loss": 131.4795, - "step": 60280 - }, - { - "epoch": 0.48715648962903707, - "grad_norm": 850.210693359375, - "learning_rate": 3.085140689437846e-05, - "loss": 102.8871, - "step": 60290 - }, - { - "epoch": 0.4872372918333212, - "grad_norm": 1011.3098754882812, - "learning_rate": 3.0844619268996845e-05, - "loss": 105.288, - "step": 60300 - }, - { - "epoch": 0.48731809403760534, - "grad_norm": 1169.3095703125, - "learning_rate": 3.0837831187839784e-05, - "loss": 125.0571, - "step": 60310 - }, - { - "epoch": 0.4873988962418895, - "grad_norm": 1305.4566650390625, - "learning_rate": 3.083104265143663e-05, - "loss": 136.4557, - "step": 60320 - }, - { - "epoch": 0.4874796984461736, - "grad_norm": 537.4886474609375, - "learning_rate": 3.082425366031676e-05, - "loss": 77.1489, - "step": 60330 - }, - { - "epoch": 0.48756050065045775, - "grad_norm": 1468.9324951171875, - "learning_rate": 3.08174642150096e-05, - "loss": 87.9731, - "step": 60340 - }, - { - "epoch": 0.4876413028547419, - "grad_norm": 1293.12841796875, - "learning_rate": 3.08106743160446e-05, - "loss": 84.0916, - "step": 60350 - }, - { - "epoch": 0.487722105059026, - "grad_norm": 876.5225830078125, - "learning_rate": 3.0803883963951255e-05, - "loss": 105.9564, - "step": 60360 - }, - { - "epoch": 0.48780290726331016, - "grad_norm": 586.9011840820312, - "learning_rate": 3.0797093159259085e-05, - "loss": 78.521, - "step": 60370 - }, - { - "epoch": 0.4878837094675943, - "grad_norm": 652.8150634765625, - "learning_rate": 3.0790301902497666e-05, - "loss": 75.6755, - "step": 60380 - }, - { - "epoch": 0.48796451167187843, - "grad_norm": 1636.29248046875, - "learning_rate": 3.0783510194196576e-05, - "loss": 154.5272, - "step": 60390 - }, - { - "epoch": 0.48804531387616257, - "grad_norm": 898.5106201171875, - "learning_rate": 3.0776718034885454e-05, - "loss": 83.3957, - "step": 60400 - }, - { - "epoch": 0.48812611608044665, - "grad_norm": 858.0210571289062, - "learning_rate": 3.076992542509396e-05, - "loss": 78.7058, - "step": 60410 - }, - { - "epoch": 0.4882069182847308, - "grad_norm": 716.4771728515625, - "learning_rate": 3.07631323653518e-05, - "loss": 124.9374, - "step": 60420 - }, - { - "epoch": 0.4882877204890149, - "grad_norm": 1164.1922607421875, - "learning_rate": 3.0756338856188716e-05, - "loss": 118.8369, - "step": 60430 - }, - { - "epoch": 0.48836852269329906, - "grad_norm": 1190.9383544921875, - "learning_rate": 3.074954489813449e-05, - "loss": 134.1836, - "step": 60440 - }, - { - "epoch": 0.4884493248975832, - "grad_norm": 564.4974365234375, - "learning_rate": 3.074275049171889e-05, - "loss": 113.8675, - "step": 60450 - }, - { - "epoch": 0.48853012710186733, - "grad_norm": 782.3978881835938, - "learning_rate": 3.0735955637471794e-05, - "loss": 96.8152, - "step": 60460 - }, - { - "epoch": 0.48861092930615146, - "grad_norm": 735.1063842773438, - "learning_rate": 3.072916033592307e-05, - "loss": 96.5397, - "step": 60470 - }, - { - "epoch": 0.4886917315104356, - "grad_norm": 539.7359008789062, - "learning_rate": 3.072236458760262e-05, - "loss": 69.7088, - "step": 60480 - }, - { - "epoch": 0.48877253371471974, - "grad_norm": 1166.1112060546875, - "learning_rate": 3.0715568393040405e-05, - "loss": 111.1935, - "step": 60490 - }, - { - "epoch": 0.4888533359190039, - "grad_norm": 960.2236938476562, - "learning_rate": 3.0708771752766394e-05, - "loss": 129.4855, - "step": 60500 - }, - { - "epoch": 0.488934138123288, - "grad_norm": 525.9735107421875, - "learning_rate": 3.070197466731061e-05, - "loss": 96.1911, - "step": 60510 - }, - { - "epoch": 0.48901494032757215, - "grad_norm": 812.2979125976562, - "learning_rate": 3.06951771372031e-05, - "loss": 97.6864, - "step": 60520 - }, - { - "epoch": 0.4890957425318563, - "grad_norm": 1304.519775390625, - "learning_rate": 3.0688379162973955e-05, - "loss": 149.2851, - "step": 60530 - }, - { - "epoch": 0.4891765447361404, - "grad_norm": 614.3257446289062, - "learning_rate": 3.068158074515328e-05, - "loss": 95.7029, - "step": 60540 - }, - { - "epoch": 0.48925734694042455, - "grad_norm": 1712.494384765625, - "learning_rate": 3.0674781884271254e-05, - "loss": 137.9967, - "step": 60550 - }, - { - "epoch": 0.4893381491447087, - "grad_norm": 1382.921142578125, - "learning_rate": 3.0667982580858044e-05, - "loss": 135.9933, - "step": 60560 - }, - { - "epoch": 0.4894189513489928, - "grad_norm": 1035.962158203125, - "learning_rate": 3.0661182835443884e-05, - "loss": 100.9676, - "step": 60570 - }, - { - "epoch": 0.4894997535532769, - "grad_norm": 1817.8814697265625, - "learning_rate": 3.0654382648559026e-05, - "loss": 95.5548, - "step": 60580 - }, - { - "epoch": 0.48958055575756104, - "grad_norm": 621.560302734375, - "learning_rate": 3.064758202073377e-05, - "loss": 116.2889, - "step": 60590 - }, - { - "epoch": 0.4896613579618452, - "grad_norm": 1343.86181640625, - "learning_rate": 3.064078095249844e-05, - "loss": 92.1819, - "step": 60600 - }, - { - "epoch": 0.4897421601661293, - "grad_norm": 1446.5303955078125, - "learning_rate": 3.06339794443834e-05, - "loss": 113.155, - "step": 60610 - }, - { - "epoch": 0.48982296237041345, - "grad_norm": 634.691162109375, - "learning_rate": 3.062717749691904e-05, - "loss": 77.573, - "step": 60620 - }, - { - "epoch": 0.4899037645746976, - "grad_norm": 842.3832397460938, - "learning_rate": 3.06203751106358e-05, - "loss": 92.64, - "step": 60630 - }, - { - "epoch": 0.4899845667789817, - "grad_norm": 1305.42236328125, - "learning_rate": 3.0613572286064125e-05, - "loss": 112.9239, - "step": 60640 - }, - { - "epoch": 0.49006536898326586, - "grad_norm": 535.7638549804688, - "learning_rate": 3.0606769023734536e-05, - "loss": 124.2977, - "step": 60650 - }, - { - "epoch": 0.49014617118755, - "grad_norm": 777.18408203125, - "learning_rate": 3.059996532417754e-05, - "loss": 103.5948, - "step": 60660 - }, - { - "epoch": 0.49022697339183413, - "grad_norm": 713.6359252929688, - "learning_rate": 3.0593161187923736e-05, - "loss": 102.3931, - "step": 60670 - }, - { - "epoch": 0.49030777559611827, - "grad_norm": 1172.728515625, - "learning_rate": 3.058635661550369e-05, - "loss": 109.8537, - "step": 60680 - }, - { - "epoch": 0.4903885778004024, - "grad_norm": 613.489501953125, - "learning_rate": 3.0579551607448066e-05, - "loss": 104.109, - "step": 60690 - }, - { - "epoch": 0.49046938000468654, - "grad_norm": 1086.861328125, - "learning_rate": 3.0572746164287514e-05, - "loss": 88.9355, - "step": 60700 - }, - { - "epoch": 0.4905501822089707, - "grad_norm": 508.2934875488281, - "learning_rate": 3.056594028655274e-05, - "loss": 109.406, - "step": 60710 - }, - { - "epoch": 0.4906309844132548, - "grad_norm": 1035.7122802734375, - "learning_rate": 3.055913397477448e-05, - "loss": 124.8222, - "step": 60720 - }, - { - "epoch": 0.49071178661753895, - "grad_norm": 719.455810546875, - "learning_rate": 3.0552327229483515e-05, - "loss": 96.3, - "step": 60730 - }, - { - "epoch": 0.4907925888218231, - "grad_norm": 1167.258544921875, - "learning_rate": 3.054552005121064e-05, - "loss": 112.7671, - "step": 60740 - }, - { - "epoch": 0.4908733910261072, - "grad_norm": 855.69189453125, - "learning_rate": 3.053871244048669e-05, - "loss": 91.4691, - "step": 60750 - }, - { - "epoch": 0.4909541932303913, - "grad_norm": 1401.1826171875, - "learning_rate": 3.053190439784253e-05, - "loss": 97.0819, - "step": 60760 - }, - { - "epoch": 0.49103499543467544, - "grad_norm": 2805.474365234375, - "learning_rate": 3.052509592380909e-05, - "loss": 127.2373, - "step": 60770 - }, - { - "epoch": 0.4911157976389596, - "grad_norm": 1249.7474365234375, - "learning_rate": 3.051828701891729e-05, - "loss": 78.0477, - "step": 60780 - }, - { - "epoch": 0.4911965998432437, - "grad_norm": 638.1631469726562, - "learning_rate": 3.0511477683698108e-05, - "loss": 124.8624, - "step": 60790 - }, - { - "epoch": 0.49127740204752784, - "grad_norm": 1172.542236328125, - "learning_rate": 3.050466791868254e-05, - "loss": 132.901, - "step": 60800 - }, - { - "epoch": 0.491358204251812, - "grad_norm": 766.94287109375, - "learning_rate": 3.0497857724401642e-05, - "loss": 132.9624, - "step": 60810 - }, - { - "epoch": 0.4914390064560961, - "grad_norm": 447.9418029785156, - "learning_rate": 3.049104710138647e-05, - "loss": 84.1941, - "step": 60820 - }, - { - "epoch": 0.49151980866038025, - "grad_norm": 1044.43896484375, - "learning_rate": 3.0484236050168153e-05, - "loss": 80.2653, - "step": 60830 - }, - { - "epoch": 0.4916006108646644, - "grad_norm": 3091.36572265625, - "learning_rate": 3.0477424571277807e-05, - "loss": 98.6662, - "step": 60840 - }, - { - "epoch": 0.4916814130689485, - "grad_norm": 1133.0091552734375, - "learning_rate": 3.0470612665246618e-05, - "loss": 105.285, - "step": 60850 - }, - { - "epoch": 0.49176221527323266, - "grad_norm": 1228.9381103515625, - "learning_rate": 3.0463800332605784e-05, - "loss": 84.2497, - "step": 60860 - }, - { - "epoch": 0.4918430174775168, - "grad_norm": 880.3346557617188, - "learning_rate": 3.0456987573886564e-05, - "loss": 81.2345, - "step": 60870 - }, - { - "epoch": 0.49192381968180093, - "grad_norm": 657.6962280273438, - "learning_rate": 3.0450174389620205e-05, - "loss": 112.6922, - "step": 60880 - }, - { - "epoch": 0.49200462188608507, - "grad_norm": 687.509521484375, - "learning_rate": 3.044336078033803e-05, - "loss": 81.1746, - "step": 60890 - }, - { - "epoch": 0.4920854240903692, - "grad_norm": 569.5506591796875, - "learning_rate": 3.0436546746571372e-05, - "loss": 100.4749, - "step": 60900 - }, - { - "epoch": 0.49216622629465334, - "grad_norm": 1080.706787109375, - "learning_rate": 3.0429732288851603e-05, - "loss": 125.0461, - "step": 60910 - }, - { - "epoch": 0.4922470284989375, - "grad_norm": 715.1385498046875, - "learning_rate": 3.0422917407710137e-05, - "loss": 74.653, - "step": 60920 - }, - { - "epoch": 0.49232783070322156, - "grad_norm": 826.19384765625, - "learning_rate": 3.0416102103678402e-05, - "loss": 110.6619, - "step": 60930 - }, - { - "epoch": 0.4924086329075057, - "grad_norm": 613.7562866210938, - "learning_rate": 3.040928637728787e-05, - "loss": 113.4333, - "step": 60940 - }, - { - "epoch": 0.49248943511178983, - "grad_norm": 814.072021484375, - "learning_rate": 3.0402470229070056e-05, - "loss": 100.765, - "step": 60950 - }, - { - "epoch": 0.49257023731607397, - "grad_norm": 536.596435546875, - "learning_rate": 3.0395653659556488e-05, - "loss": 87.4366, - "step": 60960 - }, - { - "epoch": 0.4926510395203581, - "grad_norm": 1002.4295654296875, - "learning_rate": 3.0388836669278738e-05, - "loss": 105.0891, - "step": 60970 - }, - { - "epoch": 0.49273184172464224, - "grad_norm": 1038.828857421875, - "learning_rate": 3.0382019258768403e-05, - "loss": 115.8443, - "step": 60980 - }, - { - "epoch": 0.4928126439289264, - "grad_norm": 653.9907836914062, - "learning_rate": 3.0375201428557132e-05, - "loss": 99.3368, - "step": 60990 - }, - { - "epoch": 0.4928934461332105, - "grad_norm": 581.2391967773438, - "learning_rate": 3.0368383179176585e-05, - "loss": 91.8406, - "step": 61000 - }, - { - "epoch": 0.49297424833749465, - "grad_norm": 1085.697509765625, - "learning_rate": 3.0361564511158457e-05, - "loss": 108.0153, - "step": 61010 - }, - { - "epoch": 0.4930550505417788, - "grad_norm": 797.926513671875, - "learning_rate": 3.0354745425034498e-05, - "loss": 119.8415, - "step": 61020 - }, - { - "epoch": 0.4931358527460629, - "grad_norm": 1245.55810546875, - "learning_rate": 3.0347925921336463e-05, - "loss": 109.9589, - "step": 61030 - }, - { - "epoch": 0.49321665495034706, - "grad_norm": 1325.5728759765625, - "learning_rate": 3.034110600059616e-05, - "loss": 78.2776, - "step": 61040 - }, - { - "epoch": 0.4932974571546312, - "grad_norm": 1648.55419921875, - "learning_rate": 3.0334285663345404e-05, - "loss": 77.1788, - "step": 61050 - }, - { - "epoch": 0.4933782593589153, - "grad_norm": 1330.4677734375, - "learning_rate": 3.032746491011607e-05, - "loss": 82.9381, - "step": 61060 - }, - { - "epoch": 0.49345906156319946, - "grad_norm": 1758.0655517578125, - "learning_rate": 3.032064374144005e-05, - "loss": 99.8391, - "step": 61070 - }, - { - "epoch": 0.4935398637674836, - "grad_norm": 1029.344482421875, - "learning_rate": 3.0313822157849287e-05, - "loss": 137.6532, - "step": 61080 - }, - { - "epoch": 0.49362066597176774, - "grad_norm": 1515.48193359375, - "learning_rate": 3.030700015987573e-05, - "loss": 110.8472, - "step": 61090 - }, - { - "epoch": 0.4937014681760518, - "grad_norm": 1400.1971435546875, - "learning_rate": 3.0300177748051373e-05, - "loss": 112.555, - "step": 61100 - }, - { - "epoch": 0.49378227038033595, - "grad_norm": 1121.00439453125, - "learning_rate": 3.0293354922908235e-05, - "loss": 120.1636, - "step": 61110 - }, - { - "epoch": 0.4938630725846201, - "grad_norm": 788.2630615234375, - "learning_rate": 3.028653168497838e-05, - "loss": 97.029, - "step": 61120 - }, - { - "epoch": 0.4939438747889042, - "grad_norm": 638.068603515625, - "learning_rate": 3.0279708034793907e-05, - "loss": 94.1032, - "step": 61130 - }, - { - "epoch": 0.49402467699318836, - "grad_norm": 451.4193115234375, - "learning_rate": 3.0272883972886935e-05, - "loss": 85.9979, - "step": 61140 - }, - { - "epoch": 0.4941054791974725, - "grad_norm": 1408.3233642578125, - "learning_rate": 3.02660594997896e-05, - "loss": 101.7955, - "step": 61150 - }, - { - "epoch": 0.49418628140175663, - "grad_norm": 838.1246337890625, - "learning_rate": 3.0259234616034116e-05, - "loss": 95.6618, - "step": 61160 - }, - { - "epoch": 0.49426708360604077, - "grad_norm": 879.6422729492188, - "learning_rate": 3.025240932215268e-05, - "loss": 94.2452, - "step": 61170 - }, - { - "epoch": 0.4943478858103249, - "grad_norm": 985.1724243164062, - "learning_rate": 3.0245583618677558e-05, - "loss": 86.3851, - "step": 61180 - }, - { - "epoch": 0.49442868801460904, - "grad_norm": 536.5324096679688, - "learning_rate": 3.0238757506141012e-05, - "loss": 92.2913, - "step": 61190 - }, - { - "epoch": 0.4945094902188932, - "grad_norm": 1872.2591552734375, - "learning_rate": 3.023193098507538e-05, - "loss": 117.4869, - "step": 61200 - }, - { - "epoch": 0.4945902924231773, - "grad_norm": 1106.338134765625, - "learning_rate": 3.0225104056013e-05, - "loss": 116.547, - "step": 61210 - }, - { - "epoch": 0.49467109462746145, - "grad_norm": 512.8605346679688, - "learning_rate": 3.0218276719486244e-05, - "loss": 118.0442, - "step": 61220 - }, - { - "epoch": 0.4947518968317456, - "grad_norm": 872.9130249023438, - "learning_rate": 3.021144897602752e-05, - "loss": 93.774, - "step": 61230 - }, - { - "epoch": 0.4948326990360297, - "grad_norm": 692.8314819335938, - "learning_rate": 3.020462082616928e-05, - "loss": 80.1066, - "step": 61240 - }, - { - "epoch": 0.49491350124031386, - "grad_norm": 509.7411804199219, - "learning_rate": 3.0197792270443982e-05, - "loss": 79.5788, - "step": 61250 - }, - { - "epoch": 0.494994303444598, - "grad_norm": 709.2567138671875, - "learning_rate": 3.0190963309384156e-05, - "loss": 95.6635, - "step": 61260 - }, - { - "epoch": 0.4950751056488821, - "grad_norm": 804.3460693359375, - "learning_rate": 3.0184133943522314e-05, - "loss": 74.7411, - "step": 61270 - }, - { - "epoch": 0.4951559078531662, - "grad_norm": 1383.018310546875, - "learning_rate": 3.0177304173391037e-05, - "loss": 99.3223, - "step": 61280 - }, - { - "epoch": 0.49523671005745035, - "grad_norm": 1804.20703125, - "learning_rate": 3.0170473999522915e-05, - "loss": 101.919, - "step": 61290 - }, - { - "epoch": 0.4953175122617345, - "grad_norm": 1267.220947265625, - "learning_rate": 3.016364342245059e-05, - "loss": 128.8924, - "step": 61300 - }, - { - "epoch": 0.4953983144660186, - "grad_norm": 944.1229248046875, - "learning_rate": 3.0156812442706715e-05, - "loss": 106.653, - "step": 61310 - }, - { - "epoch": 0.49547911667030275, - "grad_norm": 639.6654052734375, - "learning_rate": 3.0149981060823995e-05, - "loss": 92.4858, - "step": 61320 - }, - { - "epoch": 0.4955599188745869, - "grad_norm": 767.285400390625, - "learning_rate": 3.0143149277335138e-05, - "loss": 103.2523, - "step": 61330 - }, - { - "epoch": 0.495640721078871, - "grad_norm": 948.9341430664062, - "learning_rate": 3.013631709277292e-05, - "loss": 142.2647, - "step": 61340 - }, - { - "epoch": 0.49572152328315516, - "grad_norm": 662.1615600585938, - "learning_rate": 3.0129484507670115e-05, - "loss": 107.1395, - "step": 61350 - }, - { - "epoch": 0.4958023254874393, - "grad_norm": 950.81201171875, - "learning_rate": 3.0122651522559553e-05, - "loss": 91.6862, - "step": 61360 - }, - { - "epoch": 0.49588312769172344, - "grad_norm": 429.4052429199219, - "learning_rate": 3.0115818137974067e-05, - "loss": 146.5581, - "step": 61370 - }, - { - "epoch": 0.49596392989600757, - "grad_norm": 930.103515625, - "learning_rate": 3.0108984354446556e-05, - "loss": 101.8897, - "step": 61380 - }, - { - "epoch": 0.4960447321002917, - "grad_norm": 1196.59326171875, - "learning_rate": 3.010215017250993e-05, - "loss": 123.2352, - "step": 61390 - }, - { - "epoch": 0.49612553430457584, - "grad_norm": 589.8703002929688, - "learning_rate": 3.0095315592697126e-05, - "loss": 87.7526, - "step": 61400 - }, - { - "epoch": 0.49620633650886, - "grad_norm": 1237.8167724609375, - "learning_rate": 3.0088480615541113e-05, - "loss": 106.4812, - "step": 61410 - }, - { - "epoch": 0.4962871387131441, - "grad_norm": 737.3546752929688, - "learning_rate": 3.008164524157491e-05, - "loss": 95.5115, - "step": 61420 - }, - { - "epoch": 0.49636794091742825, - "grad_norm": 747.0277709960938, - "learning_rate": 3.007480947133155e-05, - "loss": 105.8834, - "step": 61430 - }, - { - "epoch": 0.4964487431217124, - "grad_norm": 984.3314208984375, - "learning_rate": 3.00679733053441e-05, - "loss": 105.9469, - "step": 61440 - }, - { - "epoch": 0.49652954532599647, - "grad_norm": 679.7565307617188, - "learning_rate": 3.0061136744145652e-05, - "loss": 84.4708, - "step": 61450 - }, - { - "epoch": 0.4966103475302806, - "grad_norm": 1201.8026123046875, - "learning_rate": 3.005429978826934e-05, - "loss": 96.8751, - "step": 61460 - }, - { - "epoch": 0.49669114973456474, - "grad_norm": 776.0872192382812, - "learning_rate": 3.004746243824833e-05, - "loss": 84.5766, - "step": 61470 - }, - { - "epoch": 0.4967719519388489, - "grad_norm": 1907.1820068359375, - "learning_rate": 3.0040624694615803e-05, - "loss": 121.5948, - "step": 61480 - }, - { - "epoch": 0.496852754143133, - "grad_norm": 857.5036010742188, - "learning_rate": 3.003378655790498e-05, - "loss": 109.1802, - "step": 61490 - }, - { - "epoch": 0.49693355634741715, - "grad_norm": 509.0009765625, - "learning_rate": 3.002694802864912e-05, - "loss": 111.4011, - "step": 61500 - }, - { - "epoch": 0.4970143585517013, - "grad_norm": 1530.005126953125, - "learning_rate": 3.00201091073815e-05, - "loss": 126.7474, - "step": 61510 - }, - { - "epoch": 0.4970951607559854, - "grad_norm": 773.0906982421875, - "learning_rate": 3.0013269794635446e-05, - "loss": 90.8878, - "step": 61520 - }, - { - "epoch": 0.49717596296026956, - "grad_norm": 1618.01123046875, - "learning_rate": 3.0006430090944277e-05, - "loss": 124.9891, - "step": 61530 - }, - { - "epoch": 0.4972567651645537, - "grad_norm": 399.9528503417969, - "learning_rate": 2.9999589996841386e-05, - "loss": 107.4075, - "step": 61540 - }, - { - "epoch": 0.49733756736883783, - "grad_norm": 1108.7855224609375, - "learning_rate": 2.9992749512860173e-05, - "loss": 109.8831, - "step": 61550 - }, - { - "epoch": 0.49741836957312197, - "grad_norm": 1194.5028076171875, - "learning_rate": 2.9985908639534075e-05, - "loss": 151.2577, - "step": 61560 - }, - { - "epoch": 0.4974991717774061, - "grad_norm": 926.6067504882812, - "learning_rate": 2.9979067377396565e-05, - "loss": 69.9582, - "step": 61570 - }, - { - "epoch": 0.49757997398169024, - "grad_norm": 796.760498046875, - "learning_rate": 2.9972225726981113e-05, - "loss": 77.7238, - "step": 61580 - }, - { - "epoch": 0.4976607761859744, - "grad_norm": 1386.447021484375, - "learning_rate": 2.996538368882127e-05, - "loss": 135.9352, - "step": 61590 - }, - { - "epoch": 0.4977415783902585, - "grad_norm": 1185.4478759765625, - "learning_rate": 2.9958541263450584e-05, - "loss": 81.8305, - "step": 61600 - }, - { - "epoch": 0.49782238059454265, - "grad_norm": 1080.8087158203125, - "learning_rate": 2.995169845140264e-05, - "loss": 77.6133, - "step": 61610 - }, - { - "epoch": 0.4979031827988267, - "grad_norm": 1280.5985107421875, - "learning_rate": 2.9944855253211052e-05, - "loss": 104.4355, - "step": 61620 - }, - { - "epoch": 0.49798398500311086, - "grad_norm": 645.7960815429688, - "learning_rate": 2.993801166940947e-05, - "loss": 95.1785, - "step": 61630 - }, - { - "epoch": 0.498064787207395, - "grad_norm": 630.66015625, - "learning_rate": 2.9931167700531578e-05, - "loss": 90.5321, - "step": 61640 - }, - { - "epoch": 0.49814558941167913, - "grad_norm": 776.0673217773438, - "learning_rate": 2.9924323347111073e-05, - "loss": 133.4106, - "step": 61650 - }, - { - "epoch": 0.49822639161596327, - "grad_norm": 1130.7745361328125, - "learning_rate": 2.991747860968168e-05, - "loss": 122.8374, - "step": 61660 - }, - { - "epoch": 0.4983071938202474, - "grad_norm": 1256.7821044921875, - "learning_rate": 2.9910633488777196e-05, - "loss": 81.2758, - "step": 61670 - }, - { - "epoch": 0.49838799602453154, - "grad_norm": 372.975341796875, - "learning_rate": 2.9903787984931396e-05, - "loss": 71.5151, - "step": 61680 - }, - { - "epoch": 0.4984687982288157, - "grad_norm": 962.6753540039062, - "learning_rate": 2.9896942098678122e-05, - "loss": 89.9487, - "step": 61690 - }, - { - "epoch": 0.4985496004330998, - "grad_norm": 586.8377685546875, - "learning_rate": 2.9890095830551207e-05, - "loss": 102.0299, - "step": 61700 - }, - { - "epoch": 0.49863040263738395, - "grad_norm": 2170.886962890625, - "learning_rate": 2.988324918108456e-05, - "loss": 117.4013, - "step": 61710 - }, - { - "epoch": 0.4987112048416681, - "grad_norm": 850.5200805664062, - "learning_rate": 2.9876402150812078e-05, - "loss": 137.5344, - "step": 61720 - }, - { - "epoch": 0.4987920070459522, - "grad_norm": 982.2659912109375, - "learning_rate": 2.9869554740267724e-05, - "loss": 76.4956, - "step": 61730 - }, - { - "epoch": 0.49887280925023636, - "grad_norm": 1771.2686767578125, - "learning_rate": 2.9862706949985463e-05, - "loss": 109.2477, - "step": 61740 - }, - { - "epoch": 0.4989536114545205, - "grad_norm": 1000.6109008789062, - "learning_rate": 2.98558587804993e-05, - "loss": 90.1948, - "step": 61750 - }, - { - "epoch": 0.49903441365880463, - "grad_norm": 1270.658935546875, - "learning_rate": 2.984901023234327e-05, - "loss": 103.2648, - "step": 61760 - }, - { - "epoch": 0.49911521586308877, - "grad_norm": 475.1168212890625, - "learning_rate": 2.9842161306051446e-05, - "loss": 127.5444, - "step": 61770 - }, - { - "epoch": 0.4991960180673729, - "grad_norm": 1071.6934814453125, - "learning_rate": 2.9835312002157913e-05, - "loss": 125.5506, - "step": 61780 - }, - { - "epoch": 0.499276820271657, - "grad_norm": 923.67822265625, - "learning_rate": 2.9828462321196788e-05, - "loss": 104.3114, - "step": 61790 - }, - { - "epoch": 0.4993576224759411, - "grad_norm": 778.8935546875, - "learning_rate": 2.9821612263702226e-05, - "loss": 101.3178, - "step": 61800 - }, - { - "epoch": 0.49943842468022526, - "grad_norm": 785.8084106445312, - "learning_rate": 2.981476183020842e-05, - "loss": 112.6572, - "step": 61810 - }, - { - "epoch": 0.4995192268845094, - "grad_norm": 1581.164794921875, - "learning_rate": 2.9807911021249573e-05, - "loss": 128.2291, - "step": 61820 - }, - { - "epoch": 0.49960002908879353, - "grad_norm": 1636.1961669921875, - "learning_rate": 2.9801059837359925e-05, - "loss": 92.3238, - "step": 61830 - }, - { - "epoch": 0.49968083129307767, - "grad_norm": 976.117919921875, - "learning_rate": 2.9794208279073743e-05, - "loss": 78.8411, - "step": 61840 - }, - { - "epoch": 0.4997616334973618, - "grad_norm": 764.1289672851562, - "learning_rate": 2.9787356346925327e-05, - "loss": 89.3236, - "step": 61850 - }, - { - "epoch": 0.49984243570164594, - "grad_norm": 793.6039428710938, - "learning_rate": 2.978050404144901e-05, - "loss": 118.7494, - "step": 61860 - }, - { - "epoch": 0.4999232379059301, - "grad_norm": 912.1771850585938, - "learning_rate": 2.9773651363179144e-05, - "loss": 134.7565, - "step": 61870 - }, - { - "epoch": 0.5000040401102142, - "grad_norm": 627.3869018554688, - "learning_rate": 2.9766798312650112e-05, - "loss": 71.3938, - "step": 61880 - }, - { - "epoch": 0.5000848423144983, - "grad_norm": 2017.302490234375, - "learning_rate": 2.975994489039634e-05, - "loss": 90.6672, - "step": 61890 - }, - { - "epoch": 0.5001656445187824, - "grad_norm": 886.43994140625, - "learning_rate": 2.9753091096952255e-05, - "loss": 99.8712, - "step": 61900 - }, - { - "epoch": 0.5002464467230666, - "grad_norm": 901.8732299804688, - "learning_rate": 2.9746236932852355e-05, - "loss": 90.4444, - "step": 61910 - }, - { - "epoch": 0.5003272489273507, - "grad_norm": 715.142578125, - "learning_rate": 2.973938239863111e-05, - "loss": 90.7391, - "step": 61920 - }, - { - "epoch": 0.5004080511316349, - "grad_norm": 1375.3907470703125, - "learning_rate": 2.9732527494823083e-05, - "loss": 110.7941, - "step": 61930 - }, - { - "epoch": 0.500488853335919, - "grad_norm": 694.0430908203125, - "learning_rate": 2.97256722219628e-05, - "loss": 87.9629, - "step": 61940 - }, - { - "epoch": 0.5005696555402032, - "grad_norm": 477.39617919921875, - "learning_rate": 2.9718816580584884e-05, - "loss": 93.9715, - "step": 61950 - }, - { - "epoch": 0.5006504577444872, - "grad_norm": 880.4938354492188, - "learning_rate": 2.971196057122393e-05, - "loss": 107.4139, - "step": 61960 - }, - { - "epoch": 0.5007312599487714, - "grad_norm": 1238.34423828125, - "learning_rate": 2.9705104194414586e-05, - "loss": 100.5208, - "step": 61970 - }, - { - "epoch": 0.5008120621530555, - "grad_norm": 825.6590576171875, - "learning_rate": 2.9698247450691525e-05, - "loss": 103.5567, - "step": 61980 - }, - { - "epoch": 0.5008928643573397, - "grad_norm": 1105.45947265625, - "learning_rate": 2.9691390340589466e-05, - "loss": 96.2537, - "step": 61990 - }, - { - "epoch": 0.5009736665616238, - "grad_norm": 999.39013671875, - "learning_rate": 2.9684532864643122e-05, - "loss": 78.8136, - "step": 62000 } ], "logging_steps": 10, - "max_steps": 125000, + "max_steps": 250000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, @@ -43427,7 +8427,7 @@ } }, "total_flos": 0.0, - "train_batch_size": 8, + "train_batch_size": 4, "trial_name": null, "trial_params": null }