|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.04848151844115758, |
|
"eval_steps": 500, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.040126536763131e-05, |
|
"grad_norm": 13786306.0, |
|
"learning_rate": 2e-08, |
|
"loss": 1195071.7, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 8.080253073526263e-05, |
|
"grad_norm": 3502914.5, |
|
"learning_rate": 4e-08, |
|
"loss": 1488604.5, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00012120379610289395, |
|
"grad_norm": 5249533.5, |
|
"learning_rate": 6e-08, |
|
"loss": 1363003.1, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00016160506147052525, |
|
"grad_norm": 17823226.0, |
|
"learning_rate": 8e-08, |
|
"loss": 1539778.9, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00020200632683815657, |
|
"grad_norm": 14010826.0, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 1378274.9, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0002424075922057879, |
|
"grad_norm": 23003992.0, |
|
"learning_rate": 1.2e-07, |
|
"loss": 1250654.1, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0002828088575734192, |
|
"grad_norm": 7809646.0, |
|
"learning_rate": 1.4e-07, |
|
"loss": 1819393.6, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0003232101229410505, |
|
"grad_norm": 43233020.0, |
|
"learning_rate": 1.6e-07, |
|
"loss": 1770900.4, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0003636113883086818, |
|
"grad_norm": 6094604.5, |
|
"learning_rate": 1.8e-07, |
|
"loss": 1529363.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.00040401265367631315, |
|
"grad_norm": 8696879.0, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 1028300.1, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00044441391904394446, |
|
"grad_norm": 7774881.5, |
|
"learning_rate": 2.2e-07, |
|
"loss": 1786590.6, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0004848151844115758, |
|
"grad_norm": 2879459.0, |
|
"learning_rate": 2.4e-07, |
|
"loss": 1377895.4, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.000525216449779207, |
|
"grad_norm": 19783298.0, |
|
"learning_rate": 2.6e-07, |
|
"loss": 1599422.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0005656177151468384, |
|
"grad_norm": 2185791.0, |
|
"learning_rate": 2.8e-07, |
|
"loss": 1243788.4, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0006060189805144697, |
|
"grad_norm": 9816560.0, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 1245093.3, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.000646420245882101, |
|
"grad_norm": 9404809.0, |
|
"learning_rate": 3.2e-07, |
|
"loss": 1288713.5, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0006868215112497323, |
|
"grad_norm": 8446407.0, |
|
"learning_rate": 3.4e-07, |
|
"loss": 1147532.8, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0007272227766173637, |
|
"grad_norm": 3988919.0, |
|
"learning_rate": 3.6e-07, |
|
"loss": 704286.9, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.000767624041984995, |
|
"grad_norm": 2574691.5, |
|
"learning_rate": 3.8e-07, |
|
"loss": 545635.6, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0008080253073526263, |
|
"grad_norm": 21092594.0, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 728793.05, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0008484265727202576, |
|
"grad_norm": 2704460.5, |
|
"learning_rate": 4.2e-07, |
|
"loss": 467207.7, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0008888278380878889, |
|
"grad_norm": 4876784.0, |
|
"learning_rate": 4.4e-07, |
|
"loss": 369614.55, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0009292291034555202, |
|
"grad_norm": 10803276.0, |
|
"learning_rate": 4.6e-07, |
|
"loss": 428869.6, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0009696303688231516, |
|
"grad_norm": 5475451.0, |
|
"learning_rate": 4.8e-07, |
|
"loss": 404623.45, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.0010100316341907828, |
|
"grad_norm": 3401077.25, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 606814.6, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.001050432899558414, |
|
"grad_norm": 1785725.0, |
|
"learning_rate": 5.2e-07, |
|
"loss": 230520.125, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0010908341649260454, |
|
"grad_norm": 26237066.0, |
|
"learning_rate": 5.4e-07, |
|
"loss": 251476.375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0011312354302936767, |
|
"grad_norm": 3433710.25, |
|
"learning_rate": 5.6e-07, |
|
"loss": 378523.225, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.001171636695661308, |
|
"grad_norm": 3058920.5, |
|
"learning_rate": 5.8e-07, |
|
"loss": 118036.575, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0012120379610289394, |
|
"grad_norm": 2775939.25, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 128199.575, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0012524392263965707, |
|
"grad_norm": 2798658.75, |
|
"learning_rate": 6.2e-07, |
|
"loss": 145799.6125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.001292840491764202, |
|
"grad_norm": 3050425.75, |
|
"learning_rate": 6.4e-07, |
|
"loss": 170385.65, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0013332417571318333, |
|
"grad_norm": 483486.6875, |
|
"learning_rate": 6.6e-07, |
|
"loss": 79056.7, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.0013736430224994647, |
|
"grad_norm": 903465.9375, |
|
"learning_rate": 6.8e-07, |
|
"loss": 69717.8562, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.001414044287867096, |
|
"grad_norm": 166525.765625, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 29574.1, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0014544455532347273, |
|
"grad_norm": 83730.7109375, |
|
"learning_rate": 7.2e-07, |
|
"loss": 9268.3289, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.0014948468186023586, |
|
"grad_norm": 13107.71484375, |
|
"learning_rate": 7.400000000000001e-07, |
|
"loss": 16814.9484, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.00153524808396999, |
|
"grad_norm": 199187.046875, |
|
"learning_rate": 7.6e-07, |
|
"loss": 4728.9383, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0015756493493376213, |
|
"grad_norm": 209663.171875, |
|
"learning_rate": 7.8e-07, |
|
"loss": 3811.3047, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.0016160506147052526, |
|
"grad_norm": 261596.96875, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 1873.8627, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.001656451880072884, |
|
"grad_norm": 9995.125, |
|
"learning_rate": 8.200000000000001e-07, |
|
"loss": 1428.6535, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.0016968531454405152, |
|
"grad_norm": 2573.125732421875, |
|
"learning_rate": 8.4e-07, |
|
"loss": 533.159, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.0017372544108081465, |
|
"grad_norm": 547.4088134765625, |
|
"learning_rate": 8.6e-07, |
|
"loss": 702.8197, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.0017776556761757779, |
|
"grad_norm": 734.5255126953125, |
|
"learning_rate": 8.8e-07, |
|
"loss": 485.3759, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.0018180569415434092, |
|
"grad_norm": 5114.30615234375, |
|
"learning_rate": 9e-07, |
|
"loss": 545.9343, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0018584582069110405, |
|
"grad_norm": 1266.8800048828125, |
|
"learning_rate": 9.2e-07, |
|
"loss": 482.805, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0018988594722786718, |
|
"grad_norm": 5457.6689453125, |
|
"learning_rate": 9.400000000000001e-07, |
|
"loss": 550.8278, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.0019392607376463031, |
|
"grad_norm": 1092.0926513671875, |
|
"learning_rate": 9.6e-07, |
|
"loss": 572.7061, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.0019796620030139342, |
|
"grad_norm": 1708.60546875, |
|
"learning_rate": 9.8e-07, |
|
"loss": 447.9137, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.0020200632683815656, |
|
"grad_norm": 833.2102661132812, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 452.3768, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.002060464533749197, |
|
"grad_norm": 854.2012329101562, |
|
"learning_rate": 1.0200000000000002e-06, |
|
"loss": 453.9597, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.002100865799116828, |
|
"grad_norm": 883.29052734375, |
|
"learning_rate": 1.04e-06, |
|
"loss": 399.3994, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.0021412670644844595, |
|
"grad_norm": 876.2081909179688, |
|
"learning_rate": 1.06e-06, |
|
"loss": 508.956, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.002181668329852091, |
|
"grad_norm": 1216.003662109375, |
|
"learning_rate": 1.08e-06, |
|
"loss": 279.6047, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.002222069595219722, |
|
"grad_norm": 20951.376953125, |
|
"learning_rate": 1.1e-06, |
|
"loss": 523.4955, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0022624708605873535, |
|
"grad_norm": 6249.7998046875, |
|
"learning_rate": 1.12e-06, |
|
"loss": 373.9713, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.002302872125954985, |
|
"grad_norm": 8211.34375, |
|
"learning_rate": 1.14e-06, |
|
"loss": 322.8237, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.002343273391322616, |
|
"grad_norm": 1029.943359375, |
|
"learning_rate": 1.16e-06, |
|
"loss": 443.8321, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0023836746566902474, |
|
"grad_norm": 5415.279296875, |
|
"learning_rate": 1.18e-06, |
|
"loss": 580.9646, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.0024240759220578788, |
|
"grad_norm": 683.0673217773438, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 322.4693, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.00246447718742551, |
|
"grad_norm": 3990.54931640625, |
|
"learning_rate": 1.2200000000000002e-06, |
|
"loss": 543.5786, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.0025048784527931414, |
|
"grad_norm": 771.86767578125, |
|
"learning_rate": 1.24e-06, |
|
"loss": 464.2772, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.0025452797181607727, |
|
"grad_norm": 830.4238891601562, |
|
"learning_rate": 1.26e-06, |
|
"loss": 502.5308, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.002585680983528404, |
|
"grad_norm": 4528.138671875, |
|
"learning_rate": 1.28e-06, |
|
"loss": 368.3498, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.0026260822488960354, |
|
"grad_norm": 701.6304321289062, |
|
"learning_rate": 1.3e-06, |
|
"loss": 433.9283, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.0026664835142636667, |
|
"grad_norm": 593.0372924804688, |
|
"learning_rate": 1.32e-06, |
|
"loss": 327.8368, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.002706884779631298, |
|
"grad_norm": 718.9718627929688, |
|
"learning_rate": 1.34e-06, |
|
"loss": 363.8515, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.0027472860449989293, |
|
"grad_norm": 1276.8353271484375, |
|
"learning_rate": 1.36e-06, |
|
"loss": 457.8593, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.0027876873103665606, |
|
"grad_norm": 9622.8271484375, |
|
"learning_rate": 1.3800000000000001e-06, |
|
"loss": 448.6425, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.002828088575734192, |
|
"grad_norm": 795.946044921875, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 370.567, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0028684898411018233, |
|
"grad_norm": 2605.2119140625, |
|
"learning_rate": 1.4200000000000002e-06, |
|
"loss": 399.8033, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.0029088911064694546, |
|
"grad_norm": 3644.47607421875, |
|
"learning_rate": 1.44e-06, |
|
"loss": 519.6482, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.002949292371837086, |
|
"grad_norm": 859.5913696289062, |
|
"learning_rate": 1.46e-06, |
|
"loss": 366.024, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.0029896936372047172, |
|
"grad_norm": 9709.5673828125, |
|
"learning_rate": 1.4800000000000002e-06, |
|
"loss": 387.851, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.0030300949025723486, |
|
"grad_norm": 1355.1983642578125, |
|
"learning_rate": 1.5e-06, |
|
"loss": 287.8213, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.00307049616793998, |
|
"grad_norm": 1910.957275390625, |
|
"learning_rate": 1.52e-06, |
|
"loss": 263.4618, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.003110897433307611, |
|
"grad_norm": 1562.11669921875, |
|
"learning_rate": 1.54e-06, |
|
"loss": 514.2329, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.0031512986986752425, |
|
"grad_norm": 765.830078125, |
|
"learning_rate": 1.56e-06, |
|
"loss": 332.8841, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.003191699964042874, |
|
"grad_norm": 13385.1689453125, |
|
"learning_rate": 1.5800000000000003e-06, |
|
"loss": 456.157, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.003232101229410505, |
|
"grad_norm": 969.9119262695312, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 300.1112, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0032725024947781365, |
|
"grad_norm": 1840.109619140625, |
|
"learning_rate": 1.62e-06, |
|
"loss": 384.0655, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.003312903760145768, |
|
"grad_norm": 1270.9483642578125, |
|
"learning_rate": 1.6400000000000002e-06, |
|
"loss": 355.3219, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.003353305025513399, |
|
"grad_norm": 668.4849243164062, |
|
"learning_rate": 1.6600000000000002e-06, |
|
"loss": 247.1219, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.0033937062908810304, |
|
"grad_norm": 1676.130615234375, |
|
"learning_rate": 1.68e-06, |
|
"loss": 312.9415, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.0034341075562486618, |
|
"grad_norm": 480.2029724121094, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 388.1627, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.003474508821616293, |
|
"grad_norm": 579.8500366210938, |
|
"learning_rate": 1.72e-06, |
|
"loss": 423.7644, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.0035149100869839244, |
|
"grad_norm": 780.9789428710938, |
|
"learning_rate": 1.7399999999999999e-06, |
|
"loss": 335.4401, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.0035553113523515557, |
|
"grad_norm": 500.0970153808594, |
|
"learning_rate": 1.76e-06, |
|
"loss": 375.4284, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.003595712617719187, |
|
"grad_norm": 663.9608154296875, |
|
"learning_rate": 1.7800000000000001e-06, |
|
"loss": 432.7744, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.0036361138830868184, |
|
"grad_norm": 738.2296142578125, |
|
"learning_rate": 1.8e-06, |
|
"loss": 330.4268, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0036765151484544497, |
|
"grad_norm": 888.3779907226562, |
|
"learning_rate": 1.8200000000000002e-06, |
|
"loss": 328.1051, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.003716916413822081, |
|
"grad_norm": 681.369873046875, |
|
"learning_rate": 1.84e-06, |
|
"loss": 455.4454, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.0037573176791897123, |
|
"grad_norm": 719.9152221679688, |
|
"learning_rate": 1.86e-06, |
|
"loss": 291.1318, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.0037977189445573436, |
|
"grad_norm": 765.2400512695312, |
|
"learning_rate": 1.8800000000000002e-06, |
|
"loss": 453.5919, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.003838120209924975, |
|
"grad_norm": 586.1494140625, |
|
"learning_rate": 1.9e-06, |
|
"loss": 338.6631, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.0038785214752926063, |
|
"grad_norm": 559.566650390625, |
|
"learning_rate": 1.92e-06, |
|
"loss": 309.9115, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.003918922740660237, |
|
"grad_norm": 2274.710205078125, |
|
"learning_rate": 1.94e-06, |
|
"loss": 366.6058, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.0039593240060278685, |
|
"grad_norm": 2413.857666015625, |
|
"learning_rate": 1.96e-06, |
|
"loss": 323.5407, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.0039997252713955, |
|
"grad_norm": 466.5931396484375, |
|
"learning_rate": 1.98e-06, |
|
"loss": 332.9158, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.004040126536763131, |
|
"grad_norm": 1368.92626953125, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 309.8214, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0040805278021307624, |
|
"grad_norm": 578.7489624023438, |
|
"learning_rate": 2.02e-06, |
|
"loss": 293.1464, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.004120929067498394, |
|
"grad_norm": 932.1323852539062, |
|
"learning_rate": 2.0400000000000004e-06, |
|
"loss": 382.6469, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.004161330332866025, |
|
"grad_norm": 861.5737915039062, |
|
"learning_rate": 2.06e-06, |
|
"loss": 394.3037, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.004201731598233656, |
|
"grad_norm": 540.3167114257812, |
|
"learning_rate": 2.08e-06, |
|
"loss": 356.1217, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.004242132863601288, |
|
"grad_norm": 645.2182006835938, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 375.9719, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.004282534128968919, |
|
"grad_norm": 3072.72607421875, |
|
"learning_rate": 2.12e-06, |
|
"loss": 385.9836, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.00432293539433655, |
|
"grad_norm": 1441.8131103515625, |
|
"learning_rate": 2.14e-06, |
|
"loss": 447.29, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.004363336659704182, |
|
"grad_norm": 531.906494140625, |
|
"learning_rate": 2.16e-06, |
|
"loss": 307.2454, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.004403737925071813, |
|
"grad_norm": 4712.17236328125, |
|
"learning_rate": 2.1800000000000003e-06, |
|
"loss": 599.3445, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.004444139190439444, |
|
"grad_norm": 543.140380859375, |
|
"learning_rate": 2.2e-06, |
|
"loss": 236.8443, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.004484540455807076, |
|
"grad_norm": 724.6696166992188, |
|
"learning_rate": 2.2200000000000003e-06, |
|
"loss": 402.6835, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.004524941721174707, |
|
"grad_norm": 890.041015625, |
|
"learning_rate": 2.24e-06, |
|
"loss": 335.9333, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.004565342986542338, |
|
"grad_norm": 517.4255981445312, |
|
"learning_rate": 2.26e-06, |
|
"loss": 377.6742, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.00460574425190997, |
|
"grad_norm": 1773.51025390625, |
|
"learning_rate": 2.28e-06, |
|
"loss": 387.5232, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.004646145517277601, |
|
"grad_norm": 1350.251220703125, |
|
"learning_rate": 2.3e-06, |
|
"loss": 430.9959, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.004686546782645232, |
|
"grad_norm": 1267.9136962890625, |
|
"learning_rate": 2.32e-06, |
|
"loss": 417.8642, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.0047269480480128636, |
|
"grad_norm": 1760.7581787109375, |
|
"learning_rate": 2.34e-06, |
|
"loss": 335.7049, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.004767349313380495, |
|
"grad_norm": 471.7587890625, |
|
"learning_rate": 2.36e-06, |
|
"loss": 321.8847, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.004807750578748126, |
|
"grad_norm": 1277.0931396484375, |
|
"learning_rate": 2.38e-06, |
|
"loss": 339.8787, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.0048481518441157575, |
|
"grad_norm": 766.57568359375, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 343.6708, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.004888553109483389, |
|
"grad_norm": 733.90234375, |
|
"learning_rate": 2.42e-06, |
|
"loss": 284.1223, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.00492895437485102, |
|
"grad_norm": 705.3512573242188, |
|
"learning_rate": 2.4400000000000004e-06, |
|
"loss": 290.2316, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.0049693556402186515, |
|
"grad_norm": 1195.5079345703125, |
|
"learning_rate": 2.46e-06, |
|
"loss": 351.2376, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.005009756905586283, |
|
"grad_norm": 1140.5634765625, |
|
"learning_rate": 2.48e-06, |
|
"loss": 364.3263, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.005050158170953914, |
|
"grad_norm": 1419.2325439453125, |
|
"learning_rate": 2.5e-06, |
|
"loss": 420.2925, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.0050905594363215454, |
|
"grad_norm": 945.1781616210938, |
|
"learning_rate": 2.52e-06, |
|
"loss": 377.5975, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.005130960701689177, |
|
"grad_norm": 1156.38818359375, |
|
"learning_rate": 2.54e-06, |
|
"loss": 438.8611, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.005171361967056808, |
|
"grad_norm": 803.0604858398438, |
|
"learning_rate": 2.56e-06, |
|
"loss": 275.1979, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.005211763232424439, |
|
"grad_norm": 4027.070556640625, |
|
"learning_rate": 2.5800000000000003e-06, |
|
"loss": 417.5213, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.005252164497792071, |
|
"grad_norm": 762.65771484375, |
|
"learning_rate": 2.6e-06, |
|
"loss": 338.176, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.005292565763159702, |
|
"grad_norm": 514.9345092773438, |
|
"learning_rate": 2.6200000000000003e-06, |
|
"loss": 360.1529, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.005332967028527333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.64e-06, |
|
"loss": 437.6661, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.005373368293894965, |
|
"grad_norm": 964.8443603515625, |
|
"learning_rate": 2.66e-06, |
|
"loss": 348.5103, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.005413769559262596, |
|
"grad_norm": 1033.2138671875, |
|
"learning_rate": 2.68e-06, |
|
"loss": 351.3489, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.005454170824630227, |
|
"grad_norm": 621.0077514648438, |
|
"learning_rate": 2.7e-06, |
|
"loss": 297.7263, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.005494572089997859, |
|
"grad_norm": 670.5285034179688, |
|
"learning_rate": 2.72e-06, |
|
"loss": 266.751, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.00553497335536549, |
|
"grad_norm": 1063.011962890625, |
|
"learning_rate": 2.74e-06, |
|
"loss": 388.5708, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.005575374620733121, |
|
"grad_norm": 1352.453369140625, |
|
"learning_rate": 2.7600000000000003e-06, |
|
"loss": 413.9111, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.005615775886100753, |
|
"grad_norm": 1039.40966796875, |
|
"learning_rate": 2.78e-06, |
|
"loss": 425.6994, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.005656177151468384, |
|
"grad_norm": 852.0213012695312, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 379.7008, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.005696578416836015, |
|
"grad_norm": 560.5407104492188, |
|
"learning_rate": 2.82e-06, |
|
"loss": 373.5774, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.0057369796822036466, |
|
"grad_norm": 2221.54248046875, |
|
"learning_rate": 2.8400000000000003e-06, |
|
"loss": 377.4932, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.005777380947571278, |
|
"grad_norm": 986.341064453125, |
|
"learning_rate": 2.86e-06, |
|
"loss": 381.8052, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.005817782212938909, |
|
"grad_norm": 2288.12060546875, |
|
"learning_rate": 2.88e-06, |
|
"loss": 396.2358, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.0058581834783065405, |
|
"grad_norm": 506.0441589355469, |
|
"learning_rate": 2.9e-06, |
|
"loss": 306.8475, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.005898584743674172, |
|
"grad_norm": 1451.433837890625, |
|
"learning_rate": 2.92e-06, |
|
"loss": 421.7085, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.005938986009041803, |
|
"grad_norm": 410.0963439941406, |
|
"learning_rate": 2.9400000000000002e-06, |
|
"loss": 245.0907, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.0059793872744094345, |
|
"grad_norm": 644.2957153320312, |
|
"learning_rate": 2.9600000000000005e-06, |
|
"loss": 256.7748, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.006019788539777066, |
|
"grad_norm": 587.813232421875, |
|
"learning_rate": 2.9800000000000003e-06, |
|
"loss": 307.0451, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.006060189805144697, |
|
"grad_norm": 661.851806640625, |
|
"learning_rate": 3e-06, |
|
"loss": 240.7283, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0061005910705123284, |
|
"grad_norm": 9922.1162109375, |
|
"learning_rate": 3.0200000000000003e-06, |
|
"loss": 314.2891, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.00614099233587996, |
|
"grad_norm": 681.8471069335938, |
|
"learning_rate": 3.04e-06, |
|
"loss": 304.0082, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.006181393601247591, |
|
"grad_norm": 1679.9188232421875, |
|
"learning_rate": 3.06e-06, |
|
"loss": 316.8349, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.006221794866615222, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.08e-06, |
|
"loss": 273.9679, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.006262196131982854, |
|
"grad_norm": 934.2025756835938, |
|
"learning_rate": 3.1e-06, |
|
"loss": 281.7329, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.006302597397350485, |
|
"grad_norm": 1820.7791748046875, |
|
"learning_rate": 3.12e-06, |
|
"loss": 292.2456, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.006342998662718116, |
|
"grad_norm": 1122.6767578125, |
|
"learning_rate": 3.14e-06, |
|
"loss": 327.0943, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.006383399928085748, |
|
"grad_norm": 3407.740234375, |
|
"learning_rate": 3.1600000000000007e-06, |
|
"loss": 282.118, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.006423801193453379, |
|
"grad_norm": 556.7740478515625, |
|
"learning_rate": 3.1800000000000005e-06, |
|
"loss": 332.0084, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.00646420245882101, |
|
"grad_norm": 1229.8035888671875, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 344.9013, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.006504603724188642, |
|
"grad_norm": 949.2359008789062, |
|
"learning_rate": 3.22e-06, |
|
"loss": 371.0438, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.006545004989556273, |
|
"grad_norm": 1869.7620849609375, |
|
"learning_rate": 3.24e-06, |
|
"loss": 388.4585, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.006585406254923904, |
|
"grad_norm": 527.3277587890625, |
|
"learning_rate": 3.2599999999999997e-06, |
|
"loss": 372.7793, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.006625807520291536, |
|
"grad_norm": 1427.3751220703125, |
|
"learning_rate": 3.2800000000000004e-06, |
|
"loss": 386.9533, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.006666208785659167, |
|
"grad_norm": 1839.386474609375, |
|
"learning_rate": 3.3e-06, |
|
"loss": 414.8817, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.006706610051026798, |
|
"grad_norm": 767.32373046875, |
|
"learning_rate": 3.3200000000000004e-06, |
|
"loss": 262.8998, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.0067470113163944296, |
|
"grad_norm": 487.2662353515625, |
|
"learning_rate": 3.34e-06, |
|
"loss": 349.8497, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.006787412581762061, |
|
"grad_norm": 1609.6903076171875, |
|
"learning_rate": 3.36e-06, |
|
"loss": 328.737, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.006827813847129692, |
|
"grad_norm": 824.4249267578125, |
|
"learning_rate": 3.38e-06, |
|
"loss": 285.9903, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.0068682151124973235, |
|
"grad_norm": 3939.00244140625, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 360.8893, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.006908616377864955, |
|
"grad_norm": 611.8134155273438, |
|
"learning_rate": 3.4200000000000003e-06, |
|
"loss": 316.7729, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.006949017643232586, |
|
"grad_norm": 3673.492919921875, |
|
"learning_rate": 3.44e-06, |
|
"loss": 446.8052, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.0069894189086002175, |
|
"grad_norm": 580.3406982421875, |
|
"learning_rate": 3.46e-06, |
|
"loss": 306.8866, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.007029820173967849, |
|
"grad_norm": 797.6139526367188, |
|
"learning_rate": 3.4799999999999997e-06, |
|
"loss": 360.2713, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.00707022143933548, |
|
"grad_norm": 1184.318115234375, |
|
"learning_rate": 3.5000000000000004e-06, |
|
"loss": 360.695, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.0071106227047031114, |
|
"grad_norm": 659.1159057617188, |
|
"learning_rate": 3.52e-06, |
|
"loss": 393.7832, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.007151023970070743, |
|
"grad_norm": 611.4116821289062, |
|
"learning_rate": 3.5400000000000004e-06, |
|
"loss": 302.0126, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.007191425235438374, |
|
"grad_norm": 2148.29541015625, |
|
"learning_rate": 3.5600000000000002e-06, |
|
"loss": 411.9108, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.007231826500806005, |
|
"grad_norm": 635.7216796875, |
|
"learning_rate": 3.58e-06, |
|
"loss": 236.3176, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.007272227766173637, |
|
"grad_norm": 367.6564025878906, |
|
"learning_rate": 3.6e-06, |
|
"loss": 293.0558, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.007312629031541268, |
|
"grad_norm": 712.876220703125, |
|
"learning_rate": 3.6200000000000005e-06, |
|
"loss": 349.5894, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.007353030296908899, |
|
"grad_norm": 505.4992980957031, |
|
"learning_rate": 3.6400000000000003e-06, |
|
"loss": 300.7681, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.007393431562276531, |
|
"grad_norm": 852.1588745117188, |
|
"learning_rate": 3.66e-06, |
|
"loss": 408.5499, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.007433832827644162, |
|
"grad_norm": 1410.185791015625, |
|
"learning_rate": 3.68e-06, |
|
"loss": 253.6884, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.007474234093011793, |
|
"grad_norm": 889.8245849609375, |
|
"learning_rate": 3.7e-06, |
|
"loss": 408.2236, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.007514635358379425, |
|
"grad_norm": 906.4953002929688, |
|
"learning_rate": 3.72e-06, |
|
"loss": 397.8984, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.007555036623747056, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.7400000000000006e-06, |
|
"loss": 238.5037, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.007595437889114687, |
|
"grad_norm": 13512.4267578125, |
|
"learning_rate": 3.7600000000000004e-06, |
|
"loss": 348.5812, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.007635839154482319, |
|
"grad_norm": 4588.23193359375, |
|
"learning_rate": 3.7800000000000002e-06, |
|
"loss": 364.4923, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.00767624041984995, |
|
"grad_norm": 803.103759765625, |
|
"learning_rate": 3.8e-06, |
|
"loss": 372.9157, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.007716641685217581, |
|
"grad_norm": 1621.907958984375, |
|
"learning_rate": 3.82e-06, |
|
"loss": 360.8791, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.0077570429505852126, |
|
"grad_norm": 592.7820434570312, |
|
"learning_rate": 3.84e-06, |
|
"loss": 375.443, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.007797444215952844, |
|
"grad_norm": 2514.99462890625, |
|
"learning_rate": 3.86e-06, |
|
"loss": 332.9206, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.007837845481320474, |
|
"grad_norm": 1829.7589111328125, |
|
"learning_rate": 3.88e-06, |
|
"loss": 442.6601, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.007878246746688106, |
|
"grad_norm": 897.420166015625, |
|
"learning_rate": 3.9e-06, |
|
"loss": 288.4365, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.007918648012055737, |
|
"grad_norm": 1314.1402587890625, |
|
"learning_rate": 3.92e-06, |
|
"loss": 334.6603, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.007959049277423368, |
|
"grad_norm": 2495.09130859375, |
|
"learning_rate": 3.9399999999999995e-06, |
|
"loss": 434.7513, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.007999450542791, |
|
"grad_norm": 402.0351867675781, |
|
"learning_rate": 3.96e-06, |
|
"loss": 307.8936, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.008039851808158631, |
|
"grad_norm": 751.7537841796875, |
|
"learning_rate": 3.98e-06, |
|
"loss": 308.6638, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.008080253073526262, |
|
"grad_norm": 604.2171630859375, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 392.2409, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.008120654338893894, |
|
"grad_norm": 1126.428955078125, |
|
"learning_rate": 4.0200000000000005e-06, |
|
"loss": 318.5683, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.008161055604261525, |
|
"grad_norm": 4945.88037109375, |
|
"learning_rate": 4.04e-06, |
|
"loss": 311.324, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.008201456869629156, |
|
"grad_norm": 745.3814697265625, |
|
"learning_rate": 4.06e-06, |
|
"loss": 381.683, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.008241858134996788, |
|
"grad_norm": 556.3217163085938, |
|
"learning_rate": 4.080000000000001e-06, |
|
"loss": 335.636, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.008282259400364419, |
|
"grad_norm": 1249.5528564453125, |
|
"learning_rate": 4.1000000000000006e-06, |
|
"loss": 216.6521, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.00832266066573205, |
|
"grad_norm": 4082.633544921875, |
|
"learning_rate": 4.12e-06, |
|
"loss": 422.9517, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.008363061931099681, |
|
"grad_norm": 1134.3583984375, |
|
"learning_rate": 4.14e-06, |
|
"loss": 368.1808, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.008403463196467313, |
|
"grad_norm": 473.27655029296875, |
|
"learning_rate": 4.16e-06, |
|
"loss": 323.5618, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.008443864461834944, |
|
"grad_norm": 1147.6612548828125, |
|
"learning_rate": 4.18e-06, |
|
"loss": 358.5086, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.008484265727202575, |
|
"grad_norm": 716.1121826171875, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 289.7142, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.008524666992570207, |
|
"grad_norm": 595.6019287109375, |
|
"learning_rate": 4.22e-06, |
|
"loss": 249.1531, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.008565068257937838, |
|
"grad_norm": 1831.8653564453125, |
|
"learning_rate": 4.24e-06, |
|
"loss": 336.2978, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.00860546952330547, |
|
"grad_norm": 549.8616943359375, |
|
"learning_rate": 4.26e-06, |
|
"loss": 310.2739, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.0086458707886731, |
|
"grad_norm": 888.9124755859375, |
|
"learning_rate": 4.28e-06, |
|
"loss": 372.0684, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.008686272054040732, |
|
"grad_norm": 654.9121704101562, |
|
"learning_rate": 4.2999999999999995e-06, |
|
"loss": 218.5617, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.008726673319408363, |
|
"grad_norm": 1140.73486328125, |
|
"learning_rate": 4.32e-06, |
|
"loss": 396.7231, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.008767074584775995, |
|
"grad_norm": 818.670166015625, |
|
"learning_rate": 4.34e-06, |
|
"loss": 335.7865, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.008807475850143626, |
|
"grad_norm": 792.7094116210938, |
|
"learning_rate": 4.360000000000001e-06, |
|
"loss": 321.234, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.008847877115511257, |
|
"grad_norm": 2842.64697265625, |
|
"learning_rate": 4.38e-06, |
|
"loss": 247.4936, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.008888278380878889, |
|
"grad_norm": 756.8375244140625, |
|
"learning_rate": 4.4e-06, |
|
"loss": 358.118, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.00892867964624652, |
|
"grad_norm": 447.1920166015625, |
|
"learning_rate": 4.420000000000001e-06, |
|
"loss": 243.4523, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.008969080911614151, |
|
"grad_norm": 598.7188110351562, |
|
"learning_rate": 4.440000000000001e-06, |
|
"loss": 317.7062, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.009009482176981783, |
|
"grad_norm": 782.6261596679688, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 423.8147, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.009049883442349414, |
|
"grad_norm": 650.9381103515625, |
|
"learning_rate": 4.48e-06, |
|
"loss": 372.274, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.009090284707717045, |
|
"grad_norm": 911.272216796875, |
|
"learning_rate": 4.5e-06, |
|
"loss": 309.0599, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.009130685973084677, |
|
"grad_norm": 1821.9324951171875, |
|
"learning_rate": 4.52e-06, |
|
"loss": 291.9244, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.009171087238452308, |
|
"grad_norm": 1428.2496337890625, |
|
"learning_rate": 4.540000000000001e-06, |
|
"loss": 299.4853, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.00921148850381994, |
|
"grad_norm": 474.319580078125, |
|
"learning_rate": 4.56e-06, |
|
"loss": 284.1177, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.00925188976918757, |
|
"grad_norm": 521.900146484375, |
|
"learning_rate": 4.58e-06, |
|
"loss": 305.8711, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.009292291034555202, |
|
"grad_norm": 742.1409301757812, |
|
"learning_rate": 4.6e-06, |
|
"loss": 233.3259, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.009332692299922833, |
|
"grad_norm": 702.3360595703125, |
|
"learning_rate": 4.62e-06, |
|
"loss": 351.8805, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.009373093565290464, |
|
"grad_norm": 969.7785034179688, |
|
"learning_rate": 4.64e-06, |
|
"loss": 273.1551, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.009413494830658096, |
|
"grad_norm": 7805.5947265625, |
|
"learning_rate": 4.66e-06, |
|
"loss": 303.4039, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.009453896096025727, |
|
"grad_norm": 683.2047119140625, |
|
"learning_rate": 4.68e-06, |
|
"loss": 299.0721, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.009494297361393358, |
|
"grad_norm": 433.588134765625, |
|
"learning_rate": 4.7e-06, |
|
"loss": 275.1868, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.00953469862676099, |
|
"grad_norm": 970.4158325195312, |
|
"learning_rate": 4.72e-06, |
|
"loss": 508.6281, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.009575099892128621, |
|
"grad_norm": 1364.8466796875, |
|
"learning_rate": 4.74e-06, |
|
"loss": 333.4644, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.009615501157496252, |
|
"grad_norm": 1310.9354248046875, |
|
"learning_rate": 4.76e-06, |
|
"loss": 271.3296, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.009655902422863884, |
|
"grad_norm": 551.7647094726562, |
|
"learning_rate": 4.780000000000001e-06, |
|
"loss": 339.265, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.009696303688231515, |
|
"grad_norm": 542.0902709960938, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 375.706, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.009736704953599146, |
|
"grad_norm": 517.1104736328125, |
|
"learning_rate": 4.8200000000000004e-06, |
|
"loss": 183.125, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.009777106218966778, |
|
"grad_norm": 1165.0673828125, |
|
"learning_rate": 4.84e-06, |
|
"loss": 303.7644, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.009817507484334409, |
|
"grad_norm": 4149.34130859375, |
|
"learning_rate": 4.86e-06, |
|
"loss": 361.1945, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.00985790874970204, |
|
"grad_norm": 957.4842529296875, |
|
"learning_rate": 4.880000000000001e-06, |
|
"loss": 238.9853, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.009898310015069672, |
|
"grad_norm": 493.18341064453125, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 253.369, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.009938711280437303, |
|
"grad_norm": 913.3916015625, |
|
"learning_rate": 4.92e-06, |
|
"loss": 197.3718, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.009979112545804934, |
|
"grad_norm": 641.3722534179688, |
|
"learning_rate": 4.94e-06, |
|
"loss": 359.7244, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.010019513811172566, |
|
"grad_norm": 805.710693359375, |
|
"learning_rate": 4.96e-06, |
|
"loss": 314.5141, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.010059915076540197, |
|
"grad_norm": 979.5321655273438, |
|
"learning_rate": 4.98e-06, |
|
"loss": 402.7761, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.010100316341907828, |
|
"grad_norm": 434.0482177734375, |
|
"learning_rate": 5e-06, |
|
"loss": 190.8965, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01014071760727546, |
|
"grad_norm": 907.0119018554688, |
|
"learning_rate": 5.02e-06, |
|
"loss": 269.7839, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.010181118872643091, |
|
"grad_norm": 363.0292053222656, |
|
"learning_rate": 5.04e-06, |
|
"loss": 258.706, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.010221520138010722, |
|
"grad_norm": 794.2672729492188, |
|
"learning_rate": 5.06e-06, |
|
"loss": 220.4875, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.010261921403378354, |
|
"grad_norm": 956.221923828125, |
|
"learning_rate": 5.08e-06, |
|
"loss": 283.6568, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.010302322668745985, |
|
"grad_norm": 779.488037109375, |
|
"learning_rate": 5.1e-06, |
|
"loss": 356.618, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.010342723934113616, |
|
"grad_norm": 505.3084716796875, |
|
"learning_rate": 5.12e-06, |
|
"loss": 195.6782, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.010383125199481247, |
|
"grad_norm": 1260.047119140625, |
|
"learning_rate": 5.140000000000001e-06, |
|
"loss": 321.1118, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.010423526464848879, |
|
"grad_norm": 776.1239624023438, |
|
"learning_rate": 5.1600000000000006e-06, |
|
"loss": 303.4421, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.01046392773021651, |
|
"grad_norm": 899.60009765625, |
|
"learning_rate": 5.18e-06, |
|
"loss": 267.6064, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.010504328995584141, |
|
"grad_norm": 1198.875732421875, |
|
"learning_rate": 5.2e-06, |
|
"loss": 311.9983, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.010544730260951773, |
|
"grad_norm": 2578.52734375, |
|
"learning_rate": 5.220000000000001e-06, |
|
"loss": 285.3186, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.010585131526319404, |
|
"grad_norm": 613.3502807617188, |
|
"learning_rate": 5.240000000000001e-06, |
|
"loss": 407.8097, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.010625532791687035, |
|
"grad_norm": 724.2944946289062, |
|
"learning_rate": 5.2600000000000005e-06, |
|
"loss": 375.3231, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.010665934057054667, |
|
"grad_norm": 625.8546752929688, |
|
"learning_rate": 5.28e-06, |
|
"loss": 337.0123, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.010706335322422298, |
|
"grad_norm": 2968.8515625, |
|
"learning_rate": 5.3e-06, |
|
"loss": 279.1451, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.01074673658778993, |
|
"grad_norm": 507.51885986328125, |
|
"learning_rate": 5.32e-06, |
|
"loss": 214.3604, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.01078713785315756, |
|
"grad_norm": 738.8612060546875, |
|
"learning_rate": 5.3400000000000005e-06, |
|
"loss": 332.2985, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.010827539118525192, |
|
"grad_norm": 684.1400146484375, |
|
"learning_rate": 5.36e-06, |
|
"loss": 330.1413, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.010867940383892823, |
|
"grad_norm": 6400.771484375, |
|
"learning_rate": 5.38e-06, |
|
"loss": 344.3884, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.010908341649260455, |
|
"grad_norm": 986.3028564453125, |
|
"learning_rate": 5.4e-06, |
|
"loss": 354.2751, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.010948742914628086, |
|
"grad_norm": 933.0596313476562, |
|
"learning_rate": 5.42e-06, |
|
"loss": 367.7645, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.010989144179995717, |
|
"grad_norm": 956.349365234375, |
|
"learning_rate": 5.44e-06, |
|
"loss": 293.7472, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.011029545445363349, |
|
"grad_norm": 473.7074890136719, |
|
"learning_rate": 5.46e-06, |
|
"loss": 281.8573, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.01106994671073098, |
|
"grad_norm": 1302.126953125, |
|
"learning_rate": 5.48e-06, |
|
"loss": 275.9536, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.011110347976098611, |
|
"grad_norm": 557.58203125, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 205.2503, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.011150749241466243, |
|
"grad_norm": 3368.84228515625, |
|
"learning_rate": 5.5200000000000005e-06, |
|
"loss": 328.5797, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.011191150506833874, |
|
"grad_norm": 1747.6932373046875, |
|
"learning_rate": 5.54e-06, |
|
"loss": 246.7141, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.011231551772201505, |
|
"grad_norm": 871.47802734375, |
|
"learning_rate": 5.56e-06, |
|
"loss": 261.7437, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.011271953037569137, |
|
"grad_norm": 1074.76416015625, |
|
"learning_rate": 5.580000000000001e-06, |
|
"loss": 292.0746, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.011312354302936768, |
|
"grad_norm": 575.856689453125, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 247.1602, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.0113527555683044, |
|
"grad_norm": 847.0436401367188, |
|
"learning_rate": 5.62e-06, |
|
"loss": 244.562, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.01139315683367203, |
|
"grad_norm": 479.6651306152344, |
|
"learning_rate": 5.64e-06, |
|
"loss": 196.8626, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.011433558099039662, |
|
"grad_norm": 1245.398681640625, |
|
"learning_rate": 5.66e-06, |
|
"loss": 338.9747, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.011473959364407293, |
|
"grad_norm": 502.24951171875, |
|
"learning_rate": 5.680000000000001e-06, |
|
"loss": 243.1993, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.011514360629774924, |
|
"grad_norm": 790.6267700195312, |
|
"learning_rate": 5.7000000000000005e-06, |
|
"loss": 289.827, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.011554761895142556, |
|
"grad_norm": 996.7022094726562, |
|
"learning_rate": 5.72e-06, |
|
"loss": 285.2487, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.011595163160510187, |
|
"grad_norm": 1381.54541015625, |
|
"learning_rate": 5.74e-06, |
|
"loss": 299.6274, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.011635564425877818, |
|
"grad_norm": 674.7173461914062, |
|
"learning_rate": 5.76e-06, |
|
"loss": 212.4647, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.01167596569124545, |
|
"grad_norm": 1115.6590576171875, |
|
"learning_rate": 5.78e-06, |
|
"loss": 350.1185, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.011716366956613081, |
|
"grad_norm": 967.9352416992188, |
|
"learning_rate": 5.8e-06, |
|
"loss": 303.8514, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.011756768221980712, |
|
"grad_norm": 826.4132690429688, |
|
"learning_rate": 5.82e-06, |
|
"loss": 390.5727, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.011797169487348344, |
|
"grad_norm": 554.1842651367188, |
|
"learning_rate": 5.84e-06, |
|
"loss": 228.6073, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.011837570752715975, |
|
"grad_norm": 555.0242919921875, |
|
"learning_rate": 5.86e-06, |
|
"loss": 237.2674, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.011877972018083606, |
|
"grad_norm": 554.800048828125, |
|
"learning_rate": 5.8800000000000005e-06, |
|
"loss": 307.6302, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.011918373283451238, |
|
"grad_norm": 7435.06005859375, |
|
"learning_rate": 5.9e-06, |
|
"loss": 363.9746, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.011958774548818869, |
|
"grad_norm": 3039.5869140625, |
|
"learning_rate": 5.920000000000001e-06, |
|
"loss": 294.7946, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.0119991758141865, |
|
"grad_norm": 9456.8857421875, |
|
"learning_rate": 5.940000000000001e-06, |
|
"loss": 348.9202, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.012039577079554132, |
|
"grad_norm": 833.540771484375, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 302.1644, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.012079978344921763, |
|
"grad_norm": 1317.080810546875, |
|
"learning_rate": 5.98e-06, |
|
"loss": 346.3184, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.012120379610289394, |
|
"grad_norm": 546.1310424804688, |
|
"learning_rate": 6e-06, |
|
"loss": 185.3799, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.012160780875657026, |
|
"grad_norm": 1277.5128173828125, |
|
"learning_rate": 6.02e-06, |
|
"loss": 271.2112, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.012201182141024657, |
|
"grad_norm": 651.244384765625, |
|
"learning_rate": 6.040000000000001e-06, |
|
"loss": 206.7404, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.012241583406392288, |
|
"grad_norm": 886.7020874023438, |
|
"learning_rate": 6.0600000000000004e-06, |
|
"loss": 309.8574, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.01228198467175992, |
|
"grad_norm": 765.9307861328125, |
|
"learning_rate": 6.08e-06, |
|
"loss": 412.906, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.01232238593712755, |
|
"grad_norm": 1054.11669921875, |
|
"learning_rate": 6.1e-06, |
|
"loss": 356.0523, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.012362787202495182, |
|
"grad_norm": 786.2109375, |
|
"learning_rate": 6.12e-06, |
|
"loss": 260.9853, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.012403188467862813, |
|
"grad_norm": 1215.10400390625, |
|
"learning_rate": 6.1400000000000005e-06, |
|
"loss": 259.1632, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.012443589733230445, |
|
"grad_norm": 943.6842651367188, |
|
"learning_rate": 6.16e-06, |
|
"loss": 216.0937, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.012483990998598076, |
|
"grad_norm": 1460.5831298828125, |
|
"learning_rate": 6.18e-06, |
|
"loss": 337.1417, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.012524392263965707, |
|
"grad_norm": 3781.843017578125, |
|
"learning_rate": 6.2e-06, |
|
"loss": 374.3547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.012564793529333339, |
|
"grad_norm": 676.937744140625, |
|
"learning_rate": 6.22e-06, |
|
"loss": 350.8003, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.01260519479470097, |
|
"grad_norm": 1300.2470703125, |
|
"learning_rate": 6.24e-06, |
|
"loss": 354.9542, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.012645596060068601, |
|
"grad_norm": 715.6893310546875, |
|
"learning_rate": 6.26e-06, |
|
"loss": 225.5772, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.012685997325436233, |
|
"grad_norm": 438.3706359863281, |
|
"learning_rate": 6.28e-06, |
|
"loss": 262.8397, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.012726398590803864, |
|
"grad_norm": 5010.02392578125, |
|
"learning_rate": 6.300000000000001e-06, |
|
"loss": 282.8991, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.012766799856171495, |
|
"grad_norm": 1642.5548095703125, |
|
"learning_rate": 6.320000000000001e-06, |
|
"loss": 273.512, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.012807201121539127, |
|
"grad_norm": 484.15069580078125, |
|
"learning_rate": 6.34e-06, |
|
"loss": 190.9169, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.012847602386906758, |
|
"grad_norm": 1157.349365234375, |
|
"learning_rate": 6.360000000000001e-06, |
|
"loss": 378.3686, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.01288800365227439, |
|
"grad_norm": 547.7994384765625, |
|
"learning_rate": 6.38e-06, |
|
"loss": 241.6315, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.01292840491764202, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 233.8554, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.012968806183009652, |
|
"grad_norm": 953.7396850585938, |
|
"learning_rate": 6.4199999999999995e-06, |
|
"loss": 316.3731, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.013009207448377283, |
|
"grad_norm": 539.7330932617188, |
|
"learning_rate": 6.44e-06, |
|
"loss": 234.0129, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.013049608713744915, |
|
"grad_norm": 736.7625732421875, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 335.0041, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.013090009979112546, |
|
"grad_norm": 1194.715576171875, |
|
"learning_rate": 6.48e-06, |
|
"loss": 276.9117, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.013130411244480177, |
|
"grad_norm": 8021.32763671875, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 374.7519, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.013170812509847809, |
|
"grad_norm": 931.0237426757812, |
|
"learning_rate": 6.519999999999999e-06, |
|
"loss": 278.108, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.01321121377521544, |
|
"grad_norm": 953.587890625, |
|
"learning_rate": 6.54e-06, |
|
"loss": 368.7168, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.013251615040583071, |
|
"grad_norm": 1098.186767578125, |
|
"learning_rate": 6.560000000000001e-06, |
|
"loss": 276.9152, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.013292016305950703, |
|
"grad_norm": 1590.1026611328125, |
|
"learning_rate": 6.58e-06, |
|
"loss": 422.8925, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.013332417571318334, |
|
"grad_norm": 857.4612426757812, |
|
"learning_rate": 6.6e-06, |
|
"loss": 343.3155, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.013372818836685965, |
|
"grad_norm": 941.4984130859375, |
|
"learning_rate": 6.62e-06, |
|
"loss": 187.542, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.013413220102053596, |
|
"grad_norm": 938.3494262695312, |
|
"learning_rate": 6.640000000000001e-06, |
|
"loss": 237.5327, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.013453621367421228, |
|
"grad_norm": 936.9948120117188, |
|
"learning_rate": 6.660000000000001e-06, |
|
"loss": 275.3572, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.013494022632788859, |
|
"grad_norm": 1025.8851318359375, |
|
"learning_rate": 6.68e-06, |
|
"loss": 346.2491, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.01353442389815649, |
|
"grad_norm": 2196.15478515625, |
|
"learning_rate": 6.700000000000001e-06, |
|
"loss": 242.7346, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.013574825163524122, |
|
"grad_norm": 843.5136108398438, |
|
"learning_rate": 6.72e-06, |
|
"loss": 188.1487, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.013615226428891753, |
|
"grad_norm": 714.3571166992188, |
|
"learning_rate": 6.740000000000001e-06, |
|
"loss": 358.2911, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.013655627694259384, |
|
"grad_norm": 689.7639770507812, |
|
"learning_rate": 6.76e-06, |
|
"loss": 150.4375, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.013696028959627016, |
|
"grad_norm": 1244.08837890625, |
|
"learning_rate": 6.78e-06, |
|
"loss": 300.2315, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.013736430224994647, |
|
"grad_norm": 1171.05419921875, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 241.5314, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.013776831490362278, |
|
"grad_norm": 1291.3603515625, |
|
"learning_rate": 6.82e-06, |
|
"loss": 449.0854, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.01381723275572991, |
|
"grad_norm": 1006.1640014648438, |
|
"learning_rate": 6.840000000000001e-06, |
|
"loss": 346.3529, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.013857634021097541, |
|
"grad_norm": 6614.48583984375, |
|
"learning_rate": 6.8599999999999995e-06, |
|
"loss": 289.8388, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.013898035286465172, |
|
"grad_norm": 574.2523803710938, |
|
"learning_rate": 6.88e-06, |
|
"loss": 187.4786, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.013938436551832804, |
|
"grad_norm": 577.289794921875, |
|
"learning_rate": 6.900000000000001e-06, |
|
"loss": 280.2674, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.013978837817200435, |
|
"grad_norm": 1416.127197265625, |
|
"learning_rate": 6.92e-06, |
|
"loss": 330.4, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.014019239082568066, |
|
"grad_norm": 973.31689453125, |
|
"learning_rate": 6.9400000000000005e-06, |
|
"loss": 251.0885, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.014059640347935698, |
|
"grad_norm": 2889.614013671875, |
|
"learning_rate": 6.9599999999999994e-06, |
|
"loss": 288.5922, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.014100041613303329, |
|
"grad_norm": 734.4885864257812, |
|
"learning_rate": 6.98e-06, |
|
"loss": 247.6378, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.01414044287867096, |
|
"grad_norm": 1920.7076416015625, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 175.3216, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.014180844144038592, |
|
"grad_norm": 676.3031005859375, |
|
"learning_rate": 7.0200000000000006e-06, |
|
"loss": 209.18, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.014221245409406223, |
|
"grad_norm": 1840.922119140625, |
|
"learning_rate": 7.04e-06, |
|
"loss": 241.9013, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.014261646674773854, |
|
"grad_norm": 1004.5390625, |
|
"learning_rate": 7.06e-06, |
|
"loss": 219.0837, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.014302047940141486, |
|
"grad_norm": 2489.928955078125, |
|
"learning_rate": 7.080000000000001e-06, |
|
"loss": 207.0227, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.014342449205509117, |
|
"grad_norm": 713.509033203125, |
|
"learning_rate": 7.1e-06, |
|
"loss": 208.6873, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.014382850470876748, |
|
"grad_norm": 453.5292053222656, |
|
"learning_rate": 7.1200000000000004e-06, |
|
"loss": 281.9877, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.01442325173624438, |
|
"grad_norm": 900.6409912109375, |
|
"learning_rate": 7.140000000000001e-06, |
|
"loss": 235.5468, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.01446365300161201, |
|
"grad_norm": 438.5726623535156, |
|
"learning_rate": 7.16e-06, |
|
"loss": 162.5427, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.014504054266979642, |
|
"grad_norm": 861.850341796875, |
|
"learning_rate": 7.180000000000001e-06, |
|
"loss": 236.0929, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.014544455532347273, |
|
"grad_norm": 892.953369140625, |
|
"learning_rate": 7.2e-06, |
|
"loss": 249.5097, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.014584856797714905, |
|
"grad_norm": 1040.248046875, |
|
"learning_rate": 7.22e-06, |
|
"loss": 268.9143, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.014625258063082536, |
|
"grad_norm": 774.4244384765625, |
|
"learning_rate": 7.240000000000001e-06, |
|
"loss": 235.6966, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.014665659328450167, |
|
"grad_norm": 540.7510986328125, |
|
"learning_rate": 7.26e-06, |
|
"loss": 222.3721, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.014706060593817799, |
|
"grad_norm": 3457.84912109375, |
|
"learning_rate": 7.280000000000001e-06, |
|
"loss": 262.0487, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.01474646185918543, |
|
"grad_norm": 802.951416015625, |
|
"learning_rate": 7.2999999999999996e-06, |
|
"loss": 277.8951, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.014786863124553061, |
|
"grad_norm": 1426.4849853515625, |
|
"learning_rate": 7.32e-06, |
|
"loss": 273.6454, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.014827264389920693, |
|
"grad_norm": 763.5048828125, |
|
"learning_rate": 7.340000000000001e-06, |
|
"loss": 324.4447, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.014867665655288324, |
|
"grad_norm": 2634.498046875, |
|
"learning_rate": 7.36e-06, |
|
"loss": 285.5299, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.014908066920655955, |
|
"grad_norm": 1109.640380859375, |
|
"learning_rate": 7.3800000000000005e-06, |
|
"loss": 421.3942, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.014948468186023587, |
|
"grad_norm": 1876.8143310546875, |
|
"learning_rate": 7.4e-06, |
|
"loss": 345.6086, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.014988869451391218, |
|
"grad_norm": 820.827880859375, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 241.7691, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.01502927071675885, |
|
"grad_norm": 1500.841552734375, |
|
"learning_rate": 7.44e-06, |
|
"loss": 247.7824, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.01506967198212648, |
|
"grad_norm": 1121.291259765625, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 302.3958, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.015110073247494112, |
|
"grad_norm": 1057.3134765625, |
|
"learning_rate": 7.480000000000001e-06, |
|
"loss": 271.7261, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.015150474512861743, |
|
"grad_norm": 1772.322509765625, |
|
"learning_rate": 7.5e-06, |
|
"loss": 404.0533, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.015190875778229375, |
|
"grad_norm": 547.7960815429688, |
|
"learning_rate": 7.520000000000001e-06, |
|
"loss": 255.6221, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.015231277043597006, |
|
"grad_norm": 980.1959228515625, |
|
"learning_rate": 7.54e-06, |
|
"loss": 310.8835, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.015271678308964637, |
|
"grad_norm": 1686.9091796875, |
|
"learning_rate": 7.5600000000000005e-06, |
|
"loss": 246.4056, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.015312079574332269, |
|
"grad_norm": 867.049072265625, |
|
"learning_rate": 7.580000000000001e-06, |
|
"loss": 246.8477, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.0153524808396999, |
|
"grad_norm": 1169.5635986328125, |
|
"learning_rate": 7.6e-06, |
|
"loss": 184.4648, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.015392882105067531, |
|
"grad_norm": 689.9214477539062, |
|
"learning_rate": 7.620000000000001e-06, |
|
"loss": 323.659, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.015433283370435162, |
|
"grad_norm": 519.1124267578125, |
|
"learning_rate": 7.64e-06, |
|
"loss": 225.6972, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.015473684635802794, |
|
"grad_norm": 1257.6265869140625, |
|
"learning_rate": 7.660000000000001e-06, |
|
"loss": 337.094, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.015514085901170425, |
|
"grad_norm": 5566.001953125, |
|
"learning_rate": 7.68e-06, |
|
"loss": 191.7031, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.015554487166538056, |
|
"grad_norm": 734.2090454101562, |
|
"learning_rate": 7.7e-06, |
|
"loss": 221.7874, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.015594888431905688, |
|
"grad_norm": 1421.68359375, |
|
"learning_rate": 7.72e-06, |
|
"loss": 266.4913, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.01563528969727332, |
|
"grad_norm": 2573.87353515625, |
|
"learning_rate": 7.74e-06, |
|
"loss": 301.2037, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.01567569096264095, |
|
"grad_norm": 492.0390319824219, |
|
"learning_rate": 7.76e-06, |
|
"loss": 234.7707, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.01571609222800858, |
|
"grad_norm": 600.4080200195312, |
|
"learning_rate": 7.78e-06, |
|
"loss": 267.7107, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.01575649349337621, |
|
"grad_norm": 1099.673828125, |
|
"learning_rate": 7.8e-06, |
|
"loss": 292.7474, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.015796894758743844, |
|
"grad_norm": 541.96875, |
|
"learning_rate": 7.820000000000001e-06, |
|
"loss": 229.897, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.015837296024111474, |
|
"grad_norm": 3221.322509765625, |
|
"learning_rate": 7.84e-06, |
|
"loss": 200.1891, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.015877697289479107, |
|
"grad_norm": 2533.82177734375, |
|
"learning_rate": 7.860000000000001e-06, |
|
"loss": 240.3558, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.015918098554846737, |
|
"grad_norm": 909.9031372070312, |
|
"learning_rate": 7.879999999999999e-06, |
|
"loss": 231.9597, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.01595849982021437, |
|
"grad_norm": 669.5313110351562, |
|
"learning_rate": 7.9e-06, |
|
"loss": 316.9614, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.015998901085582, |
|
"grad_norm": 694.2930297851562, |
|
"learning_rate": 7.92e-06, |
|
"loss": 272.3228, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.016039302350949632, |
|
"grad_norm": 528.3792114257812, |
|
"learning_rate": 7.94e-06, |
|
"loss": 329.1032, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.016079703616317262, |
|
"grad_norm": 579.0252075195312, |
|
"learning_rate": 7.96e-06, |
|
"loss": 258.9907, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.016120104881684895, |
|
"grad_norm": 645.7807006835938, |
|
"learning_rate": 7.98e-06, |
|
"loss": 242.0821, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.016160506147052525, |
|
"grad_norm": 543.9231567382812, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 284.7925, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.016200907412420158, |
|
"grad_norm": 610.683349609375, |
|
"learning_rate": 8.02e-06, |
|
"loss": 220.6932, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.016241308677787787, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.040000000000001e-06, |
|
"loss": 230.9501, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.01628170994315542, |
|
"grad_norm": 1050.20947265625, |
|
"learning_rate": 8.06e-06, |
|
"loss": 292.7622, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.01632211120852305, |
|
"grad_norm": 1064.6005859375, |
|
"learning_rate": 8.08e-06, |
|
"loss": 346.057, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.016362512473890683, |
|
"grad_norm": 1817.19482421875, |
|
"learning_rate": 8.1e-06, |
|
"loss": 229.4932, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.016402913739258312, |
|
"grad_norm": 1169.279052734375, |
|
"learning_rate": 8.12e-06, |
|
"loss": 346.2139, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.016443315004625945, |
|
"grad_norm": 1004.5889282226562, |
|
"learning_rate": 8.14e-06, |
|
"loss": 257.1552, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.016483716269993575, |
|
"grad_norm": 660.7535400390625, |
|
"learning_rate": 8.160000000000001e-06, |
|
"loss": 247.7336, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.016524117535361208, |
|
"grad_norm": 638.9656372070312, |
|
"learning_rate": 8.18e-06, |
|
"loss": 360.9811, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.016564518800728838, |
|
"grad_norm": 865.1559448242188, |
|
"learning_rate": 8.200000000000001e-06, |
|
"loss": 267.7802, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.01660492006609647, |
|
"grad_norm": 528.1417236328125, |
|
"learning_rate": 8.22e-06, |
|
"loss": 211.1934, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.0166453213314641, |
|
"grad_norm": 602.795166015625, |
|
"learning_rate": 8.24e-06, |
|
"loss": 342.2845, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.016685722596831733, |
|
"grad_norm": 537.5540161132812, |
|
"learning_rate": 8.26e-06, |
|
"loss": 222.6927, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.016726123862199363, |
|
"grad_norm": 1009.3809814453125, |
|
"learning_rate": 8.28e-06, |
|
"loss": 278.8901, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.016766525127566996, |
|
"grad_norm": 1916.926513671875, |
|
"learning_rate": 8.3e-06, |
|
"loss": 335.051, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.016806926392934626, |
|
"grad_norm": 759.0120849609375, |
|
"learning_rate": 8.32e-06, |
|
"loss": 292.8282, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.01684732765830226, |
|
"grad_norm": 1126.7659912109375, |
|
"learning_rate": 8.34e-06, |
|
"loss": 356.8466, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.016887728923669888, |
|
"grad_norm": 4519.04833984375, |
|
"learning_rate": 8.36e-06, |
|
"loss": 254.2794, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.01692813018903752, |
|
"grad_norm": 625.6685791015625, |
|
"learning_rate": 8.380000000000001e-06, |
|
"loss": 256.8828, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.01696853145440515, |
|
"grad_norm": 901.2313842773438, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 173.8549, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.017008932719772784, |
|
"grad_norm": 612.3013916015625, |
|
"learning_rate": 8.42e-06, |
|
"loss": 278.5784, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.017049333985140414, |
|
"grad_norm": 500.49169921875, |
|
"learning_rate": 8.44e-06, |
|
"loss": 197.3738, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.017089735250508047, |
|
"grad_norm": 1060.2108154296875, |
|
"learning_rate": 8.46e-06, |
|
"loss": 298.4693, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.017130136515875676, |
|
"grad_norm": 612.9854736328125, |
|
"learning_rate": 8.48e-06, |
|
"loss": 329.1869, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.01717053778124331, |
|
"grad_norm": 1364.545654296875, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 334.3979, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.01721093904661094, |
|
"grad_norm": 696.9126586914062, |
|
"learning_rate": 8.52e-06, |
|
"loss": 271.8317, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.017251340311978572, |
|
"grad_norm": 688.8264770507812, |
|
"learning_rate": 8.540000000000001e-06, |
|
"loss": 203.2293, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.0172917415773462, |
|
"grad_norm": 1147.045654296875, |
|
"learning_rate": 8.56e-06, |
|
"loss": 349.6926, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.017332142842713835, |
|
"grad_norm": 477.3202209472656, |
|
"learning_rate": 8.580000000000001e-06, |
|
"loss": 146.8848, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.017372544108081464, |
|
"grad_norm": 1310.89599609375, |
|
"learning_rate": 8.599999999999999e-06, |
|
"loss": 247.6962, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.017412945373449097, |
|
"grad_norm": 1124.722900390625, |
|
"learning_rate": 8.62e-06, |
|
"loss": 347.9728, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.017453346638816727, |
|
"grad_norm": 1394.403564453125, |
|
"learning_rate": 8.64e-06, |
|
"loss": 261.2298, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.01749374790418436, |
|
"grad_norm": 889.3135375976562, |
|
"learning_rate": 8.66e-06, |
|
"loss": 176.3773, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.01753414916955199, |
|
"grad_norm": 1988.6873779296875, |
|
"learning_rate": 8.68e-06, |
|
"loss": 260.4097, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.017574550434919622, |
|
"grad_norm": 1403.53955078125, |
|
"learning_rate": 8.7e-06, |
|
"loss": 222.8027, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.017614951700287252, |
|
"grad_norm": 1020.5213623046875, |
|
"learning_rate": 8.720000000000001e-06, |
|
"loss": 202.7748, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.017655352965654885, |
|
"grad_norm": 4106.30126953125, |
|
"learning_rate": 8.740000000000001e-06, |
|
"loss": 257.8873, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.017695754231022515, |
|
"grad_norm": 458.9610290527344, |
|
"learning_rate": 8.76e-06, |
|
"loss": 274.0975, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.017736155496390148, |
|
"grad_norm": 1450.5250244140625, |
|
"learning_rate": 8.78e-06, |
|
"loss": 212.7885, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.017776556761757777, |
|
"grad_norm": 658.0136108398438, |
|
"learning_rate": 8.8e-06, |
|
"loss": 313.3228, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.01781695802712541, |
|
"grad_norm": 607.8909301757812, |
|
"learning_rate": 8.82e-06, |
|
"loss": 220.5824, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.01785735929249304, |
|
"grad_norm": 964.3424682617188, |
|
"learning_rate": 8.840000000000002e-06, |
|
"loss": 214.6224, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.017897760557860673, |
|
"grad_norm": 1267.5516357421875, |
|
"learning_rate": 8.86e-06, |
|
"loss": 231.4749, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.017938161823228303, |
|
"grad_norm": 852.99462890625, |
|
"learning_rate": 8.880000000000001e-06, |
|
"loss": 209.5046, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.017978563088595936, |
|
"grad_norm": 895.4765014648438, |
|
"learning_rate": 8.9e-06, |
|
"loss": 227.3372, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.018018964353963565, |
|
"grad_norm": 658.2288818359375, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 235.962, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.0180593656193312, |
|
"grad_norm": 1219.0494384765625, |
|
"learning_rate": 8.939999999999999e-06, |
|
"loss": 222.9912, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.018099766884698828, |
|
"grad_norm": 976.2411499023438, |
|
"learning_rate": 8.96e-06, |
|
"loss": 251.0392, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.01814016815006646, |
|
"grad_norm": 1230.3253173828125, |
|
"learning_rate": 8.98e-06, |
|
"loss": 208.2115, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.01818056941543409, |
|
"grad_norm": 738.1622314453125, |
|
"learning_rate": 9e-06, |
|
"loss": 248.237, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.018220970680801724, |
|
"grad_norm": 825.9674072265625, |
|
"learning_rate": 9.02e-06, |
|
"loss": 231.5556, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.018261371946169353, |
|
"grad_norm": 782.737060546875, |
|
"learning_rate": 9.04e-06, |
|
"loss": 213.6435, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.018301773211536986, |
|
"grad_norm": 712.6553344726562, |
|
"learning_rate": 9.06e-06, |
|
"loss": 234.0471, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.018342174476904616, |
|
"grad_norm": 1101.6629638671875, |
|
"learning_rate": 9.080000000000001e-06, |
|
"loss": 198.3389, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.01838257574227225, |
|
"grad_norm": 1747.929443359375, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 332.4391, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.01842297700763988, |
|
"grad_norm": 2980.9208984375, |
|
"learning_rate": 9.12e-06, |
|
"loss": 337.9396, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.01846337827300751, |
|
"grad_norm": 1282.68115234375, |
|
"learning_rate": 9.14e-06, |
|
"loss": 237.8889, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.01850377953837514, |
|
"grad_norm": 948.6932983398438, |
|
"learning_rate": 9.16e-06, |
|
"loss": 288.0977, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.018544180803742774, |
|
"grad_norm": 501.8581237792969, |
|
"learning_rate": 9.180000000000002e-06, |
|
"loss": 329.9324, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.018584582069110404, |
|
"grad_norm": 1850.9791259765625, |
|
"learning_rate": 9.2e-06, |
|
"loss": 319.7892, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.018624983334478037, |
|
"grad_norm": 730.577392578125, |
|
"learning_rate": 9.220000000000002e-06, |
|
"loss": 217.722, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.018665384599845666, |
|
"grad_norm": 408.53619384765625, |
|
"learning_rate": 9.24e-06, |
|
"loss": 190.9173, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.0187057858652133, |
|
"grad_norm": 884.1920776367188, |
|
"learning_rate": 9.260000000000001e-06, |
|
"loss": 193.8064, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.01874618713058093, |
|
"grad_norm": 1684.18408203125, |
|
"learning_rate": 9.28e-06, |
|
"loss": 197.2281, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.018786588395948562, |
|
"grad_norm": 1591.9307861328125, |
|
"learning_rate": 9.3e-06, |
|
"loss": 282.6273, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.01882698966131619, |
|
"grad_norm": 1377.8363037109375, |
|
"learning_rate": 9.32e-06, |
|
"loss": 306.7148, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.018867390926683825, |
|
"grad_norm": 600.4591674804688, |
|
"learning_rate": 9.34e-06, |
|
"loss": 290.5179, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.018907792192051454, |
|
"grad_norm": 1007.29296875, |
|
"learning_rate": 9.36e-06, |
|
"loss": 211.2058, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.018948193457419087, |
|
"grad_norm": 1834.026611328125, |
|
"learning_rate": 9.38e-06, |
|
"loss": 214.4897, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.018988594722786717, |
|
"grad_norm": 895.9391479492188, |
|
"learning_rate": 9.4e-06, |
|
"loss": 241.2341, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.01902899598815435, |
|
"grad_norm": 940.978271484375, |
|
"learning_rate": 9.420000000000001e-06, |
|
"loss": 255.7137, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.01906939725352198, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.44e-06, |
|
"loss": 202.176, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.019109798518889613, |
|
"grad_norm": 1429.115234375, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 199.2825, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.019150199784257242, |
|
"grad_norm": 1368.30322265625, |
|
"learning_rate": 9.48e-06, |
|
"loss": 265.0907, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.019190601049624875, |
|
"grad_norm": 773.6226196289062, |
|
"learning_rate": 9.5e-06, |
|
"loss": 272.2148, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.019231002314992505, |
|
"grad_norm": 799.7369384765625, |
|
"learning_rate": 9.52e-06, |
|
"loss": 259.8739, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.019271403580360138, |
|
"grad_norm": 718.1935424804688, |
|
"learning_rate": 9.54e-06, |
|
"loss": 255.6761, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.019311804845727767, |
|
"grad_norm": 1052.681396484375, |
|
"learning_rate": 9.560000000000002e-06, |
|
"loss": 240.1528, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.0193522061110954, |
|
"grad_norm": 351.5966491699219, |
|
"learning_rate": 9.58e-06, |
|
"loss": 170.535, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.01939260737646303, |
|
"grad_norm": 759.4265747070312, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 185.2903, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.019433008641830663, |
|
"grad_norm": 763.7293701171875, |
|
"learning_rate": 9.62e-06, |
|
"loss": 233.2488, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.019473409907198293, |
|
"grad_norm": 1342.382568359375, |
|
"learning_rate": 9.640000000000001e-06, |
|
"loss": 275.3923, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.019513811172565926, |
|
"grad_norm": 3298.531005859375, |
|
"learning_rate": 9.66e-06, |
|
"loss": 226.5765, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.019554212437933555, |
|
"grad_norm": 1122.7933349609375, |
|
"learning_rate": 9.68e-06, |
|
"loss": 178.3835, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.01959461370330119, |
|
"grad_norm": 1547.0048828125, |
|
"learning_rate": 9.7e-06, |
|
"loss": 305.318, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.019635014968668818, |
|
"grad_norm": 364.29541015625, |
|
"learning_rate": 9.72e-06, |
|
"loss": 170.9627, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.01967541623403645, |
|
"grad_norm": 1955.25634765625, |
|
"learning_rate": 9.74e-06, |
|
"loss": 302.9251, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.01971581749940408, |
|
"grad_norm": 2048.748291015625, |
|
"learning_rate": 9.760000000000001e-06, |
|
"loss": 202.4392, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.019756218764771714, |
|
"grad_norm": 660.9871215820312, |
|
"learning_rate": 9.78e-06, |
|
"loss": 187.9075, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.019796620030139343, |
|
"grad_norm": 689.9225463867188, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 311.5491, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.019837021295506976, |
|
"grad_norm": 953.7089233398438, |
|
"learning_rate": 9.820000000000001e-06, |
|
"loss": 299.3845, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.019877422560874606, |
|
"grad_norm": 614.9231567382812, |
|
"learning_rate": 9.84e-06, |
|
"loss": 168.2812, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.01991782382624224, |
|
"grad_norm": 1575.7044677734375, |
|
"learning_rate": 9.86e-06, |
|
"loss": 273.8306, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.01995822509160987, |
|
"grad_norm": 826.9859619140625, |
|
"learning_rate": 9.88e-06, |
|
"loss": 277.1096, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.0199986263569775, |
|
"grad_norm": 934.8515625, |
|
"learning_rate": 9.900000000000002e-06, |
|
"loss": 230.0742, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.02003902762234513, |
|
"grad_norm": 2514.587646484375, |
|
"learning_rate": 9.92e-06, |
|
"loss": 143.4857, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.020079428887712764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.940000000000001e-06, |
|
"loss": 292.2021, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.020119830153080394, |
|
"grad_norm": 1051.95068359375, |
|
"learning_rate": 9.96e-06, |
|
"loss": 243.0733, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.020160231418448027, |
|
"grad_norm": 522.6783447265625, |
|
"learning_rate": 9.980000000000001e-06, |
|
"loss": 255.3195, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.020200632683815656, |
|
"grad_norm": 1430.8314208984375, |
|
"learning_rate": 1e-05, |
|
"loss": 181.9884, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02024103394918329, |
|
"grad_norm": 2610.226806640625, |
|
"learning_rate": 1.002e-05, |
|
"loss": 240.1806, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.02028143521455092, |
|
"grad_norm": 706.251220703125, |
|
"learning_rate": 1.004e-05, |
|
"loss": 154.691, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.020321836479918552, |
|
"grad_norm": 1110.47021484375, |
|
"learning_rate": 1.006e-05, |
|
"loss": 270.7392, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.020362237745286182, |
|
"grad_norm": 1037.9814453125, |
|
"learning_rate": 1.008e-05, |
|
"loss": 260.6087, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.020402639010653815, |
|
"grad_norm": 988.474609375, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 195.7535, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.020443040276021444, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.012e-05, |
|
"loss": 292.6674, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.020483441541389077, |
|
"grad_norm": 1622.9769287109375, |
|
"learning_rate": 1.0140000000000001e-05, |
|
"loss": 226.0767, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.020523842806756707, |
|
"grad_norm": 2696.41845703125, |
|
"learning_rate": 1.016e-05, |
|
"loss": 242.0753, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.02056424407212434, |
|
"grad_norm": 2494.242431640625, |
|
"learning_rate": 1.018e-05, |
|
"loss": 310.918, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.02060464533749197, |
|
"grad_norm": 549.554443359375, |
|
"learning_rate": 1.02e-05, |
|
"loss": 238.1347, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.020645046602859603, |
|
"grad_norm": 753.8233642578125, |
|
"learning_rate": 1.022e-05, |
|
"loss": 271.2417, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.020685447868227232, |
|
"grad_norm": 905.9545288085938, |
|
"learning_rate": 1.024e-05, |
|
"loss": 204.41, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.020725849133594865, |
|
"grad_norm": 1900.184326171875, |
|
"learning_rate": 1.026e-05, |
|
"loss": 203.9288, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.020766250398962495, |
|
"grad_norm": 533.8387451171875, |
|
"learning_rate": 1.0280000000000002e-05, |
|
"loss": 186.7773, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.020806651664330128, |
|
"grad_norm": 671.30322265625, |
|
"learning_rate": 1.03e-05, |
|
"loss": 164.2021, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.020847052929697758, |
|
"grad_norm": 764.4326171875, |
|
"learning_rate": 1.0320000000000001e-05, |
|
"loss": 234.4929, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.02088745419506539, |
|
"grad_norm": 921.5126953125, |
|
"learning_rate": 1.0340000000000001e-05, |
|
"loss": 257.7942, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.02092785546043302, |
|
"grad_norm": 1028.72412109375, |
|
"learning_rate": 1.036e-05, |
|
"loss": 219.407, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.020968256725800653, |
|
"grad_norm": 857.1285400390625, |
|
"learning_rate": 1.038e-05, |
|
"loss": 267.7374, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.021008657991168283, |
|
"grad_norm": 733.449951171875, |
|
"learning_rate": 1.04e-05, |
|
"loss": 259.0882, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.021049059256535916, |
|
"grad_norm": 853.8319091796875, |
|
"learning_rate": 1.042e-05, |
|
"loss": 236.0859, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.021089460521903546, |
|
"grad_norm": 780.4057006835938, |
|
"learning_rate": 1.0440000000000002e-05, |
|
"loss": 235.1703, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.02112986178727118, |
|
"grad_norm": 1369.0264892578125, |
|
"learning_rate": 1.046e-05, |
|
"loss": 262.7009, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.021170263052638808, |
|
"grad_norm": 802.2042236328125, |
|
"learning_rate": 1.0480000000000001e-05, |
|
"loss": 317.5121, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.02121066431800644, |
|
"grad_norm": 3069.8564453125, |
|
"learning_rate": 1.05e-05, |
|
"loss": 266.7954, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.02125106558337407, |
|
"grad_norm": 1437.6041259765625, |
|
"learning_rate": 1.0520000000000001e-05, |
|
"loss": 290.8906, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.021291466848741704, |
|
"grad_norm": 1116.5682373046875, |
|
"learning_rate": 1.0539999999999999e-05, |
|
"loss": 217.5333, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.021331868114109333, |
|
"grad_norm": 674.6525268554688, |
|
"learning_rate": 1.056e-05, |
|
"loss": 262.4274, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.021372269379476967, |
|
"grad_norm": 980.0327758789062, |
|
"learning_rate": 1.058e-05, |
|
"loss": 253.3151, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.021412670644844596, |
|
"grad_norm": 1546.599609375, |
|
"learning_rate": 1.06e-05, |
|
"loss": 284.0318, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.02145307191021223, |
|
"grad_norm": 4072.65478515625, |
|
"learning_rate": 1.062e-05, |
|
"loss": 299.3221, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.02149347317557986, |
|
"grad_norm": 835.0826416015625, |
|
"learning_rate": 1.064e-05, |
|
"loss": 217.7071, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.021533874440947492, |
|
"grad_norm": 825.3660278320312, |
|
"learning_rate": 1.0660000000000001e-05, |
|
"loss": 250.3456, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.02157427570631512, |
|
"grad_norm": 1196.41455078125, |
|
"learning_rate": 1.0680000000000001e-05, |
|
"loss": 217.1434, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.021614676971682754, |
|
"grad_norm": 556.876220703125, |
|
"learning_rate": 1.0700000000000001e-05, |
|
"loss": 239.3144, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.021655078237050384, |
|
"grad_norm": 602.3602905273438, |
|
"learning_rate": 1.072e-05, |
|
"loss": 308.0275, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.021695479502418017, |
|
"grad_norm": 747.0403442382812, |
|
"learning_rate": 1.074e-05, |
|
"loss": 249.5218, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.021735880767785647, |
|
"grad_norm": 2065.047607421875, |
|
"learning_rate": 1.076e-05, |
|
"loss": 289.0896, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.02177628203315328, |
|
"grad_norm": 1931.2476806640625, |
|
"learning_rate": 1.0780000000000002e-05, |
|
"loss": 270.5146, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.02181668329852091, |
|
"grad_norm": 5352.14013671875, |
|
"learning_rate": 1.08e-05, |
|
"loss": 330.6276, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.021857084563888542, |
|
"grad_norm": 750.8521728515625, |
|
"learning_rate": 1.0820000000000001e-05, |
|
"loss": 260.1782, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.021897485829256172, |
|
"grad_norm": 1566.5867919921875, |
|
"learning_rate": 1.084e-05, |
|
"loss": 277.7564, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.021937887094623805, |
|
"grad_norm": 4719.2412109375, |
|
"learning_rate": 1.0860000000000001e-05, |
|
"loss": 270.119, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.021978288359991435, |
|
"grad_norm": 886.023681640625, |
|
"learning_rate": 1.088e-05, |
|
"loss": 246.386, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.022018689625359068, |
|
"grad_norm": 15289.64453125, |
|
"learning_rate": 1.09e-05, |
|
"loss": 360.9848, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.022059090890726697, |
|
"grad_norm": 854.2662353515625, |
|
"learning_rate": 1.092e-05, |
|
"loss": 234.6689, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.02209949215609433, |
|
"grad_norm": 1195.251953125, |
|
"learning_rate": 1.094e-05, |
|
"loss": 233.2883, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.02213989342146196, |
|
"grad_norm": 2974.2724609375, |
|
"learning_rate": 1.096e-05, |
|
"loss": 256.327, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.022180294686829593, |
|
"grad_norm": 1651.259765625, |
|
"learning_rate": 1.098e-05, |
|
"loss": 258.9452, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.022220695952197222, |
|
"grad_norm": 1866.4769287109375, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 277.225, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.022261097217564856, |
|
"grad_norm": 2012.682861328125, |
|
"learning_rate": 1.1020000000000001e-05, |
|
"loss": 213.3877, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.022301498482932485, |
|
"grad_norm": 787.662353515625, |
|
"learning_rate": 1.1040000000000001e-05, |
|
"loss": 223.892, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.022341899748300118, |
|
"grad_norm": 9214.21484375, |
|
"learning_rate": 1.106e-05, |
|
"loss": 217.7184, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.022382301013667748, |
|
"grad_norm": 1571.4190673828125, |
|
"learning_rate": 1.108e-05, |
|
"loss": 268.1608, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.02242270227903538, |
|
"grad_norm": 1564.1744384765625, |
|
"learning_rate": 1.11e-05, |
|
"loss": 259.3926, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.02246310354440301, |
|
"grad_norm": 676.7450561523438, |
|
"learning_rate": 1.112e-05, |
|
"loss": 205.3203, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.022503504809770643, |
|
"grad_norm": 1126.58837890625, |
|
"learning_rate": 1.114e-05, |
|
"loss": 127.3151, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.022543906075138273, |
|
"grad_norm": 3336.22607421875, |
|
"learning_rate": 1.1160000000000002e-05, |
|
"loss": 226.2071, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.022584307340505906, |
|
"grad_norm": 416.8356018066406, |
|
"learning_rate": 1.118e-05, |
|
"loss": 257.1062, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.022624708605873536, |
|
"grad_norm": 653.0436401367188, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 198.1556, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.02266510987124117, |
|
"grad_norm": 5374.44091796875, |
|
"learning_rate": 1.122e-05, |
|
"loss": 230.6045, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.0227055111366088, |
|
"grad_norm": 1517.1585693359375, |
|
"learning_rate": 1.124e-05, |
|
"loss": 217.2467, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.02274591240197643, |
|
"grad_norm": 1149.62646484375, |
|
"learning_rate": 1.126e-05, |
|
"loss": 294.9917, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.02278631366734406, |
|
"grad_norm": 1075.8157958984375, |
|
"learning_rate": 1.128e-05, |
|
"loss": 273.2365, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.022826714932711694, |
|
"grad_norm": 1706.23388671875, |
|
"learning_rate": 1.13e-05, |
|
"loss": 239.202, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.022867116198079324, |
|
"grad_norm": 939.1124877929688, |
|
"learning_rate": 1.132e-05, |
|
"loss": 262.5748, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.022907517463446957, |
|
"grad_norm": 1178.756591796875, |
|
"learning_rate": 1.134e-05, |
|
"loss": 205.6744, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.022947918728814586, |
|
"grad_norm": 402.2773132324219, |
|
"learning_rate": 1.1360000000000001e-05, |
|
"loss": 192.2235, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.02298831999418222, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1380000000000001e-05, |
|
"loss": 258.3543, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.02302872125954985, |
|
"grad_norm": 1016.2149658203125, |
|
"learning_rate": 1.1400000000000001e-05, |
|
"loss": 217.5345, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.023069122524917482, |
|
"grad_norm": 1061.48828125, |
|
"learning_rate": 1.142e-05, |
|
"loss": 227.8594, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.02310952379028511, |
|
"grad_norm": 481.3313293457031, |
|
"learning_rate": 1.144e-05, |
|
"loss": 194.8761, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.023149925055652745, |
|
"grad_norm": 782.8972778320312, |
|
"learning_rate": 1.146e-05, |
|
"loss": 173.4156, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.023190326321020374, |
|
"grad_norm": 672.4293212890625, |
|
"learning_rate": 1.148e-05, |
|
"loss": 206.9097, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.023230727586388007, |
|
"grad_norm": 2217.28369140625, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"loss": 238.6014, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.023271128851755637, |
|
"grad_norm": 3414.3115234375, |
|
"learning_rate": 1.152e-05, |
|
"loss": 324.1321, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.02331153011712327, |
|
"grad_norm": 864.6550903320312, |
|
"learning_rate": 1.1540000000000001e-05, |
|
"loss": 213.1406, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.0233519313824909, |
|
"grad_norm": 752.6487426757812, |
|
"learning_rate": 1.156e-05, |
|
"loss": 136.1037, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.023392332647858533, |
|
"grad_norm": 550.0790405273438, |
|
"learning_rate": 1.1580000000000001e-05, |
|
"loss": 295.7111, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.023432733913226162, |
|
"grad_norm": 918.3283081054688, |
|
"learning_rate": 1.16e-05, |
|
"loss": 234.3793, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.023473135178593795, |
|
"grad_norm": 1468.6207275390625, |
|
"learning_rate": 1.162e-05, |
|
"loss": 191.9201, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.023513536443961425, |
|
"grad_norm": 1899.0367431640625, |
|
"learning_rate": 1.164e-05, |
|
"loss": 234.0571, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.023553937709329058, |
|
"grad_norm": 684.876953125, |
|
"learning_rate": 1.166e-05, |
|
"loss": 180.286, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.023594338974696687, |
|
"grad_norm": 1083.017578125, |
|
"learning_rate": 1.168e-05, |
|
"loss": 300.8844, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.02363474024006432, |
|
"grad_norm": 1754.2984619140625, |
|
"learning_rate": 1.1700000000000001e-05, |
|
"loss": 391.4959, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.02367514150543195, |
|
"grad_norm": 945.9032592773438, |
|
"learning_rate": 1.172e-05, |
|
"loss": 272.7161, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.023715542770799583, |
|
"grad_norm": 1564.7225341796875, |
|
"learning_rate": 1.1740000000000001e-05, |
|
"loss": 233.5665, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.023755944036167213, |
|
"grad_norm": 523.8766479492188, |
|
"learning_rate": 1.1760000000000001e-05, |
|
"loss": 204.2223, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.023796345301534846, |
|
"grad_norm": 1801.8782958984375, |
|
"learning_rate": 1.178e-05, |
|
"loss": 215.9124, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.023836746566902475, |
|
"grad_norm": 2132.8076171875, |
|
"learning_rate": 1.18e-05, |
|
"loss": 328.9819, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.02387714783227011, |
|
"grad_norm": 1254.7607421875, |
|
"learning_rate": 1.182e-05, |
|
"loss": 249.0257, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.023917549097637738, |
|
"grad_norm": 873.1114501953125, |
|
"learning_rate": 1.1840000000000002e-05, |
|
"loss": 156.6504, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.02395795036300537, |
|
"grad_norm": 2675.64892578125, |
|
"learning_rate": 1.186e-05, |
|
"loss": 213.5535, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.023998351628373, |
|
"grad_norm": 744.88671875, |
|
"learning_rate": 1.1880000000000001e-05, |
|
"loss": 189.1653, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.024038752893740634, |
|
"grad_norm": 979.5391845703125, |
|
"learning_rate": 1.19e-05, |
|
"loss": 223.2056, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.024079154159108263, |
|
"grad_norm": 872.666015625, |
|
"learning_rate": 1.1920000000000001e-05, |
|
"loss": 194.5632, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.024119555424475896, |
|
"grad_norm": 1239.266845703125, |
|
"learning_rate": 1.1940000000000001e-05, |
|
"loss": 221.0386, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.024159956689843526, |
|
"grad_norm": 1718.065673828125, |
|
"learning_rate": 1.196e-05, |
|
"loss": 237.8773, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.02420035795521116, |
|
"grad_norm": 1780.784423828125, |
|
"learning_rate": 1.198e-05, |
|
"loss": 119.0116, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.02424075922057879, |
|
"grad_norm": 921.1470947265625, |
|
"learning_rate": 1.2e-05, |
|
"loss": 225.7212, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02428116048594642, |
|
"grad_norm": 2161.641845703125, |
|
"learning_rate": 1.202e-05, |
|
"loss": 181.6732, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.02432156175131405, |
|
"grad_norm": 1104.2542724609375, |
|
"learning_rate": 1.204e-05, |
|
"loss": 187.8781, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.024361963016681684, |
|
"grad_norm": 1214.7061767578125, |
|
"learning_rate": 1.206e-05, |
|
"loss": 221.4846, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.024402364282049314, |
|
"grad_norm": 1222.162841796875, |
|
"learning_rate": 1.2080000000000001e-05, |
|
"loss": 172.2742, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.024442765547416947, |
|
"grad_norm": 828.8873291015625, |
|
"learning_rate": 1.2100000000000001e-05, |
|
"loss": 227.2261, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.024483166812784576, |
|
"grad_norm": 621.658447265625, |
|
"learning_rate": 1.2120000000000001e-05, |
|
"loss": 236.3511, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.02452356807815221, |
|
"grad_norm": 1036.8626708984375, |
|
"learning_rate": 1.214e-05, |
|
"loss": 233.8868, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.02456396934351984, |
|
"grad_norm": 1811.7554931640625, |
|
"learning_rate": 1.216e-05, |
|
"loss": 238.5441, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.02460437060888747, |
|
"grad_norm": 1012.0717163085938, |
|
"learning_rate": 1.2180000000000002e-05, |
|
"loss": 234.7135, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.0246447718742551, |
|
"grad_norm": 1480.242919921875, |
|
"learning_rate": 1.22e-05, |
|
"loss": 275.5092, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.02468517313962273, |
|
"grad_norm": 1884.02001953125, |
|
"learning_rate": 1.2220000000000002e-05, |
|
"loss": 242.7474, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.024725574404990364, |
|
"grad_norm": 1144.38916015625, |
|
"learning_rate": 1.224e-05, |
|
"loss": 368.5085, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.024765975670357994, |
|
"grad_norm": 1017.46875, |
|
"learning_rate": 1.2260000000000001e-05, |
|
"loss": 224.2549, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.024806376935725627, |
|
"grad_norm": 1646.821044921875, |
|
"learning_rate": 1.2280000000000001e-05, |
|
"loss": 221.9995, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.024846778201093257, |
|
"grad_norm": 719.86376953125, |
|
"learning_rate": 1.23e-05, |
|
"loss": 221.8839, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.02488717946646089, |
|
"grad_norm": 865.7079467773438, |
|
"learning_rate": 1.232e-05, |
|
"loss": 216.4899, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.02492758073182852, |
|
"grad_norm": 837.6893310546875, |
|
"learning_rate": 1.234e-05, |
|
"loss": 212.6082, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.024967981997196152, |
|
"grad_norm": 619.9283447265625, |
|
"learning_rate": 1.236e-05, |
|
"loss": 231.417, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.025008383262563782, |
|
"grad_norm": 938.736328125, |
|
"learning_rate": 1.238e-05, |
|
"loss": 222.0059, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.025048784527931415, |
|
"grad_norm": 768.3204956054688, |
|
"learning_rate": 1.24e-05, |
|
"loss": 269.6617, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.025089185793299044, |
|
"grad_norm": 25211.0859375, |
|
"learning_rate": 1.2420000000000001e-05, |
|
"loss": 283.9776, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.025129587058666678, |
|
"grad_norm": 1000.73046875, |
|
"learning_rate": 1.244e-05, |
|
"loss": 237.4733, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.025169988324034307, |
|
"grad_norm": 1383.859375, |
|
"learning_rate": 1.2460000000000001e-05, |
|
"loss": 207.5625, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.02521038958940194, |
|
"grad_norm": 1157.95654296875, |
|
"learning_rate": 1.248e-05, |
|
"loss": 188.4448, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.02525079085476957, |
|
"grad_norm": 437.2555847167969, |
|
"learning_rate": 1.25e-05, |
|
"loss": 253.4308, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.025291192120137203, |
|
"grad_norm": 735.6442260742188, |
|
"learning_rate": 1.252e-05, |
|
"loss": 239.4786, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.025331593385504832, |
|
"grad_norm": 3905.33935546875, |
|
"learning_rate": 1.2540000000000002e-05, |
|
"loss": 340.7768, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.025371994650872465, |
|
"grad_norm": 1644.240234375, |
|
"learning_rate": 1.256e-05, |
|
"loss": 221.2143, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.025412395916240095, |
|
"grad_norm": 745.5984497070312, |
|
"learning_rate": 1.258e-05, |
|
"loss": 154.9017, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.025452797181607728, |
|
"grad_norm": 2822.624267578125, |
|
"learning_rate": 1.2600000000000001e-05, |
|
"loss": 193.9914, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.025493198446975358, |
|
"grad_norm": 415.230712890625, |
|
"learning_rate": 1.2620000000000001e-05, |
|
"loss": 150.345, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.02553359971234299, |
|
"grad_norm": 2347.072021484375, |
|
"learning_rate": 1.2640000000000003e-05, |
|
"loss": 239.6525, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.02557400097771062, |
|
"grad_norm": 1561.8658447265625, |
|
"learning_rate": 1.2659999999999999e-05, |
|
"loss": 288.6605, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.025614402243078253, |
|
"grad_norm": 651.1387329101562, |
|
"learning_rate": 1.268e-05, |
|
"loss": 266.1772, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.025654803508445883, |
|
"grad_norm": 544.3804931640625, |
|
"learning_rate": 1.27e-05, |
|
"loss": 273.7383, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.025695204773813516, |
|
"grad_norm": 1525.947021484375, |
|
"learning_rate": 1.2720000000000002e-05, |
|
"loss": 251.1667, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.025735606039181146, |
|
"grad_norm": 1018.3317260742188, |
|
"learning_rate": 1.2740000000000002e-05, |
|
"loss": 210.4547, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.02577600730454878, |
|
"grad_norm": 911.9342651367188, |
|
"learning_rate": 1.276e-05, |
|
"loss": 278.7565, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.025816408569916408, |
|
"grad_norm": 3710.921630859375, |
|
"learning_rate": 1.278e-05, |
|
"loss": 229.9252, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.02585680983528404, |
|
"grad_norm": 709.6052856445312, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 197.896, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.02589721110065167, |
|
"grad_norm": 688.3367919921875, |
|
"learning_rate": 1.2820000000000001e-05, |
|
"loss": 205.0635, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.025937612366019304, |
|
"grad_norm": 1232.222412109375, |
|
"learning_rate": 1.2839999999999999e-05, |
|
"loss": 196.4835, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.025978013631386934, |
|
"grad_norm": 665.0555419921875, |
|
"learning_rate": 1.286e-05, |
|
"loss": 211.4123, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.026018414896754567, |
|
"grad_norm": 2699.49755859375, |
|
"learning_rate": 1.288e-05, |
|
"loss": 266.4242, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.026058816162122196, |
|
"grad_norm": 1464.1005859375, |
|
"learning_rate": 1.29e-05, |
|
"loss": 271.645, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.02609921742748983, |
|
"grad_norm": 605.3104858398438, |
|
"learning_rate": 1.2920000000000002e-05, |
|
"loss": 158.6754, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.02613961869285746, |
|
"grad_norm": 643.6430053710938, |
|
"learning_rate": 1.294e-05, |
|
"loss": 246.8146, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.026180019958225092, |
|
"grad_norm": 780.9293823242188, |
|
"learning_rate": 1.296e-05, |
|
"loss": 270.3225, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.02622042122359272, |
|
"grad_norm": 2428.1328125, |
|
"learning_rate": 1.2980000000000001e-05, |
|
"loss": 218.6574, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.026260822488960354, |
|
"grad_norm": 737.8549194335938, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 241.2201, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.026301223754327984, |
|
"grad_norm": 690.761474609375, |
|
"learning_rate": 1.3020000000000002e-05, |
|
"loss": 215.8646, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.026341625019695617, |
|
"grad_norm": 693.7470092773438, |
|
"learning_rate": 1.3039999999999999e-05, |
|
"loss": 226.2056, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.026382026285063247, |
|
"grad_norm": 1125.502197265625, |
|
"learning_rate": 1.306e-05, |
|
"loss": 250.0323, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.02642242755043088, |
|
"grad_norm": 3172.219482421875, |
|
"learning_rate": 1.308e-05, |
|
"loss": 247.7785, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.02646282881579851, |
|
"grad_norm": 1940.4234619140625, |
|
"learning_rate": 1.3100000000000002e-05, |
|
"loss": 178.4448, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.026503230081166142, |
|
"grad_norm": 632.4578857421875, |
|
"learning_rate": 1.3120000000000001e-05, |
|
"loss": 206.7619, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.026543631346533772, |
|
"grad_norm": 1491.89306640625, |
|
"learning_rate": 1.314e-05, |
|
"loss": 164.1459, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.026584032611901405, |
|
"grad_norm": 831.048828125, |
|
"learning_rate": 1.316e-05, |
|
"loss": 202.0544, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.026624433877269035, |
|
"grad_norm": 812.6109619140625, |
|
"learning_rate": 1.3180000000000001e-05, |
|
"loss": 127.2249, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.026664835142636668, |
|
"grad_norm": 678.7022094726562, |
|
"learning_rate": 1.32e-05, |
|
"loss": 262.2538, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.026705236408004297, |
|
"grad_norm": 1193.4014892578125, |
|
"learning_rate": 1.3220000000000002e-05, |
|
"loss": 258.1862, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.02674563767337193, |
|
"grad_norm": 881.2777099609375, |
|
"learning_rate": 1.324e-05, |
|
"loss": 181.1106, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.02678603893873956, |
|
"grad_norm": 751.0634765625, |
|
"learning_rate": 1.326e-05, |
|
"loss": 328.8563, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.026826440204107193, |
|
"grad_norm": 810.8790893554688, |
|
"learning_rate": 1.3280000000000002e-05, |
|
"loss": 175.6053, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.026866841469474823, |
|
"grad_norm": 544.2944946289062, |
|
"learning_rate": 1.3300000000000001e-05, |
|
"loss": 172.9304, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.026907242734842456, |
|
"grad_norm": 1003.13818359375, |
|
"learning_rate": 1.3320000000000001e-05, |
|
"loss": 180.5678, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.026947644000210085, |
|
"grad_norm": 1979.7833251953125, |
|
"learning_rate": 1.334e-05, |
|
"loss": 212.5934, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.026988045265577718, |
|
"grad_norm": 660.824462890625, |
|
"learning_rate": 1.336e-05, |
|
"loss": 170.068, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.027028446530945348, |
|
"grad_norm": 753.3848876953125, |
|
"learning_rate": 1.338e-05, |
|
"loss": 226.8918, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.02706884779631298, |
|
"grad_norm": 1572.1107177734375, |
|
"learning_rate": 1.3400000000000002e-05, |
|
"loss": 285.1505, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.02710924906168061, |
|
"grad_norm": 727.326416015625, |
|
"learning_rate": 1.3420000000000002e-05, |
|
"loss": 259.9044, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.027149650327048244, |
|
"grad_norm": 1423.973388671875, |
|
"learning_rate": 1.344e-05, |
|
"loss": 208.3222, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.027190051592415873, |
|
"grad_norm": 3651.573974609375, |
|
"learning_rate": 1.346e-05, |
|
"loss": 276.9352, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.027230452857783506, |
|
"grad_norm": 986.3583984375, |
|
"learning_rate": 1.3480000000000001e-05, |
|
"loss": 233.0887, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.027270854123151136, |
|
"grad_norm": 1490.3424072265625, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 308.2051, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.02731125538851877, |
|
"grad_norm": 1850.4970703125, |
|
"learning_rate": 1.352e-05, |
|
"loss": 220.4273, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.0273516566538864, |
|
"grad_norm": 558.9097900390625, |
|
"learning_rate": 1.3539999999999999e-05, |
|
"loss": 192.5242, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.02739205791925403, |
|
"grad_norm": 1742.455322265625, |
|
"learning_rate": 1.356e-05, |
|
"loss": 228.9638, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.02743245918462166, |
|
"grad_norm": 6109.70947265625, |
|
"learning_rate": 1.358e-05, |
|
"loss": 218.1945, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.027472860449989294, |
|
"grad_norm": 2294.8359375, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 259.6226, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.027513261715356924, |
|
"grad_norm": 1345.371826171875, |
|
"learning_rate": 1.362e-05, |
|
"loss": 208.2849, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.027553662980724557, |
|
"grad_norm": 631.337158203125, |
|
"learning_rate": 1.364e-05, |
|
"loss": 174.1924, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.027594064246092186, |
|
"grad_norm": 2324.395751953125, |
|
"learning_rate": 1.3660000000000001e-05, |
|
"loss": 217.651, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.02763446551145982, |
|
"grad_norm": 1169.88916015625, |
|
"learning_rate": 1.3680000000000001e-05, |
|
"loss": 152.5202, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.02767486677682745, |
|
"grad_norm": 580.0879516601562, |
|
"learning_rate": 1.3700000000000001e-05, |
|
"loss": 260.7849, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.027715268042195082, |
|
"grad_norm": 1336.279052734375, |
|
"learning_rate": 1.3719999999999999e-05, |
|
"loss": 184.8877, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.02775566930756271, |
|
"grad_norm": 1506.610107421875, |
|
"learning_rate": 1.374e-05, |
|
"loss": 236.7283, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.027796070572930345, |
|
"grad_norm": 1100.15234375, |
|
"learning_rate": 1.376e-05, |
|
"loss": 263.8396, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.027836471838297974, |
|
"grad_norm": 794.3699340820312, |
|
"learning_rate": 1.3780000000000002e-05, |
|
"loss": 290.786, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.027876873103665607, |
|
"grad_norm": 2009.521240234375, |
|
"learning_rate": 1.3800000000000002e-05, |
|
"loss": 173.6581, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.027917274369033237, |
|
"grad_norm": 1658.059814453125, |
|
"learning_rate": 1.382e-05, |
|
"loss": 210.0458, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.02795767563440087, |
|
"grad_norm": 632.90869140625, |
|
"learning_rate": 1.384e-05, |
|
"loss": 179.4226, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.0279980768997685, |
|
"grad_norm": 1309.6959228515625, |
|
"learning_rate": 1.3860000000000001e-05, |
|
"loss": 168.1927, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.028038478165136133, |
|
"grad_norm": 1135.08935546875, |
|
"learning_rate": 1.3880000000000001e-05, |
|
"loss": 189.0599, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.028078879430503762, |
|
"grad_norm": 642.1088256835938, |
|
"learning_rate": 1.3900000000000002e-05, |
|
"loss": 191.8304, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.028119280695871395, |
|
"grad_norm": 1067.7398681640625, |
|
"learning_rate": 1.3919999999999999e-05, |
|
"loss": 153.804, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.028159681961239025, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.394e-05, |
|
"loss": 131.2499, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.028200083226606658, |
|
"grad_norm": 4239.3876953125, |
|
"learning_rate": 1.396e-05, |
|
"loss": 219.0053, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.028240484491974287, |
|
"grad_norm": 1219.2093505859375, |
|
"learning_rate": 1.3980000000000002e-05, |
|
"loss": 216.1493, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.02828088575734192, |
|
"grad_norm": 829.5226440429688, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 197.1629, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02832128702270955, |
|
"grad_norm": 783.1107177734375, |
|
"learning_rate": 1.402e-05, |
|
"loss": 197.8331, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.028361688288077183, |
|
"grad_norm": 1040.990966796875, |
|
"learning_rate": 1.4040000000000001e-05, |
|
"loss": 234.4111, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.028402089553444813, |
|
"grad_norm": 952.650390625, |
|
"learning_rate": 1.4060000000000001e-05, |
|
"loss": 138.1043, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.028442490818812446, |
|
"grad_norm": 2120.21337890625, |
|
"learning_rate": 1.408e-05, |
|
"loss": 190.9683, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.028482892084180075, |
|
"grad_norm": 1032.8970947265625, |
|
"learning_rate": 1.4099999999999999e-05, |
|
"loss": 211.108, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.02852329334954771, |
|
"grad_norm": 845.619384765625, |
|
"learning_rate": 1.412e-05, |
|
"loss": 249.2893, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.028563694614915338, |
|
"grad_norm": 2072.714599609375, |
|
"learning_rate": 1.414e-05, |
|
"loss": 148.9613, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.02860409588028297, |
|
"grad_norm": 1205.255615234375, |
|
"learning_rate": 1.4160000000000002e-05, |
|
"loss": 208.2003, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.0286444971456506, |
|
"grad_norm": 1514.6177978515625, |
|
"learning_rate": 1.4180000000000001e-05, |
|
"loss": 211.4245, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.028684898411018234, |
|
"grad_norm": 599.4893188476562, |
|
"learning_rate": 1.42e-05, |
|
"loss": 270.9207, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.028725299676385863, |
|
"grad_norm": 622.9778442382812, |
|
"learning_rate": 1.422e-05, |
|
"loss": 248.9024, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.028765700941753496, |
|
"grad_norm": 827.0371704101562, |
|
"learning_rate": 1.4240000000000001e-05, |
|
"loss": 228.1889, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.028806102207121126, |
|
"grad_norm": 1157.1502685546875, |
|
"learning_rate": 1.426e-05, |
|
"loss": 181.2518, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.02884650347248876, |
|
"grad_norm": 1035.8681640625, |
|
"learning_rate": 1.4280000000000002e-05, |
|
"loss": 223.2644, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.02888690473785639, |
|
"grad_norm": 1198.443359375, |
|
"learning_rate": 1.43e-05, |
|
"loss": 193.8474, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.02892730600322402, |
|
"grad_norm": 1005.276611328125, |
|
"learning_rate": 1.432e-05, |
|
"loss": 250.6531, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.02896770726859165, |
|
"grad_norm": 2052.37158203125, |
|
"learning_rate": 1.434e-05, |
|
"loss": 184.0699, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.029008108533959284, |
|
"grad_norm": 22271.3046875, |
|
"learning_rate": 1.4360000000000001e-05, |
|
"loss": 298.0969, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.029048509799326914, |
|
"grad_norm": 1663.1221923828125, |
|
"learning_rate": 1.4380000000000001e-05, |
|
"loss": 256.0283, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.029088911064694547, |
|
"grad_norm": 5215.501953125, |
|
"learning_rate": 1.44e-05, |
|
"loss": 208.372, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.029129312330062176, |
|
"grad_norm": 1427.092529296875, |
|
"learning_rate": 1.4420000000000001e-05, |
|
"loss": 196.7174, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.02916971359542981, |
|
"grad_norm": 755.31494140625, |
|
"learning_rate": 1.444e-05, |
|
"loss": 207.5049, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.02921011486079744, |
|
"grad_norm": 1840.75927734375, |
|
"learning_rate": 1.4460000000000002e-05, |
|
"loss": 246.7446, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.029250516126165072, |
|
"grad_norm": 1352.86767578125, |
|
"learning_rate": 1.4480000000000002e-05, |
|
"loss": 193.7286, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.029290917391532702, |
|
"grad_norm": 1088.9520263671875, |
|
"learning_rate": 1.45e-05, |
|
"loss": 213.7931, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.029331318656900335, |
|
"grad_norm": 2126.0322265625, |
|
"learning_rate": 1.452e-05, |
|
"loss": 227.5255, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.029371719922267964, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4540000000000001e-05, |
|
"loss": 149.3132, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.029412121187635597, |
|
"grad_norm": 582.3840942382812, |
|
"learning_rate": 1.4560000000000001e-05, |
|
"loss": 219.7635, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.029452522453003227, |
|
"grad_norm": 935.360107421875, |
|
"learning_rate": 1.4580000000000003e-05, |
|
"loss": 219.2881, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.02949292371837086, |
|
"grad_norm": 2865.860595703125, |
|
"learning_rate": 1.4599999999999999e-05, |
|
"loss": 170.7084, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.02953332498373849, |
|
"grad_norm": 1290.148681640625, |
|
"learning_rate": 1.462e-05, |
|
"loss": 273.1594, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.029573726249106123, |
|
"grad_norm": 1073.5101318359375, |
|
"learning_rate": 1.464e-05, |
|
"loss": 276.2752, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.029614127514473752, |
|
"grad_norm": 2523.898681640625, |
|
"learning_rate": 1.4660000000000002e-05, |
|
"loss": 229.5607, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.029654528779841385, |
|
"grad_norm": 2364.947021484375, |
|
"learning_rate": 1.4680000000000002e-05, |
|
"loss": 194.6274, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.029694930045209015, |
|
"grad_norm": 1139.0018310546875, |
|
"learning_rate": 1.47e-05, |
|
"loss": 143.1356, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.029735331310576648, |
|
"grad_norm": 1392.792236328125, |
|
"learning_rate": 1.472e-05, |
|
"loss": 220.7791, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.029775732575944278, |
|
"grad_norm": 1605.3038330078125, |
|
"learning_rate": 1.4740000000000001e-05, |
|
"loss": 159.6861, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.02981613384131191, |
|
"grad_norm": 1095.6702880859375, |
|
"learning_rate": 1.4760000000000001e-05, |
|
"loss": 190.0395, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.02985653510667954, |
|
"grad_norm": 3128.165283203125, |
|
"learning_rate": 1.4779999999999999e-05, |
|
"loss": 217.6996, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.029896936372047173, |
|
"grad_norm": 868.65966796875, |
|
"learning_rate": 1.48e-05, |
|
"loss": 303.5258, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.029937337637414803, |
|
"grad_norm": 1097.5875244140625, |
|
"learning_rate": 1.482e-05, |
|
"loss": 208.767, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.029977738902782436, |
|
"grad_norm": 1184.8807373046875, |
|
"learning_rate": 1.4840000000000002e-05, |
|
"loss": 212.3098, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.030018140168150065, |
|
"grad_norm": 1545.9532470703125, |
|
"learning_rate": 1.4860000000000002e-05, |
|
"loss": 142.4259, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.0300585414335177, |
|
"grad_norm": 1245.4107666015625, |
|
"learning_rate": 1.488e-05, |
|
"loss": 164.8042, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.030098942698885328, |
|
"grad_norm": 577.9031982421875, |
|
"learning_rate": 1.49e-05, |
|
"loss": 216.6332, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.03013934396425296, |
|
"grad_norm": 1043.52392578125, |
|
"learning_rate": 1.4920000000000001e-05, |
|
"loss": 247.0811, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.03017974522962059, |
|
"grad_norm": 1351.240966796875, |
|
"learning_rate": 1.4940000000000001e-05, |
|
"loss": 198.2244, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.030220146494988224, |
|
"grad_norm": 1484.0284423828125, |
|
"learning_rate": 1.4960000000000002e-05, |
|
"loss": 197.885, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.030260547760355853, |
|
"grad_norm": 1378.754638671875, |
|
"learning_rate": 1.4979999999999999e-05, |
|
"loss": 201.7562, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.030300949025723486, |
|
"grad_norm": 7344.61767578125, |
|
"learning_rate": 1.5e-05, |
|
"loss": 231.1991, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.030341350291091116, |
|
"grad_norm": 3804.617919921875, |
|
"learning_rate": 1.502e-05, |
|
"loss": 310.0097, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.03038175155645875, |
|
"grad_norm": 1033.6324462890625, |
|
"learning_rate": 1.5040000000000002e-05, |
|
"loss": 198.1659, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.03042215282182638, |
|
"grad_norm": 653.5598754882812, |
|
"learning_rate": 1.5060000000000001e-05, |
|
"loss": 178.0301, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.030462554087194012, |
|
"grad_norm": 1495.2060546875, |
|
"learning_rate": 1.508e-05, |
|
"loss": 250.4168, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.03050295535256164, |
|
"grad_norm": 3768.185791015625, |
|
"learning_rate": 1.51e-05, |
|
"loss": 161.5591, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.030543356617929274, |
|
"grad_norm": 966.1246337890625, |
|
"learning_rate": 1.5120000000000001e-05, |
|
"loss": 173.9351, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.030583757883296904, |
|
"grad_norm": 1045.052490234375, |
|
"learning_rate": 1.514e-05, |
|
"loss": 140.5521, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.030624159148664537, |
|
"grad_norm": 1348.0595703125, |
|
"learning_rate": 1.5160000000000002e-05, |
|
"loss": 318.5935, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.030664560414032167, |
|
"grad_norm": 798.9227294921875, |
|
"learning_rate": 1.518e-05, |
|
"loss": 269.7896, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.0307049616793998, |
|
"grad_norm": 1470.73583984375, |
|
"learning_rate": 1.52e-05, |
|
"loss": 282.1007, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.03074536294476743, |
|
"grad_norm": 770.0698852539062, |
|
"learning_rate": 1.5220000000000002e-05, |
|
"loss": 262.2653, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.030785764210135062, |
|
"grad_norm": 571.9855346679688, |
|
"learning_rate": 1.5240000000000001e-05, |
|
"loss": 192.3983, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.030826165475502692, |
|
"grad_norm": 1054.919921875, |
|
"learning_rate": 1.5260000000000003e-05, |
|
"loss": 219.8108, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.030866566740870325, |
|
"grad_norm": 556.494873046875, |
|
"learning_rate": 1.528e-05, |
|
"loss": 165.5094, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.030906968006237955, |
|
"grad_norm": 1616.6512451171875, |
|
"learning_rate": 1.53e-05, |
|
"loss": 183.0181, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.030947369271605588, |
|
"grad_norm": 678.8358764648438, |
|
"learning_rate": 1.5320000000000002e-05, |
|
"loss": 138.8231, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.030987770536973217, |
|
"grad_norm": 817.9168090820312, |
|
"learning_rate": 1.5340000000000002e-05, |
|
"loss": 166.3831, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.03102817180234085, |
|
"grad_norm": 1391.115966796875, |
|
"learning_rate": 1.536e-05, |
|
"loss": 265.5596, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.03106857306770848, |
|
"grad_norm": 2713.052490234375, |
|
"learning_rate": 1.538e-05, |
|
"loss": 221.1131, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.031108974333076113, |
|
"grad_norm": 1156.0615234375, |
|
"learning_rate": 1.54e-05, |
|
"loss": 188.7715, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.031149375598443742, |
|
"grad_norm": 1744.91845703125, |
|
"learning_rate": 1.542e-05, |
|
"loss": 203.3742, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.031189776863811376, |
|
"grad_norm": 1208.4666748046875, |
|
"learning_rate": 1.544e-05, |
|
"loss": 146.0077, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.031230178129179005, |
|
"grad_norm": 643.0628662109375, |
|
"learning_rate": 1.546e-05, |
|
"loss": 195.1551, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.03127057939454664, |
|
"grad_norm": 1256.745849609375, |
|
"learning_rate": 1.548e-05, |
|
"loss": 128.5509, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.03131098065991427, |
|
"grad_norm": 1408.4251708984375, |
|
"learning_rate": 1.55e-05, |
|
"loss": 202.1482, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.0313513819252819, |
|
"grad_norm": 2253.969970703125, |
|
"learning_rate": 1.552e-05, |
|
"loss": 258.4842, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.03139178319064953, |
|
"grad_norm": 1037.827392578125, |
|
"learning_rate": 1.554e-05, |
|
"loss": 167.7397, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.03143218445601716, |
|
"grad_norm": 1066.5501708984375, |
|
"learning_rate": 1.556e-05, |
|
"loss": 218.4733, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.031472585721384796, |
|
"grad_norm": 1018.4628295898438, |
|
"learning_rate": 1.558e-05, |
|
"loss": 196.4911, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.03151298698675242, |
|
"grad_norm": 1526.3201904296875, |
|
"learning_rate": 1.56e-05, |
|
"loss": 214.0186, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.031553388252120056, |
|
"grad_norm": 829.8909301757812, |
|
"learning_rate": 1.5620000000000003e-05, |
|
"loss": 150.7465, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.03159378951748769, |
|
"grad_norm": 1091.463623046875, |
|
"learning_rate": 1.5640000000000003e-05, |
|
"loss": 240.4906, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.03163419078285532, |
|
"grad_norm": 470.00250244140625, |
|
"learning_rate": 1.566e-05, |
|
"loss": 159.8959, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.03167459204822295, |
|
"grad_norm": 1311.1846923828125, |
|
"learning_rate": 1.568e-05, |
|
"loss": 205.6226, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.03171499331359058, |
|
"grad_norm": 1698.6754150390625, |
|
"learning_rate": 1.5700000000000002e-05, |
|
"loss": 217.6704, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.031755394578958214, |
|
"grad_norm": 1376.8931884765625, |
|
"learning_rate": 1.5720000000000002e-05, |
|
"loss": 172.5748, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.03179579584432585, |
|
"grad_norm": 1433.7232666015625, |
|
"learning_rate": 1.5740000000000002e-05, |
|
"loss": 185.2836, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.03183619710969347, |
|
"grad_norm": 4025.76220703125, |
|
"learning_rate": 1.5759999999999998e-05, |
|
"loss": 315.4204, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.031876598375061106, |
|
"grad_norm": 853.8228759765625, |
|
"learning_rate": 1.578e-05, |
|
"loss": 153.3957, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.03191699964042874, |
|
"grad_norm": 944.7000732421875, |
|
"learning_rate": 1.58e-05, |
|
"loss": 128.8845, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.03195740090579637, |
|
"grad_norm": 643.9895629882812, |
|
"learning_rate": 1.582e-05, |
|
"loss": 223.8146, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.031997802171164, |
|
"grad_norm": 793.1214599609375, |
|
"learning_rate": 1.584e-05, |
|
"loss": 231.9548, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.03203820343653163, |
|
"grad_norm": 1593.3084716796875, |
|
"learning_rate": 1.586e-05, |
|
"loss": 222.1474, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.032078604701899265, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.588e-05, |
|
"loss": 156.401, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.0321190059672669, |
|
"grad_norm": 941.607177734375, |
|
"learning_rate": 1.59e-05, |
|
"loss": 239.5231, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.032159407232634524, |
|
"grad_norm": 1561.1961669921875, |
|
"learning_rate": 1.592e-05, |
|
"loss": 269.0299, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.03219980849800216, |
|
"grad_norm": 4084.514892578125, |
|
"learning_rate": 1.594e-05, |
|
"loss": 269.6239, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.03224020976336979, |
|
"grad_norm": 1185.47314453125, |
|
"learning_rate": 1.596e-05, |
|
"loss": 217.7421, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.03228061102873742, |
|
"grad_norm": 1212.916748046875, |
|
"learning_rate": 1.598e-05, |
|
"loss": 187.7411, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.03232101229410505, |
|
"grad_norm": 658.2976684570312, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 230.9428, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03236141355947268, |
|
"grad_norm": 1249.109130859375, |
|
"learning_rate": 1.6020000000000002e-05, |
|
"loss": 222.3688, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.032401814824840315, |
|
"grad_norm": 3268.414306640625, |
|
"learning_rate": 1.604e-05, |
|
"loss": 330.1671, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.03244221609020795, |
|
"grad_norm": 2235.10595703125, |
|
"learning_rate": 1.606e-05, |
|
"loss": 239.3141, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.032482617355575574, |
|
"grad_norm": 461.5077819824219, |
|
"learning_rate": 1.6080000000000002e-05, |
|
"loss": 210.1967, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.03252301862094321, |
|
"grad_norm": 754.9512939453125, |
|
"learning_rate": 1.6100000000000002e-05, |
|
"loss": 196.5837, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.03256341988631084, |
|
"grad_norm": 1378.9425048828125, |
|
"learning_rate": 1.612e-05, |
|
"loss": 261.1862, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.03260382115167847, |
|
"grad_norm": 12839.7548828125, |
|
"learning_rate": 1.6139999999999998e-05, |
|
"loss": 188.6295, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.0326442224170461, |
|
"grad_norm": 1415.6925048828125, |
|
"learning_rate": 1.616e-05, |
|
"loss": 212.514, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.03268462368241373, |
|
"grad_norm": 633.0122680664062, |
|
"learning_rate": 1.618e-05, |
|
"loss": 167.5946, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.032725024947781366, |
|
"grad_norm": 1421.470947265625, |
|
"learning_rate": 1.62e-05, |
|
"loss": 141.4309, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.032765426213149, |
|
"grad_norm": 7646.71875, |
|
"learning_rate": 1.622e-05, |
|
"loss": 228.34, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.032805827478516625, |
|
"grad_norm": 1138.2156982421875, |
|
"learning_rate": 1.624e-05, |
|
"loss": 209.1626, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.03284622874388426, |
|
"grad_norm": 649.6055908203125, |
|
"learning_rate": 1.626e-05, |
|
"loss": 204.4883, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.03288663000925189, |
|
"grad_norm": 646.7942504882812, |
|
"learning_rate": 1.628e-05, |
|
"loss": 155.1816, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.032927031274619524, |
|
"grad_norm": 632.5813598632812, |
|
"learning_rate": 1.63e-05, |
|
"loss": 144.9363, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.03296743253998715, |
|
"grad_norm": 859.72900390625, |
|
"learning_rate": 1.6320000000000003e-05, |
|
"loss": 207.8656, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.03300783380535478, |
|
"grad_norm": 661.2268676757812, |
|
"learning_rate": 1.634e-05, |
|
"loss": 156.5535, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.033048235070722416, |
|
"grad_norm": 1062.429931640625, |
|
"learning_rate": 1.636e-05, |
|
"loss": 206.9411, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.03308863633609005, |
|
"grad_norm": 1048.27294921875, |
|
"learning_rate": 1.6380000000000002e-05, |
|
"loss": 186.6454, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.033129037601457675, |
|
"grad_norm": 1131.9554443359375, |
|
"learning_rate": 1.6400000000000002e-05, |
|
"loss": 255.3936, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.03316943886682531, |
|
"grad_norm": 1163.3974609375, |
|
"learning_rate": 1.6420000000000002e-05, |
|
"loss": 188.8816, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.03320984013219294, |
|
"grad_norm": 555.4834594726562, |
|
"learning_rate": 1.644e-05, |
|
"loss": 134.8741, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.033250241397560575, |
|
"grad_norm": 837.699951171875, |
|
"learning_rate": 1.646e-05, |
|
"loss": 183.4674, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.0332906426629282, |
|
"grad_norm": 731.1207275390625, |
|
"learning_rate": 1.648e-05, |
|
"loss": 117.9002, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.033331043928295834, |
|
"grad_norm": 747.77880859375, |
|
"learning_rate": 1.65e-05, |
|
"loss": 222.1833, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.03337144519366347, |
|
"grad_norm": 1067.3509521484375, |
|
"learning_rate": 1.652e-05, |
|
"loss": 186.3794, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.0334118464590311, |
|
"grad_norm": 1084.0224609375, |
|
"learning_rate": 1.654e-05, |
|
"loss": 236.8639, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.033452247724398726, |
|
"grad_norm": 1279.375732421875, |
|
"learning_rate": 1.656e-05, |
|
"loss": 259.652, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.03349264898976636, |
|
"grad_norm": 947.9122314453125, |
|
"learning_rate": 1.658e-05, |
|
"loss": 233.7726, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.03353305025513399, |
|
"grad_norm": 439.6305236816406, |
|
"learning_rate": 1.66e-05, |
|
"loss": 177.2815, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.033573451520501625, |
|
"grad_norm": 815.047119140625, |
|
"learning_rate": 1.662e-05, |
|
"loss": 176.6726, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.03361385278586925, |
|
"grad_norm": 6470.5361328125, |
|
"learning_rate": 1.664e-05, |
|
"loss": 222.3292, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.033654254051236884, |
|
"grad_norm": 790.7086791992188, |
|
"learning_rate": 1.666e-05, |
|
"loss": 194.9121, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.03369465531660452, |
|
"grad_norm": 794.2681884765625, |
|
"learning_rate": 1.668e-05, |
|
"loss": 175.7233, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.03373505658197215, |
|
"grad_norm": 1229.4151611328125, |
|
"learning_rate": 1.6700000000000003e-05, |
|
"loss": 227.8069, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.033775457847339777, |
|
"grad_norm": 792.4901123046875, |
|
"learning_rate": 1.672e-05, |
|
"loss": 198.4519, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.03381585911270741, |
|
"grad_norm": 1083.63623046875, |
|
"learning_rate": 1.674e-05, |
|
"loss": 164.7306, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.03385626037807504, |
|
"grad_norm": 1077.84765625, |
|
"learning_rate": 1.6760000000000002e-05, |
|
"loss": 154.6505, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.033896661643442676, |
|
"grad_norm": 2413.804931640625, |
|
"learning_rate": 1.6780000000000002e-05, |
|
"loss": 247.3373, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.0339370629088103, |
|
"grad_norm": 2946.898681640625, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 161.9533, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.033977464174177935, |
|
"grad_norm": 1092.52392578125, |
|
"learning_rate": 1.6819999999999998e-05, |
|
"loss": 216.0875, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.03401786543954557, |
|
"grad_norm": 930.1837158203125, |
|
"learning_rate": 1.684e-05, |
|
"loss": 169.9454, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.0340582667049132, |
|
"grad_norm": 514.6612548828125, |
|
"learning_rate": 1.686e-05, |
|
"loss": 202.2469, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.03409866797028083, |
|
"grad_norm": 747.2769775390625, |
|
"learning_rate": 1.688e-05, |
|
"loss": 176.8761, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.03413906923564846, |
|
"grad_norm": 1148.2232666015625, |
|
"learning_rate": 1.69e-05, |
|
"loss": 124.8942, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.03417947050101609, |
|
"grad_norm": 1272.0814208984375, |
|
"learning_rate": 1.692e-05, |
|
"loss": 201.3207, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.034219871766383726, |
|
"grad_norm": 658.9476318359375, |
|
"learning_rate": 1.694e-05, |
|
"loss": 193.7989, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.03426027303175135, |
|
"grad_norm": 1220.1839599609375, |
|
"learning_rate": 1.696e-05, |
|
"loss": 204.7586, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.034300674297118985, |
|
"grad_norm": 650.1453857421875, |
|
"learning_rate": 1.698e-05, |
|
"loss": 243.6956, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.03434107556248662, |
|
"grad_norm": 654.3552856445312, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 200.6542, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03438147682785425, |
|
"grad_norm": 831.0885009765625, |
|
"learning_rate": 1.702e-05, |
|
"loss": 245.5201, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.03442187809322188, |
|
"grad_norm": 945.2589111328125, |
|
"learning_rate": 1.704e-05, |
|
"loss": 192.208, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.03446227935858951, |
|
"grad_norm": 929.2937622070312, |
|
"learning_rate": 1.706e-05, |
|
"loss": 195.5855, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.034502680623957144, |
|
"grad_norm": 1585.8245849609375, |
|
"learning_rate": 1.7080000000000002e-05, |
|
"loss": 161.8802, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.03454308188932478, |
|
"grad_norm": 1783.3887939453125, |
|
"learning_rate": 1.7100000000000002e-05, |
|
"loss": 259.6359, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.0345834831546924, |
|
"grad_norm": 1169.6343994140625, |
|
"learning_rate": 1.712e-05, |
|
"loss": 126.7247, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.034623884420060036, |
|
"grad_norm": 967.0234375, |
|
"learning_rate": 1.7140000000000002e-05, |
|
"loss": 228.0373, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.03466428568542767, |
|
"grad_norm": 1204.86865234375, |
|
"learning_rate": 1.7160000000000002e-05, |
|
"loss": 183.3604, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.0347046869507953, |
|
"grad_norm": 3491.818603515625, |
|
"learning_rate": 1.718e-05, |
|
"loss": 151.8515, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.03474508821616293, |
|
"grad_norm": 698.922607421875, |
|
"learning_rate": 1.7199999999999998e-05, |
|
"loss": 364.246, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.03478548948153056, |
|
"grad_norm": 754.37646484375, |
|
"learning_rate": 1.722e-05, |
|
"loss": 214.1979, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.034825890746898194, |
|
"grad_norm": 1443.7174072265625, |
|
"learning_rate": 1.724e-05, |
|
"loss": 146.2507, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.03486629201226583, |
|
"grad_norm": 1486.6044921875, |
|
"learning_rate": 1.726e-05, |
|
"loss": 209.7959, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.034906693277633453, |
|
"grad_norm": 1869.3212890625, |
|
"learning_rate": 1.728e-05, |
|
"loss": 216.6505, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.03494709454300109, |
|
"grad_norm": 1274.484130859375, |
|
"learning_rate": 1.73e-05, |
|
"loss": 175.598, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.03498749580836872, |
|
"grad_norm": 906.1784057617188, |
|
"learning_rate": 1.732e-05, |
|
"loss": 194.2832, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.03502789707373635, |
|
"grad_norm": 897.7321166992188, |
|
"learning_rate": 1.734e-05, |
|
"loss": 167.8126, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.03506829833910398, |
|
"grad_norm": 1329.6224365234375, |
|
"learning_rate": 1.736e-05, |
|
"loss": 100.7456, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.03510869960447161, |
|
"grad_norm": 2856.0009765625, |
|
"learning_rate": 1.7380000000000003e-05, |
|
"loss": 136.0745, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.035149100869839245, |
|
"grad_norm": 851.291748046875, |
|
"learning_rate": 1.74e-05, |
|
"loss": 276.2555, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.03518950213520688, |
|
"grad_norm": 1677.5799560546875, |
|
"learning_rate": 1.742e-05, |
|
"loss": 209.6706, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.035229903400574504, |
|
"grad_norm": 640.1288452148438, |
|
"learning_rate": 1.7440000000000002e-05, |
|
"loss": 223.2772, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.03527030466594214, |
|
"grad_norm": 4403.36962890625, |
|
"learning_rate": 1.7460000000000002e-05, |
|
"loss": 219.3949, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.03531070593130977, |
|
"grad_norm": 2118.526123046875, |
|
"learning_rate": 1.7480000000000002e-05, |
|
"loss": 264.3927, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.0353511071966774, |
|
"grad_norm": 1355.910400390625, |
|
"learning_rate": 1.75e-05, |
|
"loss": 172.0311, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.03539150846204503, |
|
"grad_norm": 1693.513671875, |
|
"learning_rate": 1.752e-05, |
|
"loss": 185.7292, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.03543190972741266, |
|
"grad_norm": 1187.20263671875, |
|
"learning_rate": 1.754e-05, |
|
"loss": 157.557, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.035472310992780295, |
|
"grad_norm": 893.8792114257812, |
|
"learning_rate": 1.756e-05, |
|
"loss": 191.817, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.03551271225814793, |
|
"grad_norm": 1077.1021728515625, |
|
"learning_rate": 1.758e-05, |
|
"loss": 189.9844, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.035553113523515555, |
|
"grad_norm": 929.8483276367188, |
|
"learning_rate": 1.76e-05, |
|
"loss": 197.5466, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.03559351478888319, |
|
"grad_norm": 2271.410400390625, |
|
"learning_rate": 1.762e-05, |
|
"loss": 228.7887, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.03563391605425082, |
|
"grad_norm": 2570.052001953125, |
|
"learning_rate": 1.764e-05, |
|
"loss": 155.6589, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.035674317319618454, |
|
"grad_norm": 657.0897216796875, |
|
"learning_rate": 1.766e-05, |
|
"loss": 225.2949, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.03571471858498608, |
|
"grad_norm": 1940.9866943359375, |
|
"learning_rate": 1.7680000000000004e-05, |
|
"loss": 162.1829, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.03575511985035371, |
|
"grad_norm": 1256.1148681640625, |
|
"learning_rate": 1.77e-05, |
|
"loss": 174.6637, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.035795521115721346, |
|
"grad_norm": 394.8336181640625, |
|
"learning_rate": 1.772e-05, |
|
"loss": 173.8255, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.03583592238108898, |
|
"grad_norm": 2153.534423828125, |
|
"learning_rate": 1.774e-05, |
|
"loss": 171.3043, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.035876323646456605, |
|
"grad_norm": 760.2359619140625, |
|
"learning_rate": 1.7760000000000003e-05, |
|
"loss": 200.2851, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.03591672491182424, |
|
"grad_norm": 1478.953369140625, |
|
"learning_rate": 1.7780000000000003e-05, |
|
"loss": 225.8184, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.03595712617719187, |
|
"grad_norm": 1087.418212890625, |
|
"learning_rate": 1.78e-05, |
|
"loss": 185.8715, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.035997527442559504, |
|
"grad_norm": 2434.63623046875, |
|
"learning_rate": 1.7820000000000002e-05, |
|
"loss": 178.0469, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.03603792870792713, |
|
"grad_norm": 1043.3377685546875, |
|
"learning_rate": 1.7840000000000002e-05, |
|
"loss": 179.4526, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.036078329973294763, |
|
"grad_norm": 1111.5899658203125, |
|
"learning_rate": 1.7860000000000002e-05, |
|
"loss": 210.2127, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.0361187312386624, |
|
"grad_norm": 688.8521728515625, |
|
"learning_rate": 1.7879999999999998e-05, |
|
"loss": 126.2401, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.03615913250403003, |
|
"grad_norm": 958.8832397460938, |
|
"learning_rate": 1.79e-05, |
|
"loss": 221.2132, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.036199533769397656, |
|
"grad_norm": 1401.521728515625, |
|
"learning_rate": 1.792e-05, |
|
"loss": 236.2084, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.03623993503476529, |
|
"grad_norm": 10308.7021484375, |
|
"learning_rate": 1.794e-05, |
|
"loss": 186.4272, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.03628033630013292, |
|
"grad_norm": 1097.2364501953125, |
|
"learning_rate": 1.796e-05, |
|
"loss": 178.1187, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.036320737565500555, |
|
"grad_norm": 1978.429931640625, |
|
"learning_rate": 1.798e-05, |
|
"loss": 196.0737, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.03636113883086818, |
|
"grad_norm": 3581.975341796875, |
|
"learning_rate": 1.8e-05, |
|
"loss": 209.1971, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.036401540096235814, |
|
"grad_norm": 1180.078125, |
|
"learning_rate": 1.802e-05, |
|
"loss": 220.5676, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.03644194136160345, |
|
"grad_norm": 1235.55859375, |
|
"learning_rate": 1.804e-05, |
|
"loss": 124.504, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.03648234262697108, |
|
"grad_norm": 1020.11865234375, |
|
"learning_rate": 1.8060000000000003e-05, |
|
"loss": 99.5129, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.036522743892338706, |
|
"grad_norm": 962.7813110351562, |
|
"learning_rate": 1.808e-05, |
|
"loss": 178.8402, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.03656314515770634, |
|
"grad_norm": 1318.480224609375, |
|
"learning_rate": 1.81e-05, |
|
"loss": 180.0119, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.03660354642307397, |
|
"grad_norm": 2261.136962890625, |
|
"learning_rate": 1.812e-05, |
|
"loss": 329.5452, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.036643947688441605, |
|
"grad_norm": 3996.27197265625, |
|
"learning_rate": 1.8140000000000003e-05, |
|
"loss": 224.336, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.03668434895380923, |
|
"grad_norm": 724.8762817382812, |
|
"learning_rate": 1.8160000000000002e-05, |
|
"loss": 148.4422, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.036724750219176865, |
|
"grad_norm": 838.4283447265625, |
|
"learning_rate": 1.818e-05, |
|
"loss": 160.2637, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.0367651514845445, |
|
"grad_norm": 1999.3927001953125, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 252.0707, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.03680555274991213, |
|
"grad_norm": 1008.3331298828125, |
|
"learning_rate": 1.8220000000000002e-05, |
|
"loss": 112.7752, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.03684595401527976, |
|
"grad_norm": 1373.5531005859375, |
|
"learning_rate": 1.824e-05, |
|
"loss": 140.497, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.03688635528064739, |
|
"grad_norm": 698.5148315429688, |
|
"learning_rate": 1.826e-05, |
|
"loss": 132.642, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.03692675654601502, |
|
"grad_norm": 1003.8239135742188, |
|
"learning_rate": 1.828e-05, |
|
"loss": 172.7703, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.03696715781138265, |
|
"grad_norm": 695.0194702148438, |
|
"learning_rate": 1.83e-05, |
|
"loss": 188.6919, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.03700755907675028, |
|
"grad_norm": 844.4266967773438, |
|
"learning_rate": 1.832e-05, |
|
"loss": 253.677, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.037047960342117915, |
|
"grad_norm": 1122.217041015625, |
|
"learning_rate": 1.834e-05, |
|
"loss": 197.4021, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.03708836160748555, |
|
"grad_norm": 1105.47314453125, |
|
"learning_rate": 1.8360000000000004e-05, |
|
"loss": 146.558, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.037128762872853174, |
|
"grad_norm": 1299.706298828125, |
|
"learning_rate": 1.838e-05, |
|
"loss": 196.9577, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.03716916413822081, |
|
"grad_norm": 4088.533447265625, |
|
"learning_rate": 1.84e-05, |
|
"loss": 178.9004, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.03720956540358844, |
|
"grad_norm": 2833.98388671875, |
|
"learning_rate": 1.842e-05, |
|
"loss": 220.8664, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.037249966668956074, |
|
"grad_norm": 649.87548828125, |
|
"learning_rate": 1.8440000000000003e-05, |
|
"loss": 163.9712, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.0372903679343237, |
|
"grad_norm": 1362.6846923828125, |
|
"learning_rate": 1.846e-05, |
|
"loss": 186.3454, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.03733076919969133, |
|
"grad_norm": 4132.52099609375, |
|
"learning_rate": 1.848e-05, |
|
"loss": 205.0476, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.037371170465058966, |
|
"grad_norm": 1643.118896484375, |
|
"learning_rate": 1.85e-05, |
|
"loss": 209.0955, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.0374115717304266, |
|
"grad_norm": 5062.59716796875, |
|
"learning_rate": 1.8520000000000002e-05, |
|
"loss": 145.1507, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.037451972995794225, |
|
"grad_norm": 390.54925537109375, |
|
"learning_rate": 1.8540000000000002e-05, |
|
"loss": 221.6778, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.03749237426116186, |
|
"grad_norm": 593.5599975585938, |
|
"learning_rate": 1.856e-05, |
|
"loss": 188.7002, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.03753277552652949, |
|
"grad_norm": 1883.616455078125, |
|
"learning_rate": 1.858e-05, |
|
"loss": 189.2378, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.037573176791897124, |
|
"grad_norm": 1161.8848876953125, |
|
"learning_rate": 1.86e-05, |
|
"loss": 197.2501, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.03761357805726475, |
|
"grad_norm": 692.7471313476562, |
|
"learning_rate": 1.862e-05, |
|
"loss": 171.7747, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.03765397932263238, |
|
"grad_norm": 644.8750610351562, |
|
"learning_rate": 1.864e-05, |
|
"loss": 243.3304, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.037694380588000016, |
|
"grad_norm": 1095.04443359375, |
|
"learning_rate": 1.866e-05, |
|
"loss": 262.2983, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.03773478185336765, |
|
"grad_norm": 1455.4674072265625, |
|
"learning_rate": 1.868e-05, |
|
"loss": 179.6782, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.037775183118735275, |
|
"grad_norm": 788.5756225585938, |
|
"learning_rate": 1.87e-05, |
|
"loss": 108.4792, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.03781558438410291, |
|
"grad_norm": 1122.2181396484375, |
|
"learning_rate": 1.872e-05, |
|
"loss": 261.3737, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.03785598564947054, |
|
"grad_norm": 1429.47607421875, |
|
"learning_rate": 1.8740000000000004e-05, |
|
"loss": 143.9239, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.037896386914838175, |
|
"grad_norm": 1072.0924072265625, |
|
"learning_rate": 1.876e-05, |
|
"loss": 217.753, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.0379367881802058, |
|
"grad_norm": 2434.5927734375, |
|
"learning_rate": 1.878e-05, |
|
"loss": 144.4152, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.037977189445573434, |
|
"grad_norm": 803.2135620117188, |
|
"learning_rate": 1.88e-05, |
|
"loss": 167.2345, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.03801759071094107, |
|
"grad_norm": 1207.8824462890625, |
|
"learning_rate": 1.8820000000000003e-05, |
|
"loss": 211.6839, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.0380579919763087, |
|
"grad_norm": 1132.8394775390625, |
|
"learning_rate": 1.8840000000000003e-05, |
|
"loss": 147.7339, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.038098393241676326, |
|
"grad_norm": 802.6298828125, |
|
"learning_rate": 1.886e-05, |
|
"loss": 139.8525, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.03813879450704396, |
|
"grad_norm": 3992.421875, |
|
"learning_rate": 1.888e-05, |
|
"loss": 199.5186, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.03817919577241159, |
|
"grad_norm": 1528.7784423828125, |
|
"learning_rate": 1.8900000000000002e-05, |
|
"loss": 186.0713, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.038219597037779225, |
|
"grad_norm": 853.1768798828125, |
|
"learning_rate": 1.8920000000000002e-05, |
|
"loss": 222.1405, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.03825999830314685, |
|
"grad_norm": 991.3099975585938, |
|
"learning_rate": 1.894e-05, |
|
"loss": 219.1491, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.038300399568514484, |
|
"grad_norm": 2823.091064453125, |
|
"learning_rate": 1.896e-05, |
|
"loss": 216.1606, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.03834080083388212, |
|
"grad_norm": 792.9014892578125, |
|
"learning_rate": 1.898e-05, |
|
"loss": 185.3038, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.03838120209924975, |
|
"grad_norm": 760.1093139648438, |
|
"learning_rate": 1.9e-05, |
|
"loss": 213.5252, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03842160336461738, |
|
"grad_norm": 1229.9395751953125, |
|
"learning_rate": 1.902e-05, |
|
"loss": 99.1262, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.03846200462998501, |
|
"grad_norm": 1547.040771484375, |
|
"learning_rate": 1.904e-05, |
|
"loss": 193.0997, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.03850240589535264, |
|
"grad_norm": 5546.650390625, |
|
"learning_rate": 1.906e-05, |
|
"loss": 191.2586, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.038542807160720276, |
|
"grad_norm": 1049.4068603515625, |
|
"learning_rate": 1.908e-05, |
|
"loss": 126.0632, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.0385832084260879, |
|
"grad_norm": 670.5220947265625, |
|
"learning_rate": 1.91e-05, |
|
"loss": 152.6221, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.038623609691455535, |
|
"grad_norm": 1974.0662841796875, |
|
"learning_rate": 1.9120000000000003e-05, |
|
"loss": 175.8063, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.03866401095682317, |
|
"grad_norm": 4551.0517578125, |
|
"learning_rate": 1.914e-05, |
|
"loss": 188.1946, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.0387044122221908, |
|
"grad_norm": 806.855712890625, |
|
"learning_rate": 1.916e-05, |
|
"loss": 166.843, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.03874481348755843, |
|
"grad_norm": 531.7847290039062, |
|
"learning_rate": 1.918e-05, |
|
"loss": 141.4506, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.03878521475292606, |
|
"grad_norm": 931.3214111328125, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 184.4449, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.03882561601829369, |
|
"grad_norm": 728.2243041992188, |
|
"learning_rate": 1.9220000000000002e-05, |
|
"loss": 144.1032, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.038866017283661326, |
|
"grad_norm": 1086.5455322265625, |
|
"learning_rate": 1.924e-05, |
|
"loss": 166.8239, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.03890641854902895, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9260000000000002e-05, |
|
"loss": 195.737, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.038946819814396585, |
|
"grad_norm": 1560.2904052734375, |
|
"learning_rate": 1.9280000000000002e-05, |
|
"loss": 165.3053, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.03898722107976422, |
|
"grad_norm": 1141.6810302734375, |
|
"learning_rate": 1.93e-05, |
|
"loss": 170.9885, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.03902762234513185, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.932e-05, |
|
"loss": 154.8165, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.03906802361049948, |
|
"grad_norm": 1502.9287109375, |
|
"learning_rate": 1.934e-05, |
|
"loss": 161.04, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.03910842487586711, |
|
"grad_norm": 1250.09619140625, |
|
"learning_rate": 1.936e-05, |
|
"loss": 140.4067, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.039148826141234744, |
|
"grad_norm": 1319.486328125, |
|
"learning_rate": 1.938e-05, |
|
"loss": 183.2919, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.03918922740660238, |
|
"grad_norm": 1285.4862060546875, |
|
"learning_rate": 1.94e-05, |
|
"loss": 200.0876, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.03922962867197, |
|
"grad_norm": 935.0762939453125, |
|
"learning_rate": 1.942e-05, |
|
"loss": 135.3171, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.039270029937337636, |
|
"grad_norm": 1619.843017578125, |
|
"learning_rate": 1.944e-05, |
|
"loss": 193.6001, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.03931043120270527, |
|
"grad_norm": 1271.864990234375, |
|
"learning_rate": 1.946e-05, |
|
"loss": 115.0868, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.0393508324680729, |
|
"grad_norm": 724.2216186523438, |
|
"learning_rate": 1.948e-05, |
|
"loss": 184.4641, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.03939123373344053, |
|
"grad_norm": 1635.4261474609375, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 208.8505, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.03943163499880816, |
|
"grad_norm": 2965.97265625, |
|
"learning_rate": 1.9520000000000003e-05, |
|
"loss": 172.8497, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.039472036264175794, |
|
"grad_norm": 2287.69287109375, |
|
"learning_rate": 1.954e-05, |
|
"loss": 182.5987, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.03951243752954343, |
|
"grad_norm": 812.2120971679688, |
|
"learning_rate": 1.956e-05, |
|
"loss": 125.8975, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.039552838794911054, |
|
"grad_norm": 2255.82275390625, |
|
"learning_rate": 1.9580000000000002e-05, |
|
"loss": 205.0292, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.03959324006027869, |
|
"grad_norm": 794.7918701171875, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 140.7012, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.03963364132564632, |
|
"grad_norm": 1928.4591064453125, |
|
"learning_rate": 1.9620000000000002e-05, |
|
"loss": 230.6109, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.03967404259101395, |
|
"grad_norm": 497.63433837890625, |
|
"learning_rate": 1.9640000000000002e-05, |
|
"loss": 175.5, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.03971444385638158, |
|
"grad_norm": 605.4525146484375, |
|
"learning_rate": 1.966e-05, |
|
"loss": 146.0165, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.03975484512174921, |
|
"grad_norm": 12817.353515625, |
|
"learning_rate": 1.968e-05, |
|
"loss": 223.3047, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.039795246387116845, |
|
"grad_norm": 1107.6016845703125, |
|
"learning_rate": 1.97e-05, |
|
"loss": 164.6401, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.03983564765248448, |
|
"grad_norm": 1794.5758056640625, |
|
"learning_rate": 1.972e-05, |
|
"loss": 211.4972, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.039876048917852104, |
|
"grad_norm": 1120.5679931640625, |
|
"learning_rate": 1.974e-05, |
|
"loss": 198.396, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.03991645018321974, |
|
"grad_norm": 627.087646484375, |
|
"learning_rate": 1.976e-05, |
|
"loss": 142.4417, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.03995685144858737, |
|
"grad_norm": 415.39337158203125, |
|
"learning_rate": 1.978e-05, |
|
"loss": 109.121, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.039997252713955, |
|
"grad_norm": 1138.5850830078125, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 190.1185, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.04003765397932263, |
|
"grad_norm": 506.6181945800781, |
|
"learning_rate": 1.982e-05, |
|
"loss": 152.6702, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.04007805524469026, |
|
"grad_norm": 1317.2239990234375, |
|
"learning_rate": 1.984e-05, |
|
"loss": 210.7801, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.040118456510057895, |
|
"grad_norm": 1474.5733642578125, |
|
"learning_rate": 1.986e-05, |
|
"loss": 224.4348, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.04015885777542553, |
|
"grad_norm": 1309.1876220703125, |
|
"learning_rate": 1.9880000000000003e-05, |
|
"loss": 87.4433, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.040199259040793155, |
|
"grad_norm": 861.6743774414062, |
|
"learning_rate": 1.9900000000000003e-05, |
|
"loss": 136.5576, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.04023966030616079, |
|
"grad_norm": 847.05517578125, |
|
"learning_rate": 1.992e-05, |
|
"loss": 182.9425, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.04028006157152842, |
|
"grad_norm": 1133.978515625, |
|
"learning_rate": 1.994e-05, |
|
"loss": 205.3185, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.040320462836896054, |
|
"grad_norm": 670.1548461914062, |
|
"learning_rate": 1.9960000000000002e-05, |
|
"loss": 190.8365, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.04036086410226368, |
|
"grad_norm": 887.6602783203125, |
|
"learning_rate": 1.9980000000000002e-05, |
|
"loss": 180.067, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.04040126536763131, |
|
"grad_norm": 687.7095336914062, |
|
"learning_rate": 2e-05, |
|
"loss": 127.6409, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.040441666632998946, |
|
"grad_norm": 385.455810546875, |
|
"learning_rate": 2.002e-05, |
|
"loss": 161.4435, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.04048206789836658, |
|
"grad_norm": 1409.2718505859375, |
|
"learning_rate": 2.004e-05, |
|
"loss": 182.3646, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.040522469163734205, |
|
"grad_norm": 803.884765625, |
|
"learning_rate": 2.006e-05, |
|
"loss": 210.5829, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.04056287042910184, |
|
"grad_norm": 1560.3782958984375, |
|
"learning_rate": 2.008e-05, |
|
"loss": 164.0962, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.04060327169446947, |
|
"grad_norm": 2029.135009765625, |
|
"learning_rate": 2.01e-05, |
|
"loss": 140.0395, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.040643672959837104, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.012e-05, |
|
"loss": 144.4245, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.04068407422520473, |
|
"grad_norm": 1140.849853515625, |
|
"learning_rate": 2.014e-05, |
|
"loss": 179.6542, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.040724475490572364, |
|
"grad_norm": 1746.8056640625, |
|
"learning_rate": 2.016e-05, |
|
"loss": 222.9251, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.04076487675594, |
|
"grad_norm": 722.617919921875, |
|
"learning_rate": 2.0180000000000003e-05, |
|
"loss": 175.7524, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.04080527802130763, |
|
"grad_norm": 837.6021118164062, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 180.1091, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.040845679286675256, |
|
"grad_norm": 1460.4134521484375, |
|
"learning_rate": 2.022e-05, |
|
"loss": 188.8666, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.04088608055204289, |
|
"grad_norm": 770.96484375, |
|
"learning_rate": 2.024e-05, |
|
"loss": 164.6192, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.04092648181741052, |
|
"grad_norm": 887.3283081054688, |
|
"learning_rate": 2.0260000000000003e-05, |
|
"loss": 181.0503, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.040966883082778155, |
|
"grad_norm": 1267.9124755859375, |
|
"learning_rate": 2.0280000000000002e-05, |
|
"loss": 191.1377, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.04100728434814578, |
|
"grad_norm": 785.875244140625, |
|
"learning_rate": 2.0300000000000002e-05, |
|
"loss": 175.139, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.041047685613513414, |
|
"grad_norm": 3146.860107421875, |
|
"learning_rate": 2.032e-05, |
|
"loss": 189.9724, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.04108808687888105, |
|
"grad_norm": 1048.8988037109375, |
|
"learning_rate": 2.0340000000000002e-05, |
|
"loss": 219.648, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.04112848814424868, |
|
"grad_norm": 534.6448364257812, |
|
"learning_rate": 2.036e-05, |
|
"loss": 165.6375, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.041168889409616306, |
|
"grad_norm": 2196.583984375, |
|
"learning_rate": 2.038e-05, |
|
"loss": 219.21, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.04120929067498394, |
|
"grad_norm": 868.2059326171875, |
|
"learning_rate": 2.04e-05, |
|
"loss": 186.5126, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.04124969194035157, |
|
"grad_norm": 1177.8323974609375, |
|
"learning_rate": 2.042e-05, |
|
"loss": 173.2809, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.041290093205719205, |
|
"grad_norm": 1213.236572265625, |
|
"learning_rate": 2.044e-05, |
|
"loss": 195.4992, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.04133049447108683, |
|
"grad_norm": 712.7450561523438, |
|
"learning_rate": 2.046e-05, |
|
"loss": 123.3338, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.041370895736454465, |
|
"grad_norm": 1610.07373046875, |
|
"learning_rate": 2.048e-05, |
|
"loss": 182.489, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.0414112970018221, |
|
"grad_norm": 1756.5374755859375, |
|
"learning_rate": 2.05e-05, |
|
"loss": 271.8728, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.04145169826718973, |
|
"grad_norm": 1240.2843017578125, |
|
"learning_rate": 2.052e-05, |
|
"loss": 166.9299, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.04149209953255736, |
|
"grad_norm": 803.8770141601562, |
|
"learning_rate": 2.054e-05, |
|
"loss": 155.5844, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.04153250079792499, |
|
"grad_norm": 954.6376953125, |
|
"learning_rate": 2.0560000000000003e-05, |
|
"loss": 199.4898, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.04157290206329262, |
|
"grad_norm": 667.2396240234375, |
|
"learning_rate": 2.0580000000000003e-05, |
|
"loss": 112.1381, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.041613303328660256, |
|
"grad_norm": 1343.771240234375, |
|
"learning_rate": 2.06e-05, |
|
"loss": 200.9802, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.04165370459402788, |
|
"grad_norm": 1224.6522216796875, |
|
"learning_rate": 2.062e-05, |
|
"loss": 128.1033, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.041694105859395515, |
|
"grad_norm": 845.0104370117188, |
|
"learning_rate": 2.0640000000000002e-05, |
|
"loss": 165.8742, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.04173450712476315, |
|
"grad_norm": 532.292236328125, |
|
"learning_rate": 2.0660000000000002e-05, |
|
"loss": 131.5288, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.04177490839013078, |
|
"grad_norm": 876.7532958984375, |
|
"learning_rate": 2.0680000000000002e-05, |
|
"loss": 136.8921, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.04181530965549841, |
|
"grad_norm": 2460.5791015625, |
|
"learning_rate": 2.07e-05, |
|
"loss": 152.2601, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.04185571092086604, |
|
"grad_norm": 1794.477783203125, |
|
"learning_rate": 2.072e-05, |
|
"loss": 201.8371, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.041896112186233674, |
|
"grad_norm": 1977.7452392578125, |
|
"learning_rate": 2.074e-05, |
|
"loss": 247.7894, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.04193651345160131, |
|
"grad_norm": 667.1467895507812, |
|
"learning_rate": 2.076e-05, |
|
"loss": 222.6323, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.04197691471696893, |
|
"grad_norm": 351.2049865722656, |
|
"learning_rate": 2.078e-05, |
|
"loss": 138.2745, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.042017315982336566, |
|
"grad_norm": 944.7986450195312, |
|
"learning_rate": 2.08e-05, |
|
"loss": 196.8081, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.0420577172477042, |
|
"grad_norm": 1119.5870361328125, |
|
"learning_rate": 2.082e-05, |
|
"loss": 163.2965, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.04209811851307183, |
|
"grad_norm": 684.2236938476562, |
|
"learning_rate": 2.084e-05, |
|
"loss": 154.9308, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.04213851977843946, |
|
"grad_norm": 1214.9677734375, |
|
"learning_rate": 2.086e-05, |
|
"loss": 181.0213, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.04217892104380709, |
|
"grad_norm": 979.2085571289062, |
|
"learning_rate": 2.0880000000000003e-05, |
|
"loss": 214.0994, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.042219322309174724, |
|
"grad_norm": 1195.45947265625, |
|
"learning_rate": 2.09e-05, |
|
"loss": 184.8288, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.04225972357454236, |
|
"grad_norm": 3266.998291015625, |
|
"learning_rate": 2.092e-05, |
|
"loss": 154.9474, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.04230012483990998, |
|
"grad_norm": 1256.9425048828125, |
|
"learning_rate": 2.0940000000000003e-05, |
|
"loss": 119.0928, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.042340526105277616, |
|
"grad_norm": 816.3329467773438, |
|
"learning_rate": 2.0960000000000003e-05, |
|
"loss": 138.4739, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.04238092737064525, |
|
"grad_norm": 1145.7486572265625, |
|
"learning_rate": 2.098e-05, |
|
"loss": 179.8457, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.04242132863601288, |
|
"grad_norm": 4142.47998046875, |
|
"learning_rate": 2.1e-05, |
|
"loss": 190.1194, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.04246172990138051, |
|
"grad_norm": 748.7318115234375, |
|
"learning_rate": 2.1020000000000002e-05, |
|
"loss": 156.9302, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.04250213116674814, |
|
"grad_norm": 1273.5777587890625, |
|
"learning_rate": 2.1040000000000002e-05, |
|
"loss": 175.272, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.042542532432115775, |
|
"grad_norm": 551.0802612304688, |
|
"learning_rate": 2.106e-05, |
|
"loss": 167.7439, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.04258293369748341, |
|
"grad_norm": 604.2559814453125, |
|
"learning_rate": 2.1079999999999998e-05, |
|
"loss": 153.7686, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.042623334962851034, |
|
"grad_norm": 460.89154052734375, |
|
"learning_rate": 2.11e-05, |
|
"loss": 117.5873, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.04266373622821867, |
|
"grad_norm": 1551.710693359375, |
|
"learning_rate": 2.112e-05, |
|
"loss": 165.0876, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.0427041374935863, |
|
"grad_norm": 934.1738891601562, |
|
"learning_rate": 2.114e-05, |
|
"loss": 143.8318, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.04274453875895393, |
|
"grad_norm": 1267.439453125, |
|
"learning_rate": 2.116e-05, |
|
"loss": 187.996, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.04278494002432156, |
|
"grad_norm": 1843.2279052734375, |
|
"learning_rate": 2.118e-05, |
|
"loss": 181.2761, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.04282534128968919, |
|
"grad_norm": 2143.365966796875, |
|
"learning_rate": 2.12e-05, |
|
"loss": 158.841, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.042865742555056825, |
|
"grad_norm": 925.6973266601562, |
|
"learning_rate": 2.122e-05, |
|
"loss": 210.331, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.04290614382042446, |
|
"grad_norm": 2171.398681640625, |
|
"learning_rate": 2.124e-05, |
|
"loss": 225.7547, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.042946545085792084, |
|
"grad_norm": 1160.7347412109375, |
|
"learning_rate": 2.1260000000000003e-05, |
|
"loss": 153.8757, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.04298694635115972, |
|
"grad_norm": 1135.9134521484375, |
|
"learning_rate": 2.128e-05, |
|
"loss": 188.1846, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.04302734761652735, |
|
"grad_norm": 2029.5738525390625, |
|
"learning_rate": 2.13e-05, |
|
"loss": 201.2368, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.043067748881894984, |
|
"grad_norm": 814.3689575195312, |
|
"learning_rate": 2.1320000000000003e-05, |
|
"loss": 206.1595, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.04310815014726261, |
|
"grad_norm": 422.2183837890625, |
|
"learning_rate": 2.1340000000000002e-05, |
|
"loss": 157.6857, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.04314855141263024, |
|
"grad_norm": 880.4464111328125, |
|
"learning_rate": 2.1360000000000002e-05, |
|
"loss": 163.0372, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.043188952677997876, |
|
"grad_norm": 484.959228515625, |
|
"learning_rate": 2.138e-05, |
|
"loss": 144.4623, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.04322935394336551, |
|
"grad_norm": 1225.7841796875, |
|
"learning_rate": 2.1400000000000002e-05, |
|
"loss": 226.4971, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.043269755208733135, |
|
"grad_norm": 569.0413208007812, |
|
"learning_rate": 2.142e-05, |
|
"loss": 186.6787, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.04331015647410077, |
|
"grad_norm": 1226.3690185546875, |
|
"learning_rate": 2.144e-05, |
|
"loss": 219.7577, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.0433505577394684, |
|
"grad_norm": 1915.441650390625, |
|
"learning_rate": 2.146e-05, |
|
"loss": 194.0689, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.043390959004836034, |
|
"grad_norm": 1193.3553466796875, |
|
"learning_rate": 2.148e-05, |
|
"loss": 149.6157, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.04343136027020366, |
|
"grad_norm": 562.625, |
|
"learning_rate": 2.15e-05, |
|
"loss": 204.4145, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.04347176153557129, |
|
"grad_norm": 12496.0029296875, |
|
"learning_rate": 2.152e-05, |
|
"loss": 248.7909, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.043512162800938926, |
|
"grad_norm": 1115.0479736328125, |
|
"learning_rate": 2.154e-05, |
|
"loss": 183.6795, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.04355256406630656, |
|
"grad_norm": 1180.6798095703125, |
|
"learning_rate": 2.1560000000000004e-05, |
|
"loss": 161.4541, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.043592965331674186, |
|
"grad_norm": 2096.56298828125, |
|
"learning_rate": 2.158e-05, |
|
"loss": 177.0552, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.04363336659704182, |
|
"grad_norm": 1031.798828125, |
|
"learning_rate": 2.16e-05, |
|
"loss": 160.7948, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.04367376786240945, |
|
"grad_norm": 1001.6660766601562, |
|
"learning_rate": 2.162e-05, |
|
"loss": 279.4042, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.043714169127777085, |
|
"grad_norm": 5103.36669921875, |
|
"learning_rate": 2.1640000000000003e-05, |
|
"loss": 198.244, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.04375457039314471, |
|
"grad_norm": 813.843017578125, |
|
"learning_rate": 2.166e-05, |
|
"loss": 204.9555, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.043794971658512344, |
|
"grad_norm": 1556.0152587890625, |
|
"learning_rate": 2.168e-05, |
|
"loss": 178.9818, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.04383537292387998, |
|
"grad_norm": 1136.54443359375, |
|
"learning_rate": 2.1700000000000002e-05, |
|
"loss": 214.708, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.04387577418924761, |
|
"grad_norm": 913.5957641601562, |
|
"learning_rate": 2.1720000000000002e-05, |
|
"loss": 162.8733, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.043916175454615236, |
|
"grad_norm": 1809.7088623046875, |
|
"learning_rate": 2.1740000000000002e-05, |
|
"loss": 202.5214, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.04395657671998287, |
|
"grad_norm": 1365.0262451171875, |
|
"learning_rate": 2.176e-05, |
|
"loss": 197.6029, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.0439969779853505, |
|
"grad_norm": 1124.8267822265625, |
|
"learning_rate": 2.178e-05, |
|
"loss": 195.3639, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.044037379250718135, |
|
"grad_norm": 933.0280151367188, |
|
"learning_rate": 2.18e-05, |
|
"loss": 166.1828, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.04407778051608576, |
|
"grad_norm": 893.4417724609375, |
|
"learning_rate": 2.182e-05, |
|
"loss": 125.791, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.044118181781453394, |
|
"grad_norm": 1642.8094482421875, |
|
"learning_rate": 2.184e-05, |
|
"loss": 144.994, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.04415858304682103, |
|
"grad_norm": 1519.8751220703125, |
|
"learning_rate": 2.186e-05, |
|
"loss": 158.417, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.04419898431218866, |
|
"grad_norm": 1085.8243408203125, |
|
"learning_rate": 2.188e-05, |
|
"loss": 189.8353, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.04423938557755629, |
|
"grad_norm": 907.6950073242188, |
|
"learning_rate": 2.19e-05, |
|
"loss": 131.8621, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.04427978684292392, |
|
"grad_norm": 988.0367431640625, |
|
"learning_rate": 2.192e-05, |
|
"loss": 110.0259, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.04432018810829155, |
|
"grad_norm": 358.17889404296875, |
|
"learning_rate": 2.1940000000000003e-05, |
|
"loss": 144.1581, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.044360589373659186, |
|
"grad_norm": 3126.972900390625, |
|
"learning_rate": 2.196e-05, |
|
"loss": 242.1895, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.04440099063902681, |
|
"grad_norm": 660.0758056640625, |
|
"learning_rate": 2.198e-05, |
|
"loss": 307.6868, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.044441391904394445, |
|
"grad_norm": 1388.6563720703125, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 191.1774, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.04448179316976208, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2020000000000003e-05, |
|
"loss": 182.281, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.04452219443512971, |
|
"grad_norm": 1133.305419921875, |
|
"learning_rate": 2.2040000000000002e-05, |
|
"loss": 141.6396, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.04456259570049734, |
|
"grad_norm": 760.71728515625, |
|
"learning_rate": 2.206e-05, |
|
"loss": 184.8224, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.04460299696586497, |
|
"grad_norm": 700.0611572265625, |
|
"learning_rate": 2.2080000000000002e-05, |
|
"loss": 174.1201, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.0446433982312326, |
|
"grad_norm": 464.8121643066406, |
|
"learning_rate": 2.2100000000000002e-05, |
|
"loss": 147.1738, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.044683799496600236, |
|
"grad_norm": 659.0347900390625, |
|
"learning_rate": 2.212e-05, |
|
"loss": 121.4146, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.04472420076196786, |
|
"grad_norm": 495.3475646972656, |
|
"learning_rate": 2.214e-05, |
|
"loss": 122.6828, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.044764602027335496, |
|
"grad_norm": 899.0453491210938, |
|
"learning_rate": 2.216e-05, |
|
"loss": 171.483, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.04480500329270313, |
|
"grad_norm": 1652.7977294921875, |
|
"learning_rate": 2.218e-05, |
|
"loss": 136.9625, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.04484540455807076, |
|
"grad_norm": 1164.9923095703125, |
|
"learning_rate": 2.22e-05, |
|
"loss": 218.6428, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.04488580582343839, |
|
"grad_norm": 1860.8387451171875, |
|
"learning_rate": 2.222e-05, |
|
"loss": 128.8451, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.04492620708880602, |
|
"grad_norm": 1793.767578125, |
|
"learning_rate": 2.224e-05, |
|
"loss": 127.518, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.044966608354173654, |
|
"grad_norm": 910.3096923828125, |
|
"learning_rate": 2.226e-05, |
|
"loss": 102.4282, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.04500700961954129, |
|
"grad_norm": 3313.176025390625, |
|
"learning_rate": 2.228e-05, |
|
"loss": 205.4728, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.04504741088490891, |
|
"grad_norm": 959.5852661132812, |
|
"learning_rate": 2.23e-05, |
|
"loss": 181.3004, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.045087812150276546, |
|
"grad_norm": 3117.2705078125, |
|
"learning_rate": 2.2320000000000003e-05, |
|
"loss": 159.9985, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.04512821341564418, |
|
"grad_norm": 4501.052734375, |
|
"learning_rate": 2.234e-05, |
|
"loss": 150.3723, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.04516861468101181, |
|
"grad_norm": 981.978515625, |
|
"learning_rate": 2.236e-05, |
|
"loss": 142.0348, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.04520901594637944, |
|
"grad_norm": 1109.785400390625, |
|
"learning_rate": 2.2380000000000003e-05, |
|
"loss": 163.3822, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.04524941721174707, |
|
"grad_norm": 1274.3033447265625, |
|
"learning_rate": 2.2400000000000002e-05, |
|
"loss": 268.0728, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.045289818477114704, |
|
"grad_norm": 750.956298828125, |
|
"learning_rate": 2.2420000000000002e-05, |
|
"loss": 164.3101, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.04533021974248234, |
|
"grad_norm": 908.2940063476562, |
|
"learning_rate": 2.244e-05, |
|
"loss": 205.5986, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.045370621007849964, |
|
"grad_norm": 1060.370849609375, |
|
"learning_rate": 2.2460000000000002e-05, |
|
"loss": 152.6007, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.0454110222732176, |
|
"grad_norm": 1556.2314453125, |
|
"learning_rate": 2.248e-05, |
|
"loss": 204.8753, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.04545142353858523, |
|
"grad_norm": 683.9859619140625, |
|
"learning_rate": 2.25e-05, |
|
"loss": 142.627, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.04549182480395286, |
|
"grad_norm": 1313.3026123046875, |
|
"learning_rate": 2.252e-05, |
|
"loss": 183.1745, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.04553222606932049, |
|
"grad_norm": 1381.1295166015625, |
|
"learning_rate": 2.254e-05, |
|
"loss": 175.9728, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.04557262733468812, |
|
"grad_norm": 509.716796875, |
|
"learning_rate": 2.256e-05, |
|
"loss": 210.1509, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.045613028600055755, |
|
"grad_norm": 2492.5234375, |
|
"learning_rate": 2.258e-05, |
|
"loss": 231.53, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.04565342986542339, |
|
"grad_norm": 1633.21630859375, |
|
"learning_rate": 2.26e-05, |
|
"loss": 152.2005, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.045693831130791014, |
|
"grad_norm": 768.939208984375, |
|
"learning_rate": 2.2620000000000004e-05, |
|
"loss": 185.2856, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.04573423239615865, |
|
"grad_norm": 799.730712890625, |
|
"learning_rate": 2.264e-05, |
|
"loss": 194.0909, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.04577463366152628, |
|
"grad_norm": 3679.43310546875, |
|
"learning_rate": 2.266e-05, |
|
"loss": 354.1977, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.04581503492689391, |
|
"grad_norm": 1576.691650390625, |
|
"learning_rate": 2.268e-05, |
|
"loss": 181.5113, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.04585543619226154, |
|
"grad_norm": 1108.6512451171875, |
|
"learning_rate": 2.2700000000000003e-05, |
|
"loss": 184.2569, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.04589583745762917, |
|
"grad_norm": 1905.447998046875, |
|
"learning_rate": 2.2720000000000003e-05, |
|
"loss": 217.2384, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.045936238722996806, |
|
"grad_norm": 884.3139038085938, |
|
"learning_rate": 2.274e-05, |
|
"loss": 141.6524, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.04597663998836444, |
|
"grad_norm": 617.2454223632812, |
|
"learning_rate": 2.2760000000000002e-05, |
|
"loss": 148.7823, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.046017041253732065, |
|
"grad_norm": 551.2994384765625, |
|
"learning_rate": 2.2780000000000002e-05, |
|
"loss": 133.9882, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.0460574425190997, |
|
"grad_norm": 700.8192138671875, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 154.8847, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.04609784378446733, |
|
"grad_norm": 1914.3916015625, |
|
"learning_rate": 2.282e-05, |
|
"loss": 145.8075, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.046138245049834964, |
|
"grad_norm": 2445.005859375, |
|
"learning_rate": 2.284e-05, |
|
"loss": 199.9154, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.04617864631520259, |
|
"grad_norm": 1541.644775390625, |
|
"learning_rate": 2.286e-05, |
|
"loss": 168.6186, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.04621904758057022, |
|
"grad_norm": 816.3759765625, |
|
"learning_rate": 2.288e-05, |
|
"loss": 181.8408, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.046259448845937856, |
|
"grad_norm": 1267.0755615234375, |
|
"learning_rate": 2.29e-05, |
|
"loss": 172.8114, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.04629985011130549, |
|
"grad_norm": 855.4586791992188, |
|
"learning_rate": 2.292e-05, |
|
"loss": 197.7767, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.046340251376673115, |
|
"grad_norm": 1094.00634765625, |
|
"learning_rate": 2.294e-05, |
|
"loss": 171.698, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.04638065264204075, |
|
"grad_norm": 1205.0, |
|
"learning_rate": 2.296e-05, |
|
"loss": 215.9526, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.04642105390740838, |
|
"grad_norm": 1176.0413818359375, |
|
"learning_rate": 2.298e-05, |
|
"loss": 163.7831, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.046461455172776014, |
|
"grad_norm": 786.1920776367188, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 167.4065, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.04650185643814364, |
|
"grad_norm": 516.4776000976562, |
|
"learning_rate": 2.302e-05, |
|
"loss": 165.6811, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.046542257703511274, |
|
"grad_norm": 396.16497802734375, |
|
"learning_rate": 2.304e-05, |
|
"loss": 168.2221, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.04658265896887891, |
|
"grad_norm": 882.178466796875, |
|
"learning_rate": 2.306e-05, |
|
"loss": 140.923, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.04662306023424654, |
|
"grad_norm": 934.631591796875, |
|
"learning_rate": 2.3080000000000003e-05, |
|
"loss": 186.3469, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.046663461499614166, |
|
"grad_norm": 1052.7349853515625, |
|
"learning_rate": 2.3100000000000002e-05, |
|
"loss": 201.8761, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.0467038627649818, |
|
"grad_norm": 1325.5751953125, |
|
"learning_rate": 2.312e-05, |
|
"loss": 255.0451, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.04674426403034943, |
|
"grad_norm": 827.1654663085938, |
|
"learning_rate": 2.3140000000000002e-05, |
|
"loss": 162.8754, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.046784665295717065, |
|
"grad_norm": 506.81097412109375, |
|
"learning_rate": 2.3160000000000002e-05, |
|
"loss": 99.88, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.04682506656108469, |
|
"grad_norm": 1403.5689697265625, |
|
"learning_rate": 2.318e-05, |
|
"loss": 157.3405, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.046865467826452324, |
|
"grad_norm": 1264.109375, |
|
"learning_rate": 2.32e-05, |
|
"loss": 135.8957, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.04690586909181996, |
|
"grad_norm": 851.1090698242188, |
|
"learning_rate": 2.322e-05, |
|
"loss": 98.8174, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.04694627035718759, |
|
"grad_norm": 1114.6529541015625, |
|
"learning_rate": 2.324e-05, |
|
"loss": 174.8662, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.046986671622555216, |
|
"grad_norm": 997.0178833007812, |
|
"learning_rate": 2.326e-05, |
|
"loss": 202.974, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.04702707288792285, |
|
"grad_norm": 2686.275146484375, |
|
"learning_rate": 2.328e-05, |
|
"loss": 180.5854, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.04706747415329048, |
|
"grad_norm": 1341.779541015625, |
|
"learning_rate": 2.3300000000000004e-05, |
|
"loss": 188.3465, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.047107875418658116, |
|
"grad_norm": 1174.360595703125, |
|
"learning_rate": 2.332e-05, |
|
"loss": 100.4389, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.04714827668402574, |
|
"grad_norm": 485.88134765625, |
|
"learning_rate": 2.334e-05, |
|
"loss": 84.7096, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.047188677949393375, |
|
"grad_norm": 700.1830444335938, |
|
"learning_rate": 2.336e-05, |
|
"loss": 252.0006, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.04722907921476101, |
|
"grad_norm": 1874.8160400390625, |
|
"learning_rate": 2.3380000000000003e-05, |
|
"loss": 271.8778, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.04726948048012864, |
|
"grad_norm": 780.0234985351562, |
|
"learning_rate": 2.3400000000000003e-05, |
|
"loss": 156.8313, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.04730988174549627, |
|
"grad_norm": 1217.7091064453125, |
|
"learning_rate": 2.342e-05, |
|
"loss": 139.3272, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.0473502830108639, |
|
"grad_norm": 1002.0595092773438, |
|
"learning_rate": 2.344e-05, |
|
"loss": 155.965, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.04739068427623153, |
|
"grad_norm": 597.17529296875, |
|
"learning_rate": 2.3460000000000002e-05, |
|
"loss": 172.1847, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.047431085541599166, |
|
"grad_norm": 910.2625732421875, |
|
"learning_rate": 2.3480000000000002e-05, |
|
"loss": 148.3666, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.04747148680696679, |
|
"grad_norm": 1215.2615966796875, |
|
"learning_rate": 2.35e-05, |
|
"loss": 194.8361, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.047511888072334425, |
|
"grad_norm": 762.500244140625, |
|
"learning_rate": 2.3520000000000002e-05, |
|
"loss": 113.7633, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.04755228933770206, |
|
"grad_norm": 3147.351318359375, |
|
"learning_rate": 2.354e-05, |
|
"loss": 194.7475, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.04759269060306969, |
|
"grad_norm": 1164.023681640625, |
|
"learning_rate": 2.356e-05, |
|
"loss": 108.1381, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.04763309186843732, |
|
"grad_norm": 2093.1826171875, |
|
"learning_rate": 2.358e-05, |
|
"loss": 165.2831, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.04767349313380495, |
|
"grad_norm": 628.909423828125, |
|
"learning_rate": 2.36e-05, |
|
"loss": 172.9029, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.047713894399172584, |
|
"grad_norm": 400.00811767578125, |
|
"learning_rate": 2.362e-05, |
|
"loss": 181.3741, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.04775429566454022, |
|
"grad_norm": 1536.272216796875, |
|
"learning_rate": 2.364e-05, |
|
"loss": 154.7509, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.04779469692990784, |
|
"grad_norm": 1407.3763427734375, |
|
"learning_rate": 2.366e-05, |
|
"loss": 174.5441, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.047835098195275476, |
|
"grad_norm": 726.475830078125, |
|
"learning_rate": 2.3680000000000004e-05, |
|
"loss": 171.3695, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.04787549946064311, |
|
"grad_norm": 1060.8369140625, |
|
"learning_rate": 2.37e-05, |
|
"loss": 150.6572, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.04791590072601074, |
|
"grad_norm": 2245.559814453125, |
|
"learning_rate": 2.372e-05, |
|
"loss": 172.8109, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.04795630199137837, |
|
"grad_norm": 2129.251708984375, |
|
"learning_rate": 2.374e-05, |
|
"loss": 191.5867, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.047996703256746, |
|
"grad_norm": 1103.0263671875, |
|
"learning_rate": 2.3760000000000003e-05, |
|
"loss": 213.8663, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.048037104522113634, |
|
"grad_norm": 1063.9124755859375, |
|
"learning_rate": 2.3780000000000003e-05, |
|
"loss": 194.2018, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.04807750578748127, |
|
"grad_norm": 7122.65185546875, |
|
"learning_rate": 2.38e-05, |
|
"loss": 229.5696, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.04811790705284889, |
|
"grad_norm": 1017.9201049804688, |
|
"learning_rate": 2.3820000000000002e-05, |
|
"loss": 146.0323, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.048158308318216526, |
|
"grad_norm": 1932.71435546875, |
|
"learning_rate": 2.3840000000000002e-05, |
|
"loss": 143.6282, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.04819870958358416, |
|
"grad_norm": 753.262939453125, |
|
"learning_rate": 2.3860000000000002e-05, |
|
"loss": 151.7239, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.04823911084895179, |
|
"grad_norm": 1837.5714111328125, |
|
"learning_rate": 2.3880000000000002e-05, |
|
"loss": 192.4802, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.04827951211431942, |
|
"grad_norm": 1542.8756103515625, |
|
"learning_rate": 2.39e-05, |
|
"loss": 239.2184, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.04831991337968705, |
|
"grad_norm": 1126.86572265625, |
|
"learning_rate": 2.392e-05, |
|
"loss": 132.8466, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.048360314645054685, |
|
"grad_norm": 650.4667358398438, |
|
"learning_rate": 2.394e-05, |
|
"loss": 121.4513, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.04840071591042232, |
|
"grad_norm": 708.6920776367188, |
|
"learning_rate": 2.396e-05, |
|
"loss": 172.2358, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.048441117175789944, |
|
"grad_norm": 1389.237548828125, |
|
"learning_rate": 2.398e-05, |
|
"loss": 220.8339, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.04848151844115758, |
|
"grad_norm": 580.6951293945312, |
|
"learning_rate": 2.4e-05, |
|
"loss": 178.8768, |
|
"step": 12000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 250000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|